Diffstat (limited to 'target/linux/brcm2708/patches-4.19/950-0538-drm-v3d-Refactor-job-management.patch')
-rw-r--r-- | target/linux/brcm2708/patches-4.19/950-0538-drm-v3d-Refactor-job-management.patch | 1104
1 files changed, 1104 insertions, 0 deletions
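
The patch that follows replaces the old v3d_exec_info container with a common struct v3d_job that is embedded in each per-queue job type (bin, render, TFU). The base struct carries the BO array, the in/irq/done fences, a kref and a per-type free callback, so the init and put paths are shared across all queues. As a rough orientation before reading the hunks, here is a minimal standalone C model of that embedded-base-struct-plus-refcount pattern; the fields, the plain integer refcount and the helper names are simplified illustrations written for this page, not the driver's real structures or the kernel kref API.

/* Userspace model of the refactor: a common job base embedded in
 * per-queue job types, with one shared refcount/put path and a
 * per-type free callback.  Everything here is illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct v3d_job {
	int refcount;				/* stands in for struct kref */
	void (*free)(struct v3d_job *job);	/* per-type cleanup */
};

struct v3d_bin_job {
	struct v3d_job base;
	unsigned int start, end;		/* CL start/end addresses */
};

struct v3d_render_job {
	struct v3d_job base;
	unsigned int start, end;
};

/* Shared init used by every queue, in the spirit of v3d_job_init(). */
static void job_init(struct v3d_job *job, void (*free)(struct v3d_job *))
{
	job->refcount = 1;
	job->free = free;
}

/* Shared put path: the last reference calls the type-specific free. */
static void job_put(struct v3d_job *job)
{
	if (--job->refcount == 0)
		job->free(job);
}

static void bin_job_free(struct v3d_job *job)
{
	struct v3d_bin_job *bin = container_of(job, struct v3d_bin_job, base);

	printf("freeing bin job %u..%u\n", bin->start, bin->end);
	free(bin);
}

int main(void)
{
	struct v3d_bin_job *bin = calloc(1, sizeof(*bin));

	if (!bin)
		return 1;

	job_init(&bin->base, bin_job_free);
	bin->start = 0x1000;
	bin->end = 0x2000;

	job_put(&bin->base);	/* refcount hits zero, bin_job_free runs */
	return 0;
}

In the driver itself the equivalent roles are played by v3d_job_init(), v3d_job_put() and the kref callbacks v3d_job_free()/v3d_render_job_free() introduced below.
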
diff --git a/target/linux/brcm2708/patches-4.19/950-0538-drm-v3d-Refactor-job-management.patch b/target/linux/brcm2708/patches-4.19/950-0538-drm-v3d-Refactor-job-management.patch new file mode 100644 index 0000000000..1192b166d4 --- /dev/null +++ b/target/linux/brcm2708/patches-4.19/950-0538-drm-v3d-Refactor-job-management.patch @@ -0,0 +1,1104 @@ +From ccf319a0265bfdb4a622a52645f159461bc88079 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <eric@anholt.net> +Date: Thu, 27 Dec 2018 12:11:52 -0800 +Subject: [PATCH] drm/v3d: Refactor job management. + +The CL submission had two jobs embedded in an exec struct. When I +added TFU support, I had to replicate some of the exec stuff and some +of the job stuff. As I went to add CSD, it became clear that actually +what was in exec should just be in the two CL jobs, and it would let +us share a lot more code between the 4 queues. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + drivers/gpu/drm/v3d/v3d_drv.h | 77 ++++---- + drivers/gpu/drm/v3d/v3d_gem.c | 331 +++++++++++++++++--------------- + drivers/gpu/drm/v3d/v3d_irq.c | 8 +- + drivers/gpu/drm/v3d/v3d_sched.c | 264 ++++++++++++++----------- + 4 files changed, 373 insertions(+), 307 deletions(-) + +--- a/drivers/gpu/drm/v3d/v3d_drv.h ++++ b/drivers/gpu/drm/v3d/v3d_drv.h +@@ -67,8 +67,8 @@ struct v3d_dev { + + struct work_struct overflow_mem_work; + +- struct v3d_exec_info *bin_job; +- struct v3d_exec_info *render_job; ++ struct v3d_bin_job *bin_job; ++ struct v3d_render_job *render_job; + struct v3d_tfu_job *tfu_job; + + struct v3d_queue_state queue[V3D_MAX_QUEUES]; +@@ -132,7 +132,7 @@ struct v3d_bo { + struct list_head vmas; /* list of v3d_vma */ + + /* List entry for the BO's position in +- * v3d_exec_info->unref_list ++ * v3d_render_job->unref_list + */ + struct list_head unref_head; + +@@ -176,7 +176,15 @@ to_v3d_fence(struct dma_fence *fence) + struct v3d_job { + struct drm_sched_job base; + +- struct v3d_exec_info *exec; ++ struct kref refcount; ++ ++ struct v3d_dev *v3d; ++ ++ /* This is the array of BOs that were looked up at the start ++ * of submission. ++ */ ++ struct v3d_bo **bo; ++ u32 bo_count; + + /* An optional fence userspace can pass in for the job to depend on. */ + struct dma_fence *in_fence; +@@ -184,59 +192,53 @@ struct v3d_job { + /* v3d fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *irq_fence; + ++ /* scheduler fence for when the job is considered complete and ++ * the BO reservations can be released. ++ */ ++ struct dma_fence *done_fence; ++ ++ /* Callback for the freeing of the job on refcount going to 0. */ ++ void (*free)(struct kref *ref); ++}; ++ ++struct v3d_bin_job { ++ struct v3d_job base; ++ + /* GPU virtual addresses of the start/end of the CL job. */ + u32 start, end; + + u32 timedout_ctca, timedout_ctra; +-}; + +-struct v3d_exec_info { +- struct v3d_dev *v3d; ++ /* Corresponding render job, for attaching our overflow memory. */ ++ struct v3d_render_job *render; ++ ++ /* Submitted tile memory allocation start/size, tile state. */ ++ u32 qma, qms, qts; ++}; + +- struct v3d_job bin, render; ++struct v3d_render_job { ++ struct v3d_job base; + +- /* Fence for when the scheduler considers the binner to be +- * done, for render to depend on. ++ /* Optional fence for the binner, to depend on before starting ++ * our job. + */ + struct dma_fence *bin_done_fence; + +- /* Fence for when the scheduler considers the render to be +- * done, for when the BOs reservations should be complete. 
+- */ +- struct dma_fence *render_done_fence; +- +- struct kref refcount; ++ /* GPU virtual addresses of the start/end of the CL job. */ ++ u32 start, end; + +- /* This is the array of BOs that were looked up at the start of exec. */ +- struct v3d_bo **bo; +- u32 bo_count; ++ u32 timedout_ctca, timedout_ctra; + + /* List of overflow BOs used in the job that need to be + * released once the job is complete. + */ + struct list_head unref_list; +- +- /* Submitted tile memory allocation start/size, tile state. */ +- u32 qma, qms, qts; + }; + + struct v3d_tfu_job { +- struct drm_sched_job base; ++ struct v3d_job base; + + struct drm_v3d_submit_tfu args; +- +- /* An optional fence userspace can pass in for the job to depend on. */ +- struct dma_fence *in_fence; +- +- /* v3d fence to be signaled by IRQ handler when the job is complete. */ +- struct dma_fence *irq_fence; +- +- struct v3d_dev *v3d; +- +- struct kref refcount; +- +- /* This is the array of BOs that were looked up at the start of exec. */ +- struct v3d_bo *bo[4]; + }; + + /** +@@ -306,8 +308,7 @@ int v3d_submit_tfu_ioctl(struct drm_devi + struct drm_file *file_priv); + int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +-void v3d_exec_put(struct v3d_exec_info *exec); +-void v3d_tfu_job_put(struct v3d_tfu_job *exec); ++void v3d_job_put(struct v3d_job *job); + void v3d_reset(struct v3d_dev *v3d); + void v3d_invalidate_caches(struct v3d_dev *v3d); + +--- a/drivers/gpu/drm/v3d/v3d_gem.c ++++ b/drivers/gpu/drm/v3d/v3d_gem.c +@@ -293,11 +293,11 @@ retry: + } + + /** +- * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects ++ * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd +- * @exec: V3D job being set up ++ * @job: V3D job being set up + * + * The command validator needs to reference BOs by their index within + * the submitted job's BO list. This does the validation of the job's +@@ -307,18 +307,19 @@ retry: + * failure, because that will happen at v3d_exec_cleanup() time. + */ + static int +-v3d_cl_lookup_bos(struct drm_device *dev, +- struct drm_file *file_priv, +- struct drm_v3d_submit_cl *args, +- struct v3d_exec_info *exec) ++v3d_lookup_bos(struct drm_device *dev, ++ struct drm_file *file_priv, ++ struct v3d_job *job, ++ u64 bo_handles, ++ u32 bo_count) + { + u32 *handles; + int ret = 0; + int i; + +- exec->bo_count = args->bo_handle_count; ++ job->bo_count = bo_count; + +- if (!exec->bo_count) { ++ if (!job->bo_count) { + /* See comment on bo_index for why we have to check + * this. 
+ */ +@@ -326,15 +327,15 @@ v3d_cl_lookup_bos(struct drm_device *dev + return -EINVAL; + } + +- exec->bo = kvmalloc_array(exec->bo_count, +- sizeof(struct drm_gem_cma_object *), +- GFP_KERNEL | __GFP_ZERO); +- if (!exec->bo) { ++ job->bo = kvmalloc_array(job->bo_count, ++ sizeof(struct drm_gem_cma_object *), ++ GFP_KERNEL | __GFP_ZERO); ++ if (!job->bo) { + DRM_DEBUG("Failed to allocate validated BO pointers\n"); + return -ENOMEM; + } + +- handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); ++ handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + DRM_DEBUG("Failed to allocate incoming GEM handles\n"); +@@ -342,15 +343,15 @@ v3d_cl_lookup_bos(struct drm_device *dev + } + + if (copy_from_user(handles, +- (void __user *)(uintptr_t)args->bo_handles, +- exec->bo_count * sizeof(u32))) { ++ (void __user *)(uintptr_t)bo_handles, ++ job->bo_count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in GEM handles\n"); + goto fail; + } + + spin_lock(&file_priv->table_lock); +- for (i = 0; i < exec->bo_count; i++) { ++ for (i = 0; i < job->bo_count; i++) { + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, + handles[i]); + if (!bo) { +@@ -361,7 +362,7 @@ v3d_cl_lookup_bos(struct drm_device *dev + goto fail; + } + drm_gem_object_get(bo); +- exec->bo[i] = to_v3d_bo(bo); ++ job->bo[i] = to_v3d_bo(bo); + } + spin_unlock(&file_priv->table_lock); + +@@ -371,59 +372,41 @@ fail: + } + + static void +-v3d_exec_cleanup(struct kref *ref) ++v3d_job_free(struct kref *ref) + { +- struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, +- refcount); +- unsigned int i; +- struct v3d_bo *bo, *save; +- +- dma_fence_put(exec->bin.in_fence); +- dma_fence_put(exec->render.in_fence); +- +- dma_fence_put(exec->bin.irq_fence); +- dma_fence_put(exec->render.irq_fence); +- +- dma_fence_put(exec->bin_done_fence); +- dma_fence_put(exec->render_done_fence); +- +- for (i = 0; i < exec->bo_count; i++) +- drm_gem_object_put_unlocked(&exec->bo[i]->base); +- kvfree(exec->bo); ++ struct v3d_job *job = container_of(ref, struct v3d_job, refcount); ++ int i; + +- list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { +- drm_gem_object_put_unlocked(&bo->base); ++ for (i = 0; i < job->bo_count; i++) { ++ if (job->bo[i]) ++ drm_gem_object_put_unlocked(&job->bo[i]->base); + } ++ kvfree(job->bo); + +- kfree(exec); +-} ++ dma_fence_put(job->in_fence); ++ dma_fence_put(job->irq_fence); ++ dma_fence_put(job->done_fence); + +-void v3d_exec_put(struct v3d_exec_info *exec) +-{ +- kref_put(&exec->refcount, v3d_exec_cleanup); ++ kfree(job); + } + + static void +-v3d_tfu_job_cleanup(struct kref *ref) ++v3d_render_job_free(struct kref *ref) + { +- struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job, +- refcount); +- unsigned int i; +- +- dma_fence_put(job->in_fence); +- dma_fence_put(job->irq_fence); ++ struct v3d_render_job *job = container_of(ref, struct v3d_render_job, ++ base.refcount); ++ struct v3d_bo *bo, *save; + +- for (i = 0; i < ARRAY_SIZE(job->bo); i++) { +- if (job->bo[i]) +- drm_gem_object_put_unlocked(&job->bo[i]->base); ++ list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { ++ drm_gem_object_put_unlocked(&bo->base); + } + +- kfree(job); ++ v3d_job_free(ref); + } + +-void v3d_tfu_job_put(struct v3d_tfu_job *job) ++void v3d_job_put(struct v3d_job *job) + { +- kref_put(&job->refcount, v3d_tfu_job_cleanup); ++ kref_put(&job->refcount, job->free); + } + + int +@@ -476,6 +459,65 @@ v3d_wait_bo_ioctl(struct 
drm_device *dev + return ret; + } + ++static int ++v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, ++ struct v3d_job *job, void (*free)(struct kref *ref), ++ u32 in_sync) ++{ ++ int ret; ++ ++ job->v3d = v3d; ++ job->free = free; ++ ++ ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence); ++ if (ret == -EINVAL) ++ return ret; ++ ++ kref_init(&job->refcount); ++ ++ return 0; ++} ++ ++static int ++v3d_push_job(struct v3d_file_priv *v3d_priv, ++ struct v3d_job *job, enum v3d_queue queue) ++{ ++ int ret; ++ ++ ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], ++ v3d_priv); ++ if (ret) ++ return ret; ++ ++ job->done_fence = dma_fence_get(&job->base.s_fence->finished); ++ ++ /* put by scheduler job completion */ ++ kref_get(&job->refcount); ++ ++ drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]); ++ ++ return 0; ++} ++ ++static void ++v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, ++ struct v3d_job *job, ++ struct ww_acquire_ctx *acquire_ctx, ++ u32 out_sync) ++{ ++ struct drm_syncobj *sync_out; ++ ++ v3d_attach_object_fences(job->bo, job->bo_count, job->done_fence); ++ v3d_unlock_bo_reservations(job->bo, job->bo_count, acquire_ctx); ++ ++ /* Update the return sync object for the job */ ++ sync_out = drm_syncobj_find(file_priv, out_sync); ++ if (sync_out) { ++ drm_syncobj_replace_fence(sync_out, job->done_fence); ++ drm_syncobj_put(sync_out); ++ } ++} ++ + /** + * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. + * @dev: DRM device +@@ -495,9 +537,9 @@ v3d_submit_cl_ioctl(struct drm_device *d + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_cl *args = data; +- struct v3d_exec_info *exec; ++ struct v3d_bin_job *bin = NULL; ++ struct v3d_render_job *render; + struct ww_acquire_ctx acquire_ctx; +- struct drm_syncobj *sync_out; + int ret = 0; + + trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); +@@ -507,95 +549,84 @@ v3d_submit_cl_ioctl(struct drm_device *d + return -EINVAL; + } + +- exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); +- if (!exec) ++ render = kcalloc(1, sizeof(*render), GFP_KERNEL); ++ if (!render) + return -ENOMEM; + +- kref_init(&exec->refcount); ++ render->start = args->rcl_start; ++ render->end = args->rcl_end; ++ INIT_LIST_HEAD(&render->unref_list); + +- ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, +- 0, &exec->bin.in_fence); +- if (ret == -EINVAL) +- goto fail; ++ ret = v3d_job_init(v3d, file_priv, &render->base, ++ v3d_render_job_free, args->in_sync_rcl); ++ if (ret) { ++ kfree(bin); ++ kfree(render); ++ return ret; ++ } + +- ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, +- 0, &exec->render.in_fence); +- if (ret == -EINVAL) +- goto fail; ++ if (args->bcl_start != args->bcl_end) { ++ bin = kcalloc(1, sizeof(*bin), GFP_KERNEL); ++ if (!bin) ++ return -ENOMEM; ++ ++ ret = v3d_job_init(v3d, file_priv, &bin->base, ++ v3d_job_free, args->in_sync_bcl); ++ if (ret) { ++ v3d_job_put(&render->base); ++ return ret; ++ } + +- exec->qma = args->qma; +- exec->qms = args->qms; +- exec->qts = args->qts; +- exec->bin.exec = exec; +- exec->bin.start = args->bcl_start; +- exec->bin.end = args->bcl_end; +- exec->render.exec = exec; +- exec->render.start = args->rcl_start; +- exec->render.end = args->rcl_end; +- exec->v3d = v3d; +- INIT_LIST_HEAD(&exec->unref_list); ++ bin->start = args->bcl_start; ++ bin->end = args->bcl_end; ++ bin->qma = args->qma; ++ bin->qms = args->qms; ++ 
bin->qts = args->qts; ++ bin->render = render; ++ } + +- ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); ++ ret = v3d_lookup_bos(dev, file_priv, &render->base, ++ args->bo_handles, args->bo_handle_count); + if (ret) + goto fail; + +- ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count, ++ ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count, + &acquire_ctx); + if (ret) + goto fail; + + mutex_lock(&v3d->sched_lock); +- if (exec->bin.start != exec->bin.end) { +- ret = drm_sched_job_init(&exec->bin.base, +- &v3d_priv->sched_entity[V3D_BIN], +- v3d_priv); ++ if (bin) { ++ ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN); + if (ret) + goto fail_unreserve; + +- exec->bin_done_fence = +- dma_fence_get(&exec->bin.base.s_fence->finished); +- +- kref_get(&exec->refcount); /* put by scheduler job completion */ +- drm_sched_entity_push_job(&exec->bin.base, +- &v3d_priv->sched_entity[V3D_BIN]); ++ render->bin_done_fence = dma_fence_get(bin->base.done_fence); + } + +- ret = drm_sched_job_init(&exec->render.base, +- &v3d_priv->sched_entity[V3D_RENDER], +- v3d_priv); ++ ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); + if (ret) + goto fail_unreserve; +- +- exec->render_done_fence = +- dma_fence_get(&exec->render.base.s_fence->finished); +- +- kref_get(&exec->refcount); /* put by scheduler job completion */ +- drm_sched_entity_push_job(&exec->render.base, +- &v3d_priv->sched_entity[V3D_RENDER]); + mutex_unlock(&v3d->sched_lock); + +- v3d_attach_object_fences(exec->bo, exec->bo_count, +- exec->render_done_fence); +- +- v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); +- +- /* Update the return sync object for the */ +- sync_out = drm_syncobj_find(file_priv, args->out_sync); +- if (sync_out) { +- drm_syncobj_replace_fence(sync_out, +- exec->render_done_fence); +- drm_syncobj_put(sync_out); +- } +- +- v3d_exec_put(exec); ++ v3d_attach_fences_and_unlock_reservation(file_priv, ++ &render->base, &acquire_ctx, ++ args->out_sync); ++ ++ if (bin) ++ v3d_job_put(&bin->base); ++ v3d_job_put(&render->base); + + return 0; + + fail_unreserve: + mutex_unlock(&v3d->sched_lock); +- v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); ++ v3d_unlock_bo_reservations(render->base.bo, ++ render->base.bo_count, &acquire_ctx); + fail: +- v3d_exec_put(exec); ++ if (bin) ++ v3d_job_put(&bin->base); ++ v3d_job_put(&render->base); + + return ret; + } +@@ -618,10 +649,7 @@ v3d_submit_tfu_ioctl(struct drm_device * + struct drm_v3d_submit_tfu *args = data; + struct v3d_tfu_job *job; + struct ww_acquire_ctx acquire_ctx; +- struct drm_syncobj *sync_out; +- struct dma_fence *sched_done_fence; + int ret = 0; +- int bo_count; + + trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); + +@@ -629,75 +657,66 @@ v3d_submit_tfu_ioctl(struct drm_device * + if (!job) + return -ENOMEM; + +- kref_init(&job->refcount); +- +- ret = drm_syncobj_find_fence(file_priv, args->in_sync, +- 0, &job->in_fence); +- if (ret == -EINVAL) +- goto fail; ++ ret = v3d_job_init(v3d, file_priv, &job->base, ++ v3d_job_free, args->in_sync); ++ if (ret) { ++ kfree(job); ++ return ret; ++ } + ++ job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), ++ sizeof(*job->base.bo), GFP_KERNEL); + job->args = *args; +- job->v3d = v3d; + + spin_lock(&file_priv->table_lock); +- for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) { ++ for (job->base.bo_count = 0; ++ job->base.bo_count < ARRAY_SIZE(args->bo_handles); ++ job->base.bo_count++) { + struct drm_gem_object *bo; + +- if (!args->bo_handles[bo_count]) ++ 
if (!args->bo_handles[job->base.bo_count]) + break; + + bo = idr_find(&file_priv->object_idr, +- args->bo_handles[bo_count]); ++ args->bo_handles[job->base.bo_count]); + if (!bo) { + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", +- bo_count, args->bo_handles[bo_count]); ++ job->base.bo_count, ++ args->bo_handles[job->base.bo_count]); + ret = -ENOENT; + spin_unlock(&file_priv->table_lock); + goto fail; + } + drm_gem_object_get(bo); +- job->bo[bo_count] = to_v3d_bo(bo); ++ job->base.bo[job->base.bo_count] = to_v3d_bo(bo); + } + spin_unlock(&file_priv->table_lock); + +- ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx); ++ ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count, ++ &acquire_ctx); + if (ret) + goto fail; + + mutex_lock(&v3d->sched_lock); +- ret = drm_sched_job_init(&job->base, +- &v3d_priv->sched_entity[V3D_TFU], +- v3d_priv); ++ ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU); + if (ret) + goto fail_unreserve; +- +- sched_done_fence = dma_fence_get(&job->base.s_fence->finished); +- +- kref_get(&job->refcount); /* put by scheduler job completion */ +- drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]); + mutex_unlock(&v3d->sched_lock); + +- v3d_attach_object_fences(job->bo, bo_count, sched_done_fence); +- +- v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx); +- +- /* Update the return sync object */ +- sync_out = drm_syncobj_find(file_priv, args->out_sync); +- if (sync_out) { +- drm_syncobj_replace_fence(sync_out, sched_done_fence); +- drm_syncobj_put(sync_out); +- } +- dma_fence_put(sched_done_fence); ++ v3d_attach_fences_and_unlock_reservation(file_priv, ++ &job->base, &acquire_ctx, ++ args->out_sync); + +- v3d_tfu_job_put(job); ++ v3d_job_put(&job->base); + + return 0; + + fail_unreserve: + mutex_unlock(&v3d->sched_lock); +- v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx); ++ v3d_unlock_bo_reservations(job->base.bo, job->base.bo_count, ++ &acquire_ctx); + fail: +- v3d_tfu_job_put(job); ++ v3d_job_put(&job->base); + + return ret; + } +@@ -755,7 +774,7 @@ v3d_gem_destroy(struct drm_device *dev) + + v3d_sched_fini(v3d); + +- /* Waiting for exec to finish would need to be done before ++ /* Waiting for jobs to finish would need to be done before + * unregistering V3D. 
+ */ + WARN_ON(v3d->bin_job); +--- a/drivers/gpu/drm/v3d/v3d_irq.c ++++ b/drivers/gpu/drm/v3d/v3d_irq.c +@@ -60,7 +60,7 @@ v3d_overflow_mem_work(struct work_struct + } + + drm_gem_object_get(&bo->base); +- list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list); ++ list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list); + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT); +@@ -93,7 +93,7 @@ v3d_irq(int irq, void *arg) + + if (intsts & V3D_INT_FLDONE) { + struct v3d_fence *fence = +- to_v3d_fence(v3d->bin_job->bin.irq_fence); ++ to_v3d_fence(v3d->bin_job->base.irq_fence); + + trace_v3d_bcl_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); +@@ -102,7 +102,7 @@ v3d_irq(int irq, void *arg) + + if (intsts & V3D_INT_FRDONE) { + struct v3d_fence *fence = +- to_v3d_fence(v3d->render_job->render.irq_fence); ++ to_v3d_fence(v3d->render_job->base.irq_fence); + + trace_v3d_rcl_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); +@@ -138,7 +138,7 @@ v3d_hub_irq(int irq, void *arg) + + if (intsts & V3D_HUB_INT_TFUC) { + struct v3d_fence *fence = +- to_v3d_fence(v3d->tfu_job->irq_fence); ++ to_v3d_fence(v3d->tfu_job->base.irq_fence); + + trace_v3d_tfu_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); +--- a/drivers/gpu/drm/v3d/v3d_sched.c ++++ b/drivers/gpu/drm/v3d/v3d_sched.c +@@ -30,39 +30,43 @@ to_v3d_job(struct drm_sched_job *sched_j + return container_of(sched_job, struct v3d_job, base); + } + +-static struct v3d_tfu_job * +-to_tfu_job(struct drm_sched_job *sched_job) ++static struct v3d_bin_job * ++to_bin_job(struct drm_sched_job *sched_job) + { +- return container_of(sched_job, struct v3d_tfu_job, base); ++ return container_of(sched_job, struct v3d_bin_job, base.base); + } + +-static void +-v3d_job_free(struct drm_sched_job *sched_job) ++static struct v3d_render_job * ++to_render_job(struct drm_sched_job *sched_job) + { +- struct v3d_job *job = to_v3d_job(sched_job); ++ return container_of(sched_job, struct v3d_render_job, base.base); ++} + +- v3d_exec_put(job->exec); ++static struct v3d_tfu_job * ++to_tfu_job(struct drm_sched_job *sched_job) ++{ ++ return container_of(sched_job, struct v3d_tfu_job, base.base); + } + + static void +-v3d_tfu_job_free(struct drm_sched_job *sched_job) ++v3d_job_free(struct drm_sched_job *sched_job) + { +- struct v3d_tfu_job *job = to_tfu_job(sched_job); ++ struct v3d_job *job = to_v3d_job(sched_job); + +- v3d_tfu_job_put(job); ++ v3d_job_put(job); + } + + /** +- * Returns the fences that the bin or render job depends on, one by one. +- * v3d_job_run() won't be called until all of them have been signaled. ++ * Returns the fences that the job depends on, one by one. ++ * ++ * If placed in the scheduler's .dependency method, the corresponding ++ * .run_job won't be called until all of them have been signaled. + */ + static struct dma_fence * + v3d_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) + { + struct v3d_job *job = to_v3d_job(sched_job); +- struct v3d_exec_info *exec = job->exec; +- enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct dma_fence *fence; + + fence = job->in_fence; +@@ -71,113 +75,132 @@ v3d_job_dependency(struct drm_sched_job + return fence; + } + +- if (q == V3D_RENDER) { +- /* If we had a bin job, the render job definitely depends on +- * it. We first have to wait for bin to be scheduled, so that +- * its done_fence is created. 
+- */ +- fence = exec->bin_done_fence; +- if (fence) { +- exec->bin_done_fence = NULL; +- return fence; +- } +- } +- +- /* XXX: Wait on a fence for switching the GMP if necessary, +- * and then do so. +- */ +- +- return fence; ++ return NULL; + } + + /** +- * Returns the fences that the TFU job depends on, one by one. +- * v3d_tfu_job_run() won't be called until all of them have been +- * signaled. ++ * Returns the fences that the render job depends on, one by one. ++ * v3d_job_run() won't be called until all of them have been signaled. + */ + static struct dma_fence * +-v3d_tfu_job_dependency(struct drm_sched_job *sched_job, +- struct drm_sched_entity *s_entity) ++v3d_render_job_dependency(struct drm_sched_job *sched_job, ++ struct drm_sched_entity *s_entity) + { +- struct v3d_tfu_job *job = to_tfu_job(sched_job); ++ struct v3d_render_job *job = to_render_job(sched_job); + struct dma_fence *fence; + +- fence = job->in_fence; ++ fence = v3d_job_dependency(sched_job, s_entity); ++ if (fence) ++ return fence; ++ ++ /* If we had a bin job, the render job definitely depends on ++ * it. We first have to wait for bin to be scheduled, so that ++ * its done_fence is created. ++ */ ++ fence = job->bin_done_fence; + if (fence) { +- job->in_fence = NULL; ++ job->bin_done_fence = NULL; + return fence; + } + +- return NULL; ++ /* XXX: Wait on a fence for switching the GMP if necessary, ++ * and then do so. ++ */ ++ ++ return fence; + } + +-static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) ++static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) + { +- struct v3d_job *job = to_v3d_job(sched_job); +- struct v3d_exec_info *exec = job->exec; +- enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; +- struct v3d_dev *v3d = exec->v3d; ++ struct v3d_bin_job *job = to_bin_job(sched_job); ++ struct v3d_dev *v3d = job->base.v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + unsigned long irqflags; + +- if (unlikely(job->base.s_fence->finished.error)) ++ if (unlikely(job->base.base.s_fence->finished.error)) + return NULL; + + /* Lock required around bin_job update vs + * v3d_overflow_mem_work(). + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); +- if (q == V3D_BIN) { +- v3d->bin_job = job->exec; ++ v3d->bin_job = job; ++ /* Clear out the overflow allocation, so we don't ++ * reuse the overflow attached to a previous job. ++ */ ++ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); ++ spin_unlock_irqrestore(&v3d->job_lock, irqflags); ++ ++ v3d_invalidate_caches(v3d); + +- /* Clear out the overflow allocation, so we don't +- * reuse the overflow attached to a previous job. +- */ +- V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); +- } else { +- v3d->render_job = job->exec; ++ fence = v3d_fence_create(v3d, V3D_BIN); ++ if (IS_ERR(fence)) ++ return NULL; ++ ++ if (job->base.irq_fence) ++ dma_fence_put(job->base.irq_fence); ++ job->base.irq_fence = dma_fence_get(fence); ++ ++ trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, ++ job->start, job->end); ++ ++ /* Set the current and end address of the control list. ++ * Writing the end register is what starts the job. 
++ */ ++ if (job->qma) { ++ V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma); ++ V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms); + } +- spin_unlock_irqrestore(&v3d->job_lock, irqflags); ++ if (job->qts) { ++ V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, ++ V3D_CLE_CT0QTS_ENABLE | ++ job->qts); ++ } ++ V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start); ++ V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end); ++ ++ return fence; ++} ++ ++static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) ++{ ++ struct v3d_render_job *job = to_render_job(sched_job); ++ struct v3d_dev *v3d = job->base.v3d; ++ struct drm_device *dev = &v3d->drm; ++ struct dma_fence *fence; ++ ++ if (unlikely(job->base.base.s_fence->finished.error)) ++ return NULL; + +- /* Can we avoid this flush when q==RENDER? We need to be +- * careful of scheduling, though -- imagine job0 rendering to +- * texture and job1 reading, and them being executed as bin0, +- * bin1, render0, render1, so that render1's flush at bin time ++ v3d->render_job = job; ++ ++ /* Can we avoid this flush? We need to be careful of ++ * scheduling, though -- imagine job0 rendering to texture and ++ * job1 reading, and them being executed as bin0, bin1, ++ * render0, render1, so that render1's flush at bin time + * wasn't enough. + */ + v3d_invalidate_caches(v3d); + +- fence = v3d_fence_create(v3d, q); ++ fence = v3d_fence_create(v3d, V3D_RENDER); + if (IS_ERR(fence)) + return NULL; + +- if (job->irq_fence) +- dma_fence_put(job->irq_fence); +- job->irq_fence = dma_fence_get(fence); ++ if (job->base.irq_fence) ++ dma_fence_put(job->base.irq_fence); ++ job->base.irq_fence = dma_fence_get(fence); + +- trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, ++ trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, + job->start, job->end); + +- if (q == V3D_BIN) { +- if (exec->qma) { +- V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); +- V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); +- } +- if (exec->qts) { +- V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, +- V3D_CLE_CT0QTS_ENABLE | +- exec->qts); +- } +- } else { +- /* XXX: Set the QCFG */ +- } ++ /* XXX: Set the QCFG */ + + /* Set the current and end address of the control list. + * Writing the end register is what starts the job. + */ +- V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); +- V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); ++ V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start); ++ V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end); + + return fence; + } +@@ -186,7 +209,7 @@ static struct dma_fence * + v3d_tfu_job_run(struct drm_sched_job *sched_job) + { + struct v3d_tfu_job *job = to_tfu_job(sched_job); +- struct v3d_dev *v3d = job->v3d; ++ struct v3d_dev *v3d = job->base.v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + +@@ -195,9 +218,9 @@ v3d_tfu_job_run(struct drm_sched_job *sc + return NULL; + + v3d->tfu_job = job; +- if (job->irq_fence) +- dma_fence_put(job->irq_fence); +- job->irq_fence = dma_fence_get(fence); ++ if (job->base.irq_fence) ++ dma_fence_put(job->base.irq_fence); ++ job->base.irq_fence = dma_fence_get(fence); + + trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); + +@@ -247,25 +270,23 @@ v3d_gpu_reset_for_timeout(struct v3d_dev + mutex_unlock(&v3d->reset_lock); + } + ++/* If the current address or return address have changed, then the GPU ++ * has probably made progress and we should delay the reset. This ++ * could fail if the GPU got in an infinite loop in the CL, but that ++ * is pretty unlikely outside of an i-g-t testcase. 
++ */ + static void +-v3d_job_timedout(struct drm_sched_job *sched_job) ++v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, ++ u32 *timedout_ctca, u32 *timedout_ctra) + { + struct v3d_job *job = to_v3d_job(sched_job); +- struct v3d_exec_info *exec = job->exec; +- struct v3d_dev *v3d = exec->v3d; +- enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER; +- u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q)); +- u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q)); +- +- /* If the current address or return address have changed, then +- * the GPU has probably made progress and we should delay the +- * reset. This could fail if the GPU got in an infinite loop +- * in the CL, but that is pretty unlikely outside of an i-g-t +- * testcase. +- */ +- if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) { +- job->timedout_ctca = ctca; +- job->timedout_ctra = ctra; ++ struct v3d_dev *v3d = job->v3d; ++ u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); ++ u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); ++ ++ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { ++ *timedout_ctca = ctca; ++ *timedout_ctra = ctra; + schedule_delayed_work(&job->base.work_tdr, + job->base.sched->timeout); + return; +@@ -275,25 +296,50 @@ v3d_job_timedout(struct drm_sched_job *s + } + + static void ++v3d_bin_job_timedout(struct drm_sched_job *sched_job) ++{ ++ struct v3d_bin_job *job = to_bin_job(sched_job); ++ ++ v3d_cl_job_timedout(sched_job, V3D_BIN, ++ &job->timedout_ctca, &job->timedout_ctra); ++} ++ ++static void ++v3d_render_job_timedout(struct drm_sched_job *sched_job) ++{ ++ struct v3d_render_job *job = to_render_job(sched_job); ++ ++ v3d_cl_job_timedout(sched_job, V3D_RENDER, ++ &job->timedout_ctca, &job->timedout_ctra); ++} ++ ++static void + v3d_tfu_job_timedout(struct drm_sched_job *sched_job) + { +- struct v3d_tfu_job *job = to_tfu_job(sched_job); ++ struct v3d_job *job = to_v3d_job(sched_job); + + v3d_gpu_reset_for_timeout(job->v3d, sched_job); + } + +-static const struct drm_sched_backend_ops v3d_sched_ops = { ++static const struct drm_sched_backend_ops v3d_bin_sched_ops = { + .dependency = v3d_job_dependency, +- .run_job = v3d_job_run, +- .timedout_job = v3d_job_timedout, +- .free_job = v3d_job_free ++ .run_job = v3d_bin_job_run, ++ .timedout_job = v3d_bin_job_timedout, ++ .free_job = v3d_job_free, ++}; ++ ++static const struct drm_sched_backend_ops v3d_render_sched_ops = { ++ .dependency = v3d_render_job_dependency, ++ .run_job = v3d_render_job_run, ++ .timedout_job = v3d_render_job_timedout, ++ .free_job = v3d_job_free, + }; + + static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { +- .dependency = v3d_tfu_job_dependency, ++ .dependency = v3d_job_dependency, + .run_job = v3d_tfu_job_run, + .timedout_job = v3d_tfu_job_timedout, +- .free_job = v3d_tfu_job_free ++ .free_job = v3d_job_free, + }; + + int +@@ -305,7 +351,7 @@ v3d_sched_init(struct v3d_dev *v3d) + int ret; + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, +- &v3d_sched_ops, ++ &v3d_bin_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_bin"); +@@ -315,7 +361,7 @@ v3d_sched_init(struct v3d_dev *v3d) + } + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, +- &v3d_sched_ops, ++ &v3d_render_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_render"); |
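
With the shared base struct in place, the submit ioctls in v3d_gem.c converge on one sequence: v3d_job_init(), BO lookup, locking the BO reservations, v3d_push_job() under sched_lock, then v3d_attach_fences_and_unlock_reservation() and a final v3d_job_put() on the submit path's own reference. The standalone sketch below models only that ordering and its goto-based error unwinding; every helper in it is a stub invented for illustration and does not correspond to a real kernel API.

/* Illustrative-only model of the shared submit sequence; all helpers
 * are stubs that stand in for the real v3d_* functions named above.
 */
#include <stdio.h>

static int job_init(void)		{ return 0; }	/* v3d_job_init() */
static int lookup_bos(void)		{ return 0; }	/* v3d_lookup_bos() */
static int lock_reservations(void)	{ return 0; }	/* v3d_lock_bo_reservations() */
static int push_job(void)		{ return 0; }	/* v3d_push_job() */
static void unlock_reservations(void)	{ }		/* v3d_unlock_bo_reservations() */
static void attach_fences_and_unlock(void) { }		/* v3d_attach_fences_and_unlock_reservation() */
static void job_put(void)		{ printf("job reference dropped\n"); }

int main(void)
{
	int ret;

	ret = job_init();
	if (ret)
		return ret;		/* nothing to unwind yet */
	ret = lookup_bos();
	if (ret)
		goto fail;		/* just drop the job reference */
	ret = lock_reservations();
	if (ret)
		goto fail;
	ret = push_job();
	if (ret)
		goto fail_unreserve;	/* also release the reservations */

	attach_fences_and_unlock();
	job_put();			/* submit path's own reference */
	return 0;

fail_unreserve:
	unlock_reservations();
fail:
	job_put();
	return ret;
}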