author    Adrian Schmutzler <freifunk@adrianschmutzler.de>  2020-02-08 21:58:55 +0100
committer Adrian Schmutzler <freifunk@adrianschmutzler.de>  2020-02-14 14:10:51 +0100
commit    7d7aa2fd924c27829ec25f825481554dd81bce97 (patch)
tree      658b87b89331670266163e522ea5fb52535633cb /target/linux/bcm27xx/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch
parent    e7bfda2c243e66a75ff966ba04c28b1590b5d24c (diff)
brcm2708: rename target to bcm27xx
This change makes the names of Broadcom targets consistent by using the common notation based on SoC/CPU ID (which is used internally anyway): bcmXXXX instead of brcmXXXX. This notation is already used for the target TITLE in make menuconfig; only the short target name used brcm so far. Besides, since subtargets range from bcm2708 to bcm2711, it seems appropriate to use bcm27xx instead of bcm2708 (again, as already done for BOARDNAME).

This also renames the packages brcm2708-userland and brcm2708-gpu-fw.

Signed-off-by: Adrian Schmutzler <freifunk@adrianschmutzler.de>
Acked-by: Álvaro Fernández Rojas <noltari@gmail.com>
Diffstat (limited to 'target/linux/bcm27xx/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch')
-rw-r--r--  target/linux/bcm27xx/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch  1104
1 file changed, 1104 insertions(+), 0 deletions(-)
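
The patch file added below carries an upstream drm/v3d change that folds the state shared between jobs out of struct v3d_exec_info and into a common struct v3d_job base (refcount, v3d device pointer, BO array, fences, and a per-type free callback), which each queue-specific job (v3d_bin_job, v3d_render_job, v3d_tfu_job) then embeds. As a reading aid only, here is a minimal standalone C sketch of that embedded-base pattern; it is not part of the patch, and the struct names and the free-callback signature are simplified from the driver's (the kernel version uses struct kref and kref_put()):

/* Illustrative userspace sketch, not driver code: a common base struct
 * is embedded in each queue-specific job, container_of() recovers the
 * derived type, and a per-type destructor runs when the last reference
 * is dropped -- the same shape as v3d_job / v3d_bin_job / v3d_job_put().
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct job {
	int refcount;                   /* stands in for struct kref */
	void (*free)(struct job *job);  /* per-type destructor */
};

struct bin_job {
	struct job base;                /* embedded common base */
	unsigned start, end;            /* CL start/end addresses */
};

static void bin_job_free(struct job *job)
{
	/* Downcast from the embedded base, as to_bin_job() does. */
	struct bin_job *bin = container_of(job, struct bin_job, base);

	free(bin);
}

static void job_put(struct job *job)
{
	/* Mirrors v3d_job_put(): drop a reference, free via callback. */
	if (--job->refcount == 0)
		job->free(job);
}

int main(void)
{
	struct bin_job *bin = calloc(1, sizeof(*bin));

	bin->base.refcount = 1;
	bin->base.free = bin_job_free;
	bin->start = 0x1000;
	bin->end = 0x2000;

	printf("bin job CL: 0x%x..0x%x\n", bin->start, bin->end);
	job_put(&bin->base);    /* last reference: bin_job_free() runs */
	return 0;
}

Centralizing the refcount, BO array, and fences in the base struct is what lets the patch share v3d_job_init(), v3d_push_job(), and v3d_job_put() across all four queues instead of duplicating them per job type.
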
diff --git a/target/linux/bcm27xx/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch b/target/linux/bcm27xx/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch
new file mode 100644
index 0000000000..1192b166d4
--- /dev/null
+++ b/target/linux/bcm27xx/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch
@@ -0,0 +1,1104 @@
+From ccf319a0265bfdb4a622a52645f159461bc88079 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Thu, 27 Dec 2018 12:11:52 -0800
+Subject: [PATCH] drm/v3d: Refactor job management.
+
+The CL submission had two jobs embedded in an exec struct. When I
+added TFU support, I had to replicate some of the exec stuff and some
+of the job stuff. As I went to add CSD, it became clear that actually
+what was in exec should just be in the two CL jobs, and it would let
+us share a lot more code between the 4 queues.
+
+Signed-off-by: Eric Anholt <eric@anholt.net>
+---
+ drivers/gpu/drm/v3d/v3d_drv.h | 77 ++++----
+ drivers/gpu/drm/v3d/v3d_gem.c | 331 +++++++++++++++++---------------
+ drivers/gpu/drm/v3d/v3d_irq.c | 8 +-
+ drivers/gpu/drm/v3d/v3d_sched.c | 264 ++++++++++++++-----------
+ 4 files changed, 373 insertions(+), 307 deletions(-)
+
+--- a/drivers/gpu/drm/v3d/v3d_drv.h
++++ b/drivers/gpu/drm/v3d/v3d_drv.h
+@@ -67,8 +67,8 @@ struct v3d_dev {
+
+ struct work_struct overflow_mem_work;
+
+- struct v3d_exec_info *bin_job;
+- struct v3d_exec_info *render_job;
++ struct v3d_bin_job *bin_job;
++ struct v3d_render_job *render_job;
+ struct v3d_tfu_job *tfu_job;
+
+ struct v3d_queue_state queue[V3D_MAX_QUEUES];
+@@ -132,7 +132,7 @@ struct v3d_bo {
+ struct list_head vmas; /* list of v3d_vma */
+
+ /* List entry for the BO's position in
+- * v3d_exec_info->unref_list
++ * v3d_render_job->unref_list
+ */
+ struct list_head unref_head;
+
+@@ -176,7 +176,15 @@ to_v3d_fence(struct dma_fence *fence)
+ struct v3d_job {
+ struct drm_sched_job base;
+
+- struct v3d_exec_info *exec;
++ struct kref refcount;
++
++ struct v3d_dev *v3d;
++
++ /* This is the array of BOs that were looked up at the start
++ * of submission.
++ */
++ struct v3d_bo **bo;
++ u32 bo_count;
+
+ /* An optional fence userspace can pass in for the job to depend on. */
+ struct dma_fence *in_fence;
+@@ -184,59 +192,53 @@ struct v3d_job {
+ /* v3d fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *irq_fence;
+
++ /* scheduler fence for when the job is considered complete and
++ * the BO reservations can be released.
++ */
++ struct dma_fence *done_fence;
++
++ /* Callback for the freeing of the job on refcount going to 0. */
++ void (*free)(struct kref *ref);
++};
++
++struct v3d_bin_job {
++ struct v3d_job base;
++
+ /* GPU virtual addresses of the start/end of the CL job. */
+ u32 start, end;
+
+ u32 timedout_ctca, timedout_ctra;
+-};
+
+-struct v3d_exec_info {
+- struct v3d_dev *v3d;
++ /* Corresponding render job, for attaching our overflow memory. */
++ struct v3d_render_job *render;
++
++ /* Submitted tile memory allocation start/size, tile state. */
++ u32 qma, qms, qts;
++};
+
+- struct v3d_job bin, render;
++struct v3d_render_job {
++ struct v3d_job base;
+
+- /* Fence for when the scheduler considers the binner to be
+- * done, for render to depend on.
++ /* Optional fence for the binner, to depend on before starting
++ * our job.
+ */
+ struct dma_fence *bin_done_fence;
+
+- /* Fence for when the scheduler considers the render to be
+- * done, for when the BOs reservations should be complete.
+- */
+- struct dma_fence *render_done_fence;
+-
+- struct kref refcount;
++ /* GPU virtual addresses of the start/end of the CL job. */
++ u32 start, end;
+
+- /* This is the array of BOs that were looked up at the start of exec. */
+- struct v3d_bo **bo;
+- u32 bo_count;
++ u32 timedout_ctca, timedout_ctra;
+
+ /* List of overflow BOs used in the job that need to be
+ * released once the job is complete.
+ */
+ struct list_head unref_list;
+-
+- /* Submitted tile memory allocation start/size, tile state. */
+- u32 qma, qms, qts;
+ };
+
+ struct v3d_tfu_job {
+- struct drm_sched_job base;
++ struct v3d_job base;
+
+ struct drm_v3d_submit_tfu args;
+-
+- /* An optional fence userspace can pass in for the job to depend on. */
+- struct dma_fence *in_fence;
+-
+- /* v3d fence to be signaled by IRQ handler when the job is complete. */
+- struct dma_fence *irq_fence;
+-
+- struct v3d_dev *v3d;
+-
+- struct kref refcount;
+-
+- /* This is the array of BOs that were looked up at the start of exec. */
+- struct v3d_bo *bo[4];
+ };
+
+ /**
+@@ -306,8 +308,7 @@ int v3d_submit_tfu_ioctl(struct drm_devi
+ struct drm_file *file_priv);
+ int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+-void v3d_exec_put(struct v3d_exec_info *exec);
+-void v3d_tfu_job_put(struct v3d_tfu_job *exec);
++void v3d_job_put(struct v3d_job *job);
+ void v3d_reset(struct v3d_dev *v3d);
+ void v3d_invalidate_caches(struct v3d_dev *v3d);
+
+--- a/drivers/gpu/drm/v3d/v3d_gem.c
++++ b/drivers/gpu/drm/v3d/v3d_gem.c
+@@ -293,11 +293,11 @@ retry:
+ }
+
+ /**
+- * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
++ * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+- * @exec: V3D job being set up
++ * @job: V3D job being set up
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list. This does the validation of the job's
+@@ -307,18 +307,19 @@ retry:
+ * failure, because that will happen at v3d_exec_cleanup() time.
+ */
+ static int
+-v3d_cl_lookup_bos(struct drm_device *dev,
+- struct drm_file *file_priv,
+- struct drm_v3d_submit_cl *args,
+- struct v3d_exec_info *exec)
++v3d_lookup_bos(struct drm_device *dev,
++ struct drm_file *file_priv,
++ struct v3d_job *job,
++ u64 bo_handles,
++ u32 bo_count)
+ {
+ u32 *handles;
+ int ret = 0;
+ int i;
+
+- exec->bo_count = args->bo_handle_count;
++ job->bo_count = bo_count;
+
+- if (!exec->bo_count) {
++ if (!job->bo_count) {
+ /* See comment on bo_index for why we have to check
+ * this.
+ */
+@@ -326,15 +327,15 @@ v3d_cl_lookup_bos(struct drm_device *dev
+ return -EINVAL;
+ }
+
+- exec->bo = kvmalloc_array(exec->bo_count,
+- sizeof(struct drm_gem_cma_object *),
+- GFP_KERNEL | __GFP_ZERO);
+- if (!exec->bo) {
++ job->bo = kvmalloc_array(job->bo_count,
++ sizeof(struct drm_gem_cma_object *),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!job->bo) {
+ DRM_DEBUG("Failed to allocate validated BO pointers\n");
+ return -ENOMEM;
+ }
+
+- handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
++ handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
+ if (!handles) {
+ ret = -ENOMEM;
+ DRM_DEBUG("Failed to allocate incoming GEM handles\n");
+@@ -342,15 +343,15 @@ v3d_cl_lookup_bos(struct drm_device *dev
+ }
+
+ if (copy_from_user(handles,
+- (void __user *)(uintptr_t)args->bo_handles,
+- exec->bo_count * sizeof(u32))) {
++ (void __user *)(uintptr_t)bo_handles,
++ job->bo_count * sizeof(u32))) {
+ ret = -EFAULT;
+ DRM_DEBUG("Failed to copy in GEM handles\n");
+ goto fail;
+ }
+
+ spin_lock(&file_priv->table_lock);
+- for (i = 0; i < exec->bo_count; i++) {
++ for (i = 0; i < job->bo_count; i++) {
+ struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+ handles[i]);
+ if (!bo) {
+@@ -361,7 +362,7 @@ v3d_cl_lookup_bos(struct drm_device *dev
+ goto fail;
+ }
+ drm_gem_object_get(bo);
+- exec->bo[i] = to_v3d_bo(bo);
++ job->bo[i] = to_v3d_bo(bo);
+ }
+ spin_unlock(&file_priv->table_lock);
+
+@@ -371,59 +372,41 @@ fail:
+ }
+
+ static void
+-v3d_exec_cleanup(struct kref *ref)
++v3d_job_free(struct kref *ref)
+ {
+- struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
+- refcount);
+- unsigned int i;
+- struct v3d_bo *bo, *save;
+-
+- dma_fence_put(exec->bin.in_fence);
+- dma_fence_put(exec->render.in_fence);
+-
+- dma_fence_put(exec->bin.irq_fence);
+- dma_fence_put(exec->render.irq_fence);
+-
+- dma_fence_put(exec->bin_done_fence);
+- dma_fence_put(exec->render_done_fence);
+-
+- for (i = 0; i < exec->bo_count; i++)
+- drm_gem_object_put_unlocked(&exec->bo[i]->base);
+- kvfree(exec->bo);
++ struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
++ int i;
+
+- list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
+- drm_gem_object_put_unlocked(&bo->base);
++ for (i = 0; i < job->bo_count; i++) {
++ if (job->bo[i])
++ drm_gem_object_put_unlocked(&job->bo[i]->base);
+ }
++ kvfree(job->bo);
+
+- kfree(exec);
+-}
++ dma_fence_put(job->in_fence);
++ dma_fence_put(job->irq_fence);
++ dma_fence_put(job->done_fence);
+
+-void v3d_exec_put(struct v3d_exec_info *exec)
+-{
+- kref_put(&exec->refcount, v3d_exec_cleanup);
++ kfree(job);
+ }
+
+ static void
+-v3d_tfu_job_cleanup(struct kref *ref)
++v3d_render_job_free(struct kref *ref)
+ {
+- struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
+- refcount);
+- unsigned int i;
+-
+- dma_fence_put(job->in_fence);
+- dma_fence_put(job->irq_fence);
++ struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
++ base.refcount);
++ struct v3d_bo *bo, *save;
+
+- for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
+- if (job->bo[i])
+- drm_gem_object_put_unlocked(&job->bo[i]->base);
++ list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
++ drm_gem_object_put_unlocked(&bo->base);
+ }
+
+- kfree(job);
++ v3d_job_free(ref);
+ }
+
+-void v3d_tfu_job_put(struct v3d_tfu_job *job)
++void v3d_job_put(struct v3d_job *job)
+ {
+- kref_put(&job->refcount, v3d_tfu_job_cleanup);
++ kref_put(&job->refcount, job->free);
+ }
+
+ int
+@@ -476,6 +459,65 @@ v3d_wait_bo_ioctl(struct drm_device *dev
+ return ret;
+ }
+
++static int
++v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
++ struct v3d_job *job, void (*free)(struct kref *ref),
++ u32 in_sync)
++{
++ int ret;
++
++ job->v3d = v3d;
++ job->free = free;
++
++ ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
++ if (ret == -EINVAL)
++ return ret;
++
++ kref_init(&job->refcount);
++
++ return 0;
++}
++
++static int
++v3d_push_job(struct v3d_file_priv *v3d_priv,
++ struct v3d_job *job, enum v3d_queue queue)
++{
++ int ret;
++
++ ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
++ v3d_priv);
++ if (ret)
++ return ret;
++
++ job->done_fence = dma_fence_get(&job->base.s_fence->finished);
++
++ /* put by scheduler job completion */
++ kref_get(&job->refcount);
++
++ drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
++
++ return 0;
++}
++
++static void
++v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
++ struct v3d_job *job,
++ struct ww_acquire_ctx *acquire_ctx,
++ u32 out_sync)
++{
++ struct drm_syncobj *sync_out;
++
++ v3d_attach_object_fences(job->bo, job->bo_count, job->done_fence);
++ v3d_unlock_bo_reservations(job->bo, job->bo_count, acquire_ctx);
++
++ /* Update the return sync object for the job */
++ sync_out = drm_syncobj_find(file_priv, out_sync);
++ if (sync_out) {
++ drm_syncobj_replace_fence(sync_out, job->done_fence);
++ drm_syncobj_put(sync_out);
++ }
++}
++
+ /**
+ * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
+ * @dev: DRM device
+@@ -495,9 +537,9 @@ v3d_submit_cl_ioctl(struct drm_device *d
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_submit_cl *args = data;
+- struct v3d_exec_info *exec;
++ struct v3d_bin_job *bin = NULL;
++ struct v3d_render_job *render;
+ struct ww_acquire_ctx acquire_ctx;
+- struct drm_syncobj *sync_out;
+ int ret = 0;
+
+ trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
+@@ -507,95 +549,84 @@ v3d_submit_cl_ioctl(struct drm_device *d
+ return -EINVAL;
+ }
+
+- exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+- if (!exec)
++ render = kcalloc(1, sizeof(*render), GFP_KERNEL);
++ if (!render)
+ return -ENOMEM;
+
+- kref_init(&exec->refcount);
++ render->start = args->rcl_start;
++ render->end = args->rcl_end;
++ INIT_LIST_HEAD(&render->unref_list);
+
+- ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
+- 0, &exec->bin.in_fence);
+- if (ret == -EINVAL)
+- goto fail;
++ ret = v3d_job_init(v3d, file_priv, &render->base,
++ v3d_render_job_free, args->in_sync_rcl);
++ if (ret) {
++ kfree(bin);
++ kfree(render);
++ return ret;
++ }
+
+- ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
+- 0, &exec->render.in_fence);
+- if (ret == -EINVAL)
+- goto fail;
++ if (args->bcl_start != args->bcl_end) {
++ bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
++ if (!bin)
++ return -ENOMEM;
++
++ ret = v3d_job_init(v3d, file_priv, &bin->base,
++ v3d_job_free, args->in_sync_bcl);
++ if (ret) {
++ v3d_job_put(&render->base);
++ return ret;
++ }
+
+- exec->qma = args->qma;
+- exec->qms = args->qms;
+- exec->qts = args->qts;
+- exec->bin.exec = exec;
+- exec->bin.start = args->bcl_start;
+- exec->bin.end = args->bcl_end;
+- exec->render.exec = exec;
+- exec->render.start = args->rcl_start;
+- exec->render.end = args->rcl_end;
+- exec->v3d = v3d;
+- INIT_LIST_HEAD(&exec->unref_list);
++ bin->start = args->bcl_start;
++ bin->end = args->bcl_end;
++ bin->qma = args->qma;
++ bin->qms = args->qms;
++ bin->qts = args->qts;
++ bin->render = render;
++ }
+
+- ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
++ ret = v3d_lookup_bos(dev, file_priv, &render->base,
++ args->bo_handles, args->bo_handle_count);
+ if (ret)
+ goto fail;
+
+- ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
++ ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
+ &acquire_ctx);
+ if (ret)
+ goto fail;
+
+ mutex_lock(&v3d->sched_lock);
+- if (exec->bin.start != exec->bin.end) {
+- ret = drm_sched_job_init(&exec->bin.base,
+- &v3d_priv->sched_entity[V3D_BIN],
+- v3d_priv);
++ if (bin) {
++ ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
+ if (ret)
+ goto fail_unreserve;
+
+- exec->bin_done_fence =
+- dma_fence_get(&exec->bin.base.s_fence->finished);
+-
+- kref_get(&exec->refcount); /* put by scheduler job completion */
+- drm_sched_entity_push_job(&exec->bin.base,
+- &v3d_priv->sched_entity[V3D_BIN]);
++ render->bin_done_fence = dma_fence_get(bin->base.done_fence);
+ }
+
+- ret = drm_sched_job_init(&exec->render.base,
+- &v3d_priv->sched_entity[V3D_RENDER],
+- v3d_priv);
++ ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
+ if (ret)
+ goto fail_unreserve;
+-
+- exec->render_done_fence =
+- dma_fence_get(&exec->render.base.s_fence->finished);
+-
+- kref_get(&exec->refcount); /* put by scheduler job completion */
+- drm_sched_entity_push_job(&exec->render.base,
+- &v3d_priv->sched_entity[V3D_RENDER]);
+ mutex_unlock(&v3d->sched_lock);
+
+- v3d_attach_object_fences(exec->bo, exec->bo_count,
+- exec->render_done_fence);
+-
+- v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
+-
+- /* Update the return sync object for the */
+- sync_out = drm_syncobj_find(file_priv, args->out_sync);
+- if (sync_out) {
+- drm_syncobj_replace_fence(sync_out,
+- exec->render_done_fence);
+- drm_syncobj_put(sync_out);
+- }
+-
+- v3d_exec_put(exec);
++ v3d_attach_fences_and_unlock_reservation(file_priv,
++ &render->base, &acquire_ctx,
++ args->out_sync);
++
++ if (bin)
++ v3d_job_put(&bin->base);
++ v3d_job_put(&render->base);
+
+ return 0;
+
+ fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
+- v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
++ v3d_unlock_bo_reservations(render->base.bo,
++ render->base.bo_count, &acquire_ctx);
+ fail:
+- v3d_exec_put(exec);
++ if (bin)
++ v3d_job_put(&bin->base);
++ v3d_job_put(&render->base);
+
+ return ret;
+ }
+@@ -618,10 +649,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
+ struct drm_v3d_submit_tfu *args = data;
+ struct v3d_tfu_job *job;
+ struct ww_acquire_ctx acquire_ctx;
+- struct drm_syncobj *sync_out;
+- struct dma_fence *sched_done_fence;
+ int ret = 0;
+- int bo_count;
+
+ trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
+
+@@ -629,75 +657,66 @@ v3d_submit_tfu_ioctl(struct drm_device *
+ if (!job)
+ return -ENOMEM;
+
+- kref_init(&job->refcount);
+-
+- ret = drm_syncobj_find_fence(file_priv, args->in_sync,
+- 0, &job->in_fence);
+- if (ret == -EINVAL)
+- goto fail;
++ ret = v3d_job_init(v3d, file_priv, &job->base,
++ v3d_job_free, args->in_sync);
++ if (ret) {
++ kfree(job);
++ return ret;
++ }
+
++ job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
++ sizeof(*job->base.bo), GFP_KERNEL);
+ job->args = *args;
+- job->v3d = v3d;
+
+ spin_lock(&file_priv->table_lock);
+- for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
++ for (job->base.bo_count = 0;
++ job->base.bo_count < ARRAY_SIZE(args->bo_handles);
++ job->base.bo_count++) {
+ struct drm_gem_object *bo;
+
+- if (!args->bo_handles[bo_count])
++ if (!args->bo_handles[job->base.bo_count])
+ break;
+
+ bo = idr_find(&file_priv->object_idr,
+- args->bo_handles[bo_count]);
++ args->bo_handles[job->base.bo_count]);
+ if (!bo) {
+ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+- bo_count, args->bo_handles[bo_count]);
++ job->base.bo_count,
++ args->bo_handles[job->base.bo_count]);
+ ret = -ENOENT;
+ spin_unlock(&file_priv->table_lock);
+ goto fail;
+ }
+ drm_gem_object_get(bo);
+- job->bo[bo_count] = to_v3d_bo(bo);
++ job->base.bo[job->base.bo_count] = to_v3d_bo(bo);
+ }
+ spin_unlock(&file_priv->table_lock);
+
+- ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
++ ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
++ &acquire_ctx);
+ if (ret)
+ goto fail;
+
+ mutex_lock(&v3d->sched_lock);
+- ret = drm_sched_job_init(&job->base,
+- &v3d_priv->sched_entity[V3D_TFU],
+- v3d_priv);
++ ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
+ if (ret)
+ goto fail_unreserve;
+-
+- sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
+-
+- kref_get(&job->refcount); /* put by scheduler job completion */
+- drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
+ mutex_unlock(&v3d->sched_lock);
+
+- v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
+-
+- v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+-
+- /* Update the return sync object */
+- sync_out = drm_syncobj_find(file_priv, args->out_sync);
+- if (sync_out) {
+- drm_syncobj_replace_fence(sync_out, sched_done_fence);
+- drm_syncobj_put(sync_out);
+- }
+- dma_fence_put(sched_done_fence);
++ v3d_attach_fences_and_unlock_reservation(file_priv,
++ &job->base, &acquire_ctx,
++ args->out_sync);
+
+- v3d_tfu_job_put(job);
++ v3d_job_put(&job->base);
+
+ return 0;
+
+ fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
+- v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
++ v3d_unlock_bo_reservations(job->base.bo, job->base.bo_count,
++ &acquire_ctx);
+ fail:
+- v3d_tfu_job_put(job);
++ v3d_job_put(&job->base);
+
+ return ret;
+ }
+@@ -755,7 +774,7 @@ v3d_gem_destroy(struct drm_device *dev)
+
+ v3d_sched_fini(v3d);
+
+- /* Waiting for exec to finish would need to be done before
++ /* Waiting for jobs to finish would need to be done before
+ * unregistering V3D.
+ */
+ WARN_ON(v3d->bin_job);
+--- a/drivers/gpu/drm/v3d/v3d_irq.c
++++ b/drivers/gpu/drm/v3d/v3d_irq.c
+@@ -60,7 +60,7 @@ v3d_overflow_mem_work(struct work_struct
+ }
+
+ drm_gem_object_get(&bo->base);
+- list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
++ list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
+ V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
+@@ -93,7 +93,7 @@ v3d_irq(int irq, void *arg)
+
+ if (intsts & V3D_INT_FLDONE) {
+ struct v3d_fence *fence =
+- to_v3d_fence(v3d->bin_job->bin.irq_fence);
++ to_v3d_fence(v3d->bin_job->base.irq_fence);
+
+ trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
+ dma_fence_signal(&fence->base);
+@@ -102,7 +102,7 @@ v3d_irq(int irq, void *arg)
+
+ if (intsts & V3D_INT_FRDONE) {
+ struct v3d_fence *fence =
+- to_v3d_fence(v3d->render_job->render.irq_fence);
++ to_v3d_fence(v3d->render_job->base.irq_fence);
+
+ trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
+ dma_fence_signal(&fence->base);
+@@ -138,7 +138,7 @@ v3d_hub_irq(int irq, void *arg)
+
+ if (intsts & V3D_HUB_INT_TFUC) {
+ struct v3d_fence *fence =
+- to_v3d_fence(v3d->tfu_job->irq_fence);
++ to_v3d_fence(v3d->tfu_job->base.irq_fence);
+
+ trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
+ dma_fence_signal(&fence->base);
+--- a/drivers/gpu/drm/v3d/v3d_sched.c
++++ b/drivers/gpu/drm/v3d/v3d_sched.c
+@@ -30,39 +30,43 @@ to_v3d_job(struct drm_sched_job *sched_j
+ return container_of(sched_job, struct v3d_job, base);
+ }
+
+-static struct v3d_tfu_job *
+-to_tfu_job(struct drm_sched_job *sched_job)
++static struct v3d_bin_job *
++to_bin_job(struct drm_sched_job *sched_job)
+ {
+- return container_of(sched_job, struct v3d_tfu_job, base);
++ return container_of(sched_job, struct v3d_bin_job, base.base);
+ }
+
+-static void
+-v3d_job_free(struct drm_sched_job *sched_job)
++static struct v3d_render_job *
++to_render_job(struct drm_sched_job *sched_job)
+ {
+- struct v3d_job *job = to_v3d_job(sched_job);
++ return container_of(sched_job, struct v3d_render_job, base.base);
++}
+
+- v3d_exec_put(job->exec);
++static struct v3d_tfu_job *
++to_tfu_job(struct drm_sched_job *sched_job)
++{
++ return container_of(sched_job, struct v3d_tfu_job, base.base);
+ }
+
+ static void
+-v3d_tfu_job_free(struct drm_sched_job *sched_job)
++v3d_job_free(struct drm_sched_job *sched_job)
+ {
+- struct v3d_tfu_job *job = to_tfu_job(sched_job);
++ struct v3d_job *job = to_v3d_job(sched_job);
+
+- v3d_tfu_job_put(job);
++ v3d_job_put(job);
+ }
+
+ /**
+- * Returns the fences that the bin or render job depends on, one by one.
+- * v3d_job_run() won't be called until all of them have been signaled.
++ * Returns the fences that the job depends on, one by one.
++ *
++ * If placed in the scheduler's .dependency method, the corresponding
++ * .run_job won't be called until all of them have been signaled.
+ */
+ static struct dma_fence *
+ v3d_job_dependency(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
+ {
+ struct v3d_job *job = to_v3d_job(sched_job);
+- struct v3d_exec_info *exec = job->exec;
+- enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+ struct dma_fence *fence;
+
+ fence = job->in_fence;
+@@ -71,113 +75,132 @@ v3d_job_dependency(struct drm_sched_job
+ return fence;
+ }
+
+- if (q == V3D_RENDER) {
+- /* If we had a bin job, the render job definitely depends on
+- * it. We first have to wait for bin to be scheduled, so that
+- * its done_fence is created.
+- */
+- fence = exec->bin_done_fence;
+- if (fence) {
+- exec->bin_done_fence = NULL;
+- return fence;
+- }
+- }
+-
+- /* XXX: Wait on a fence for switching the GMP if necessary,
+- * and then do so.
+- */
+-
+- return fence;
++ return NULL;
+ }
+
+ /**
+- * Returns the fences that the TFU job depends on, one by one.
+- * v3d_tfu_job_run() won't be called until all of them have been
+- * signaled.
++ * Returns the fences that the render job depends on, one by one.
++ * v3d_job_run() won't be called until all of them have been signaled.
+ */
+ static struct dma_fence *
+-v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
+- struct drm_sched_entity *s_entity)
++v3d_render_job_dependency(struct drm_sched_job *sched_job,
++ struct drm_sched_entity *s_entity)
+ {
+- struct v3d_tfu_job *job = to_tfu_job(sched_job);
++ struct v3d_render_job *job = to_render_job(sched_job);
+ struct dma_fence *fence;
+
+- fence = job->in_fence;
++ fence = v3d_job_dependency(sched_job, s_entity);
++ if (fence)
++ return fence;
++
++ /* If we had a bin job, the render job definitely depends on
++ * it. We first have to wait for bin to be scheduled, so that
++ * its done_fence is created.
++ */
++ fence = job->bin_done_fence;
+ if (fence) {
+- job->in_fence = NULL;
++ job->bin_done_fence = NULL;
+ return fence;
+ }
+
+- return NULL;
++ /* XXX: Wait on a fence for switching the GMP if necessary,
++ * and then do so.
++ */
++
++ return fence;
+ }
+
+-static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
++static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
+ {
+- struct v3d_job *job = to_v3d_job(sched_job);
+- struct v3d_exec_info *exec = job->exec;
+- enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+- struct v3d_dev *v3d = exec->v3d;
++ struct v3d_bin_job *job = to_bin_job(sched_job);
++ struct v3d_dev *v3d = job->base.v3d;
+ struct drm_device *dev = &v3d->drm;
+ struct dma_fence *fence;
+ unsigned long irqflags;
+
+- if (unlikely(job->base.s_fence->finished.error))
++ if (unlikely(job->base.base.s_fence->finished.error))
+ return NULL;
+
+ /* Lock required around bin_job update vs
+ * v3d_overflow_mem_work().
+ */
+ spin_lock_irqsave(&v3d->job_lock, irqflags);
+- if (q == V3D_BIN) {
+- v3d->bin_job = job->exec;
++ v3d->bin_job = job;
++ /* Clear out the overflow allocation, so we don't
++ * reuse the overflow attached to a previous job.
++ */
++ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
++ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
++
++ v3d_invalidate_caches(v3d);
+
+- /* Clear out the overflow allocation, so we don't
+- * reuse the overflow attached to a previous job.
+- */
+- V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
+- } else {
+- v3d->render_job = job->exec;
++ fence = v3d_fence_create(v3d, V3D_BIN);
++ if (IS_ERR(fence))
++ return NULL;
++
++ if (job->base.irq_fence)
++ dma_fence_put(job->base.irq_fence);
++ job->base.irq_fence = dma_fence_get(fence);
++
++ trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
++ job->start, job->end);
++
++ /* Set the current and end address of the control list.
++ * Writing the end register is what starts the job.
++ */
++ if (job->qma) {
++ V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
++ V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
+ }
+- spin_unlock_irqrestore(&v3d->job_lock, irqflags);
++ if (job->qts) {
++ V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
++ V3D_CLE_CT0QTS_ENABLE |
++ job->qts);
++ }
++ V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
++ V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
++
++ return fence;
++}
++
++static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
++{
++ struct v3d_render_job *job = to_render_job(sched_job);
++ struct v3d_dev *v3d = job->base.v3d;
++ struct drm_device *dev = &v3d->drm;
++ struct dma_fence *fence;
++
++ if (unlikely(job->base.base.s_fence->finished.error))
++ return NULL;
+
+- /* Can we avoid this flush when q==RENDER? We need to be
+- * careful of scheduling, though -- imagine job0 rendering to
+- * texture and job1 reading, and them being executed as bin0,
+- * bin1, render0, render1, so that render1's flush at bin time
++ v3d->render_job = job;
++
++ /* Can we avoid this flush? We need to be careful of
++ * scheduling, though -- imagine job0 rendering to texture and
++ * job1 reading, and them being executed as bin0, bin1,
++ * render0, render1, so that render1's flush at bin time
+ * wasn't enough.
+ */
+ v3d_invalidate_caches(v3d);
+
+- fence = v3d_fence_create(v3d, q);
++ fence = v3d_fence_create(v3d, V3D_RENDER);
+ if (IS_ERR(fence))
+ return NULL;
+
+- if (job->irq_fence)
+- dma_fence_put(job->irq_fence);
+- job->irq_fence = dma_fence_get(fence);
++ if (job->base.irq_fence)
++ dma_fence_put(job->base.irq_fence);
++ job->base.irq_fence = dma_fence_get(fence);
+
+- trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
++ trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
+ job->start, job->end);
+
+- if (q == V3D_BIN) {
+- if (exec->qma) {
+- V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
+- V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
+- }
+- if (exec->qts) {
+- V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+- V3D_CLE_CT0QTS_ENABLE |
+- exec->qts);
+- }
+- } else {
+- /* XXX: Set the QCFG */
+- }
++ /* XXX: Set the QCFG */
+
+ /* Set the current and end address of the control list.
+ * Writing the end register is what starts the job.
+ */
+- V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
+- V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
++ V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
++ V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
+
+ return fence;
+ }
+@@ -186,7 +209,7 @@ static struct dma_fence *
+ v3d_tfu_job_run(struct drm_sched_job *sched_job)
+ {
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
+- struct v3d_dev *v3d = job->v3d;
++ struct v3d_dev *v3d = job->base.v3d;
+ struct drm_device *dev = &v3d->drm;
+ struct dma_fence *fence;
+
+@@ -195,9 +218,9 @@ v3d_tfu_job_run(struct drm_sched_job *sc
+ return NULL;
+
+ v3d->tfu_job = job;
+- if (job->irq_fence)
+- dma_fence_put(job->irq_fence);
+- job->irq_fence = dma_fence_get(fence);
++ if (job->base.irq_fence)
++ dma_fence_put(job->base.irq_fence);
++ job->base.irq_fence = dma_fence_get(fence);
+
+ trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+
+@@ -247,25 +270,23 @@ v3d_gpu_reset_for_timeout(struct v3d_dev
+ mutex_unlock(&v3d->reset_lock);
+ }
+
++/* If the current address or return address have changed, then the GPU
++ * has probably made progress and we should delay the reset. This
++ * could fail if the GPU got in an infinite loop in the CL, but that
++ * is pretty unlikely outside of an i-g-t testcase.
++ */
+ static void
+-v3d_job_timedout(struct drm_sched_job *sched_job)
++v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
++ u32 *timedout_ctca, u32 *timedout_ctra)
+ {
+ struct v3d_job *job = to_v3d_job(sched_job);
+- struct v3d_exec_info *exec = job->exec;
+- struct v3d_dev *v3d = exec->v3d;
+- enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+- u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+- u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+-
+- /* If the current address or return address have changed, then
+- * the GPU has probably made progress and we should delay the
+- * reset. This could fail if the GPU got in an infinite loop
+- * in the CL, but that is pretty unlikely outside of an i-g-t
+- * testcase.
+- */
+- if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+- job->timedout_ctca = ctca;
+- job->timedout_ctra = ctra;
++ struct v3d_dev *v3d = job->v3d;
++ u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
++ u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
++
++ if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
++ *timedout_ctca = ctca;
++ *timedout_ctra = ctra;
+ schedule_delayed_work(&job->base.work_tdr,
+ job->base.sched->timeout);
+ return;
+@@ -275,25 +296,50 @@ v3d_job_timedout(struct drm_sched_job *s
+ }
+
+ static void
++v3d_bin_job_timedout(struct drm_sched_job *sched_job)
++{
++ struct v3d_bin_job *job = to_bin_job(sched_job);
++
++ v3d_cl_job_timedout(sched_job, V3D_BIN,
++ &job->timedout_ctca, &job->timedout_ctra);
++}
++
++static void
++v3d_render_job_timedout(struct drm_sched_job *sched_job)
++{
++ struct v3d_render_job *job = to_render_job(sched_job);
++
++ v3d_cl_job_timedout(sched_job, V3D_RENDER,
++ &job->timedout_ctca, &job->timedout_ctra);
++}
++
++static void
+ v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
+ {
+- struct v3d_tfu_job *job = to_tfu_job(sched_job);
++ struct v3d_job *job = to_v3d_job(sched_job);
+
+ v3d_gpu_reset_for_timeout(job->v3d, sched_job);
+ }
+
+-static const struct drm_sched_backend_ops v3d_sched_ops = {
++static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
+ .dependency = v3d_job_dependency,
+- .run_job = v3d_job_run,
+- .timedout_job = v3d_job_timedout,
+- .free_job = v3d_job_free
++ .run_job = v3d_bin_job_run,
++ .timedout_job = v3d_bin_job_timedout,
++ .free_job = v3d_job_free,
++};
++
++static const struct drm_sched_backend_ops v3d_render_sched_ops = {
++ .dependency = v3d_render_job_dependency,
++ .run_job = v3d_render_job_run,
++ .timedout_job = v3d_render_job_timedout,
++ .free_job = v3d_job_free,
+ };
+
+ static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
+- .dependency = v3d_tfu_job_dependency,
++ .dependency = v3d_job_dependency,
+ .run_job = v3d_tfu_job_run,
+ .timedout_job = v3d_tfu_job_timedout,
+- .free_job = v3d_tfu_job_free
++ .free_job = v3d_job_free,
+ };
+
+ int
+@@ -305,7 +351,7 @@ v3d_sched_init(struct v3d_dev *v3d)
+ int ret;
+
+ ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
+- &v3d_sched_ops,
++ &v3d_bin_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_bin");
+@@ -315,7 +361,7 @@ v3d_sched_init(struct v3d_dev *v3d)
+ }
+
+ ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
+- &v3d_sched_ops,
++ &v3d_render_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_render");