From c2308a7e4adbb2acc8ff149f91d1ca46801c135e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?=
Date: Mon, 23 Dec 2019 17:25:19 +0100
Subject: brcm2708: update to latest patches from RPi Foundation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also removes reverted patches.

Signed-off-by: Álvaro Fernández Rojas
---
 ...dd-support-for-submitting-jobs-to-the-TFU.patch | 802 +++++++++++++++++++++
 1 file changed, 802 insertions(+)
 create mode 100644 target/linux/brcm2708/patches-4.19/950-0492-drm-v3d-Add-support-for-submitting-jobs-to-the-TFU.patch

diff --git a/target/linux/brcm2708/patches-4.19/950-0492-drm-v3d-Add-support-for-submitting-jobs-to-the-TFU.patch b/target/linux/brcm2708/patches-4.19/950-0492-drm-v3d-Add-support-for-submitting-jobs-to-the-TFU.patch
new file mode 100644
index 0000000000..99b1a69c09
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.19/950-0492-drm-v3d-Add-support-for-submitting-jobs-to-the-TFU.patch
@@ -0,0 +1,802 @@
+From ba1e90b6c3b3bf0e88ab01c824c4f8fde582e878 Mon Sep 17 00:00:00 2001
+From: Eric Anholt
+Date: Wed, 28 Nov 2018 15:09:25 -0800
+Subject: [PATCH] drm/v3d: Add support for submitting jobs to the TFU.
+
+The TFU can copy from raster, UIF, and SAND input images to UIF output
+images, with optional mipmap generation. This will certainly be
+useful for media EGL image input, but is also useful immediately for
+mipmap generation without bogging the V3D core down.
+
+For now we only run the queue 1 job deep, and don't have any hang
+recovery (though I don't think we should need it, with TFU). Queuing
+multiple jobs in the HW will require synchronizing the YUV coefficient
+regs updates since they don't get FIFOed with the job.
+
+v2: Change the ioctl to IOW instead of IOWR, always set COEF0, explain
+ why TFU is AUTH, clarify the syncing docs, drop the unused TFU
+ interrupt regs (you're expected to use the hub's), don't take
+ &bo->base for NULL bos.
+v3: Fix a little whitespace alignment (noticed by checkpatch), rebase
+ on drm_sched_job_cleanup() changes.
+
+Signed-off-by: Eric Anholt
+Reviewed-by: Dave Emett (v2)
+Link: https://patchwork.freedesktop.org/patch/264607/
+(cherry picked from commit 1584f16ca96ef124aad79efa3303cff5f3530e2c)
+---
+ drivers/gpu/drm/v3d/v3d_drv.c | 15 ++-
+ drivers/gpu/drm/v3d/v3d_drv.h | 32 +++++-
+ drivers/gpu/drm/v3d/v3d_gem.c | 178 ++++++++++++++++++++++++++++----
+ drivers/gpu/drm/v3d/v3d_irq.c | 12 ++-
+ drivers/gpu/drm/v3d/v3d_regs.h | 49 +++++++++
+ drivers/gpu/drm/v3d/v3d_sched.c | 148 ++++++++++++++++++++++----
+ drivers/gpu/drm/v3d/v3d_trace.h | 20 ++++
+ include/uapi/drm/v3d_drm.h | 25 +++++
+ 8 files changed, 426 insertions(+), 53 deletions(-)
+
+--- a/drivers/gpu/drm/v3d/v3d_drv.c
++++ b/drivers/gpu/drm/v3d/v3d_drv.c
+@@ -112,10 +112,15 @@ static int v3d_get_param_ioctl(struct dr
+ return 0;
+ }
+
+- /* Any params that aren't just register reads would go here. */
+
+- DRM_DEBUG("Unknown parameter %d\n", args->param);
+- return -EINVAL;
++ switch (args->param) {
++ case DRM_V3D_PARAM_SUPPORTS_TFU:
++ args->value = 1;
++ return 0;
++ default:
++ DRM_DEBUG("Unknown parameter %d\n", args->param);
++ return -EINVAL;
++ }
+ }
+
+ static int
+@@ -170,7 +175,8 @@ static const struct file_operations v3d_
+ /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
+ * protection between clients. Note that render nodes would be
+ * able to submit CLs that could access BOs from clients authenticated
+- * with the master node.
++ * with the master node. The TFU doesn't use the GMP, so it would
++ * need to stay DRM_AUTH until we do buffer size/offset validation.
+ */
+ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+@@ -179,6 +185,7 @@ static const struct drm_ioctl_desc v3d_d
+ DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
++ DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+ };
+
+ static const struct vm_operations_struct v3d_vm_ops = {
+--- a/drivers/gpu/drm/v3d/v3d_drv.h
++++ b/drivers/gpu/drm/v3d/v3d_drv.h
+@@ -7,19 +7,18 @@
+ #include
+ #include
+ #include
++#include "uapi/drm/v3d_drm.h"
+
+ #define GMP_GRANULARITY (128 * 1024)
+
+-/* Enum for each of the V3D queues. We maintain various queue
+- * tracking as an array because at some point we'll want to support
+- * the TFU (texture formatting unit) as another queue.
+- */
++/* Enum for each of the V3D queues. */
+ enum v3d_queue {
+ V3D_BIN,
+ V3D_RENDER,
++ V3D_TFU,
+ };
+
+-#define V3D_MAX_QUEUES (V3D_RENDER + 1)
++#define V3D_MAX_QUEUES (V3D_TFU + 1)
+
+ struct v3d_queue_state {
+ struct drm_gpu_scheduler sched;
+@@ -68,6 +67,7 @@ struct v3d_dev {
+
+ struct v3d_exec_info *bin_job;
+ struct v3d_exec_info *render_job;
++ struct v3d_tfu_job *tfu_job;
+
+ struct v3d_queue_state queue[V3D_MAX_QUEUES];
+
+@@ -218,6 +218,25 @@ struct v3d_exec_info {
+ u32 qma, qms, qts;
+ };
+
++struct v3d_tfu_job {
++ struct drm_sched_job base;
++
++ struct drm_v3d_submit_tfu args;
++
++ /* An optional fence userspace can pass in for the job to depend on. */
++ struct dma_fence *in_fence;
++
++ /* v3d fence to be signaled by IRQ handler when the job is complete. */
++ struct dma_fence *done_fence;
++
++ struct v3d_dev *v3d;
++
++ struct kref refcount;
++
++ /* This is the array of BOs that were looked up at the start of exec. */
++ struct v3d_bo *bo[4];
++};
++
+ /**
+ * _wait_for - magic (register) wait macro
+ *
+@@ -281,9 +300,12 @@ int v3d_gem_init(struct drm_device *dev)
+ void v3d_gem_destroy(struct drm_device *dev);
+ int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
++int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv);
+ int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+ void v3d_exec_put(struct v3d_exec_info *exec);
++void v3d_tfu_job_put(struct v3d_tfu_job *exec);
+ void v3d_reset(struct v3d_dev *v3d);
+ void v3d_invalidate_caches(struct v3d_dev *v3d);
+ void v3d_flush_caches(struct v3d_dev *v3d);
+--- a/drivers/gpu/drm/v3d/v3d_gem.c
++++ b/drivers/gpu/drm/v3d/v3d_gem.c
+@@ -207,26 +207,27 @@ v3d_flush_caches(struct v3d_dev *v3d)
+ }
+
+ static void
+-v3d_attach_object_fences(struct v3d_exec_info *exec)
++v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
++ struct dma_fence *fence)
+ {
+- struct dma_fence *out_fence = exec->render_done_fence;
+ int i;
+
+- for (i = 0; i < exec->bo_count; i++) {
++ for (i = 0; i < bo_count; i++) {
+ /* XXX: Use shared fences for read-only objects. */
+- reservation_object_add_excl_fence(exec->bo[i]->resv, out_fence);
++ reservation_object_add_excl_fence(bos[i]->resv, fence);
+ }
+ }
+
+ static void
+ v3d_unlock_bo_reservations(struct drm_device *dev,
+- struct v3d_exec_info *exec,
++ struct v3d_bo **bos,
++ int bo_count,
+ struct ww_acquire_ctx *acquire_ctx)
+ {
+ int i;
+
+- for (i = 0; i < exec->bo_count; i++)
+- ww_mutex_unlock(&exec->bo[i]->resv->lock);
++ for (i = 0; i < bo_count; i++)
++ ww_mutex_unlock(&bos[i]->resv->lock);
+
+ ww_acquire_fini(acquire_ctx);
+ }
+@@ -240,7 +241,8 @@ v3d_unlock_bo_reservations(struct drm_de
+ */
+ static int
+ v3d_lock_bo_reservations(struct drm_device *dev,
+- struct v3d_exec_info *exec,
++ struct v3d_bo **bos,
++ int bo_count,
+ struct ww_acquire_ctx *acquire_ctx)
+ {
+ int contended_lock = -1;
+@@ -250,7 +252,7 @@ v3d_lock_bo_reservations(struct drm_devi
+
+ retry:
+ if (contended_lock != -1) {
+- struct v3d_bo *bo = exec->bo[contended_lock];
++ struct v3d_bo *bo = bos[contended_lock];
+
+ ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+ acquire_ctx);
+@@ -260,20 +262,20 @@ retry:
+ }
+ }
+
+- for (i = 0; i < exec->bo_count; i++) {
++ for (i = 0; i < bo_count; i++) {
+ if (i == contended_lock)
+ continue;
+
+- ret = ww_mutex_lock_interruptible(&exec->bo[i]->resv->lock,
++ ret = ww_mutex_lock_interruptible(&bos[i]->resv->lock,
+ acquire_ctx);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; j++)
+- ww_mutex_unlock(&exec->bo[j]->resv->lock);
++ ww_mutex_unlock(&bos[j]->resv->lock);
+
+ if (contended_lock != -1 && contended_lock >= i) {
+- struct v3d_bo *bo = exec->bo[contended_lock];
++ struct v3d_bo *bo = bos[contended_lock];
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+@@ -293,10 +295,11 @@ retry:
+ /* Reserve space for our shared (read-only) fence references,
+ * before we commit the CL to the hardware.
+ */
+- for (i = 0; i < exec->bo_count; i++) {
+- ret = reservation_object_reserve_shared(exec->bo[i]->resv);
++ for (i = 0; i < bo_count; i++) {
++ ret = reservation_object_reserve_shared(bos[i]->resv);
+ if (ret) {
+- v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
++ v3d_unlock_bo_reservations(dev, bos, bo_count,
++ acquire_ctx);
+ return ret;
+ }
+ }
+@@ -419,6 +422,33 @@ void v3d_exec_put(struct v3d_exec_info *
+ kref_put(&exec->refcount, v3d_exec_cleanup);
+ }
+
++static void
++v3d_tfu_job_cleanup(struct kref *ref)
++{
++ struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
++ refcount);
++ struct v3d_dev *v3d = job->v3d;
++ unsigned int i;
++
++ dma_fence_put(job->in_fence);
++ dma_fence_put(job->done_fence);
++
++ for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
++ if (job->bo[i])
++ drm_gem_object_put_unlocked(&job->bo[i]->base);
++ }
++
++ pm_runtime_mark_last_busy(v3d->dev);
++ pm_runtime_put_autosuspend(v3d->dev);
++
++ kfree(job);
++}
++
++void v3d_tfu_job_put(struct v3d_tfu_job *job)
++{
++ kref_put(&job->refcount, v3d_tfu_job_cleanup);
++}
++
+ int
+ v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+@@ -536,7 +566,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
+ if (ret)
+ goto fail;
+
+- ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
++ ret = v3d_lock_bo_reservations(dev, exec->bo, exec->bo_count,
++ &acquire_ctx);
+ if (ret)
+ goto fail;
+
+@@ -570,9 +601,10 @@ v3d_submit_cl_ioctl(struct drm_device *d
+ &v3d_priv->sched_entity[V3D_RENDER]);
+ mutex_unlock(&v3d->sched_lock);
+
+- v3d_attach_object_fences(exec);
++ v3d_attach_object_fences(exec->bo, exec->bo_count,
++ exec->render_done_fence);
+
+- v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
++ v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx);
+
+ /* Update the return sync object for the */
+ sync_out = drm_syncobj_find(file_priv, args->out_sync);
+@@ -588,12 +620,118 @@ v3d_submit_cl_ioctl(struct drm_device *d
+
+ fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
+- v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
++ v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx);
+ fail:
+ v3d_exec_put(exec);
+
+ return ret;
+ }
++
++/**
++ * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
++ * @dev: DRM device
++ * @data: ioctl argument
++ * @file_priv: DRM file for this fd
++ *
++ * Userspace provides the register setup for the TFU, which we don't
++ * need to validate since the TFU is behind the MMU.
++ */
++int
++v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv)
++{
++ struct v3d_dev *v3d = to_v3d_dev(dev);
++ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
++ struct drm_v3d_submit_tfu *args = data;
++ struct v3d_tfu_job *job;
++ struct ww_acquire_ctx acquire_ctx;
++ struct drm_syncobj *sync_out;
++ struct dma_fence *sched_done_fence;
++ int ret = 0;
++ int bo_count;
++
++ job = kcalloc(1, sizeof(*job), GFP_KERNEL);
++ if (!job)
++ return -ENOMEM;
++
++ ret = pm_runtime_get_sync(v3d->dev);
++ if (ret < 0) {
++ kfree(job);
++ return ret;
++ }
++
++ kref_init(&job->refcount);
++
++ ret = drm_syncobj_find_fence(file_priv, args->in_sync,
++ 0, &job->in_fence);
++ if (ret == -EINVAL)
++ goto fail;
++
++ job->args = *args;
++ job->v3d = v3d;
++
++ spin_lock(&file_priv->table_lock);
++ for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
++ struct drm_gem_object *bo;
++
++ if (!args->bo_handles[bo_count])
++ break;
++
++ bo = idr_find(&file_priv->object_idr,
++ args->bo_handles[bo_count]);
++ if (!bo) {
++ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
++ bo_count, args->bo_handles[bo_count]);
++ ret = -ENOENT;
++ spin_unlock(&file_priv->table_lock);
++ goto fail;
++ }
++ drm_gem_object_get(bo);
++ job->bo[bo_count] = to_v3d_bo(bo);
++ }
++ spin_unlock(&file_priv->table_lock);
++
++ ret = v3d_lock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
++ if (ret)
++ goto fail;
++
++ mutex_lock(&v3d->sched_lock);
++ ret = drm_sched_job_init(&job->base,
++ &v3d_priv->sched_entity[V3D_TFU],
++ v3d_priv);
++ if (ret)
++ goto fail_unreserve;
++
++ sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
++
++ kref_get(&job->refcount); /* put by scheduler job completion */
++ drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
++ mutex_unlock(&v3d->sched_lock);
++
++ v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
++
++ v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
++
++ /* Update the return sync object */
++ sync_out = drm_syncobj_find(file_priv, args->out_sync);
++ if (sync_out) {
++ drm_syncobj_replace_fence(sync_out, sched_done_fence);
++ drm_syncobj_put(sync_out);
++ }
++ dma_fence_put(sched_done_fence);
++
++ v3d_tfu_job_put(job);
++
++ return 0;
++
++fail_unreserve:
++ mutex_unlock(&v3d->sched_lock);
++ v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
++fail:
++ v3d_tfu_job_put(job);
++
++ return ret;
++}
+
+ int
+ v3d_gem_init(struct drm_device *dev)
+--- a/drivers/gpu/drm/v3d/v3d_irq.c
++++ b/drivers/gpu/drm/v3d/v3d_irq.c
+@@ -4,8 +4,8 @@
+ /**
+ * DOC: Interrupt management for the V3D engine
+ *
+- * When we take a binning or rendering flush done interrupt, we need
+- * to signal the fence for that job so that the scheduler can queue up
++ * When we take a bin, render, or TFU done interrupt, we need to
++ * signal the fence for that job so that the scheduler can queue up
+ * the next one and unblock any waiters.
+ *
+ * When we take the binner out of memory interrupt, we need to
+@@ -23,7 +23,8 @@
+
+ #define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
+ V3D_HUB_INT_MMU_PTI | \
+- V3D_HUB_INT_MMU_CAP))
++ V3D_HUB_INT_MMU_CAP | \
++ V3D_HUB_INT_TFUC))
+
+ static void
+ v3d_overflow_mem_work(struct work_struct *work)
+@@ -117,6 +118,11 @@ v3d_hub_irq(int irq, void *arg)
+ /* Acknowledge the interrupts we're handling here. */
+ V3D_WRITE(V3D_HUB_INT_CLR, intsts);
+
++ if (intsts & V3D_HUB_INT_TFUC) {
++ dma_fence_signal(v3d->tfu_job->done_fence);
++ status = IRQ_HANDLED;
++ }
++
+ if (intsts & (V3D_HUB_INT_MMU_WRV |
+ V3D_HUB_INT_MMU_PTI |
+ V3D_HUB_INT_MMU_CAP)) {
+--- a/drivers/gpu/drm/v3d/v3d_regs.h
++++ b/drivers/gpu/drm/v3d/v3d_regs.h
+@@ -86,6 +86,55 @@
+ # define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c
+ # define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
+
++#define V3D_TFU_CS 0x00400
++/* Stops current job, empties input fifo. */
++# define V3D_TFU_CS_TFURST BIT(31)
++# define V3D_TFU_CS_CVTCT_MASK V3D_MASK(23, 16)
++# define V3D_TFU_CS_CVTCT_SHIFT 16
++# define V3D_TFU_CS_NFREE_MASK V3D_MASK(13, 8)
++# define V3D_TFU_CS_NFREE_SHIFT 8
++# define V3D_TFU_CS_BUSY BIT(0)
++
++#define V3D_TFU_SU 0x00404
++/* Interrupt when FINTTHR input slots are free (0 = disabled) */
++# define V3D_TFU_SU_FINTTHR_MASK V3D_MASK(13, 8)
++# define V3D_TFU_SU_FINTTHR_SHIFT 8
++/* Skips resetting the CRC at the start of CRC generation. */
++# define V3D_TFU_SU_CRCCHAIN BIT(4)
++/* skips writes, computes CRC of the image. miplevels must be 0. */
++# define V3D_TFU_SU_CRC BIT(3)
++# define V3D_TFU_SU_THROTTLE_MASK V3D_MASK(1, 0)
++# define V3D_TFU_SU_THROTTLE_SHIFT 0
++
++#define V3D_TFU_ICFG 0x00408
++/* Interrupt when the conversion is complete. */
++# define V3D_TFU_ICFG_IOC BIT(0)
++
++/* Input Image Address */
++#define V3D_TFU_IIA 0x0040c
++/* Input Chroma Address */
++#define V3D_TFU_ICA 0x00410
++/* Input Image Stride */
++#define V3D_TFU_IIS 0x00414
++/* Input Image U-Plane Address */
++#define V3D_TFU_IUA 0x00418
++/* Output Image Address */
++#define V3D_TFU_IOA 0x0041c
++/* Image Output Size */
++#define V3D_TFU_IOS 0x00420
++/* TFU YUV Coefficient 0 */
++#define V3D_TFU_COEF0 0x00424
++/* Use these regs instead of the defaults. */
++# define V3D_TFU_COEF0_USECOEF BIT(31)
++/* TFU YUV Coefficient 1 */
++#define V3D_TFU_COEF1 0x00428
++/* TFU YUV Coefficient 2 */
++#define V3D_TFU_COEF2 0x0042c
++/* TFU YUV Coefficient 3 */
++#define V3D_TFU_COEF3 0x00430
++
++#define V3D_TFU_CRC 0x00434
++
+ /* Per-MMU registers. */
+
+ #define V3D_MMUC_CONTROL 0x01000
+--- a/drivers/gpu/drm/v3d/v3d_sched.c
++++ b/drivers/gpu/drm/v3d/v3d_sched.c
+@@ -30,6 +30,12 @@ to_v3d_job(struct drm_sched_job *sched_j
+ return container_of(sched_job, struct v3d_job, base);
+ }
+
++static struct v3d_tfu_job *
++to_tfu_job(struct drm_sched_job *sched_job)
++{
++ return container_of(sched_job, struct v3d_tfu_job, base);
++}
++
+ static void
+ v3d_job_free(struct drm_sched_job *sched_job)
+ {
+@@ -38,6 +44,14 @@ v3d_job_free(struct drm_sched_job *sched
+ v3d_exec_put(job->exec);
+ }
+
++static void
++v3d_tfu_job_free(struct drm_sched_job *sched_job)
++{
++ struct v3d_tfu_job *job = to_tfu_job(sched_job);
++
++ v3d_tfu_job_put(job);
++}
++
+ /**
+ * Returns the fences that the bin or render job depends on, one by one.
+ * v3d_job_run() won't be called until all of them have been signaled.
+@@ -76,6 +90,27 @@ v3d_job_dependency(struct drm_sched_job
+ return fence;
+ }
+
++/**
++ * Returns the fences that the TFU job depends on, one by one.
++ * v3d_tfu_job_run() won't be called until all of them have been
++ * signaled.
++ */
++static struct dma_fence *
++v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
++ struct drm_sched_entity *s_entity)
++{
++ struct v3d_tfu_job *job = to_tfu_job(sched_job);
++ struct dma_fence *fence;
++
++ fence = job->in_fence;
++ if (fence) {
++ job->in_fence = NULL;
++ return fence;
++ }
++
++ return NULL;
++}
++
+ static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+ {
+ struct v3d_job *job = to_v3d_job(sched_job);
+@@ -147,31 +182,47 @@ static struct dma_fence *v3d_job_run(str
+ return fence;
+ }
+
+-static void
+-v3d_job_timedout(struct drm_sched_job *sched_job)
++static struct dma_fence *
++v3d_tfu_job_run(struct drm_sched_job *sched_job)
+ {
+- struct v3d_job *job = to_v3d_job(sched_job);
+- struct v3d_exec_info *exec = job->exec;
+- struct v3d_dev *v3d = exec->v3d;
+- enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+- enum v3d_queue q;
+- u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+- u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
++ struct v3d_tfu_job *job = to_tfu_job(sched_job);
++ struct v3d_dev *v3d = job->v3d;
++ struct drm_device *dev = &v3d->drm;
++ struct dma_fence *fence;
+
+- /* If the current address or return address have changed, then
+- * the GPU has probably made progress and we should delay the
+- * reset. This could fail if the GPU got in an infinite loop
+- * in the CL, but that is pretty unlikely outside of an i-g-t
+- * testcase.
+- */
+- if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+- job->timedout_ctca = ctca;
+- job->timedout_ctra = ctra;
++ fence = v3d_fence_create(v3d, V3D_TFU);
++ if (IS_ERR(fence))
++ return NULL;
+
+- schedule_delayed_work(&job->base.work_tdr,
+- job->base.sched->timeout);
+- return;
++ v3d->tfu_job = job;
++ if (job->done_fence)
++ dma_fence_put(job->done_fence);
++ job->done_fence = dma_fence_get(fence);
++
++ trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
++
++ V3D_WRITE(V3D_TFU_IIA, job->args.iia);
++ V3D_WRITE(V3D_TFU_IIS, job->args.iis);
++ V3D_WRITE(V3D_TFU_ICA, job->args.ica);
++ V3D_WRITE(V3D_TFU_IUA, job->args.iua);
++ V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
++ V3D_WRITE(V3D_TFU_IOS, job->args.ios);
++ V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
++ if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
++ V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
++ V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
++ V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
+ }
++ /* ICFG kicks off the job. */
++ V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
++
++ return fence;
++}
++
++static void
++v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
++{
++ enum v3d_queue q;
+
+ mutex_lock(&v3d->reset_lock);
+
+@@ -196,6 +247,41 @@ v3d_job_timedout(struct drm_sched_job *s
+ mutex_unlock(&v3d->reset_lock);
+ }
+
++static void
++v3d_job_timedout(struct drm_sched_job *sched_job)
++{
++ struct v3d_job *job = to_v3d_job(sched_job);
++ struct v3d_exec_info *exec = job->exec;
++ struct v3d_dev *v3d = exec->v3d;
++ enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
++ u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
++ u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
++
++ /* If the current address or return address have changed, then
++ * the GPU has probably made progress and we should delay the
++ * reset. This could fail if the GPU got in an infinite loop
++ * in the CL, but that is pretty unlikely outside of an i-g-t
++ * testcase.
++ */
++ if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
++ job->timedout_ctca = ctca;
++ job->timedout_ctra = ctra;
++ schedule_delayed_work(&job->base.work_tdr,
++ job->base.sched->timeout);
++ return;
++ }
++
++ v3d_gpu_reset_for_timeout(v3d, sched_job);
++}
++
++static void
++v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
++{
++ struct v3d_tfu_job *job = to_tfu_job(sched_job);
++
++ v3d_gpu_reset_for_timeout(job->v3d, sched_job);
++}
++
+ static const struct drm_sched_backend_ops v3d_sched_ops = {
+ .dependency = v3d_job_dependency,
+ .run_job = v3d_job_run,
+@@ -203,6 +289,13 @@ static const struct drm_sched_backend_op
+ .free_job = v3d_job_free
+ };
+
++static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
++ .dependency = v3d_tfu_job_dependency,
++ .run_job = v3d_tfu_job_run,
++ .timedout_job = v3d_tfu_job_timedout,
++ .free_job = v3d_tfu_job_free
++};
++
+ int
+ v3d_sched_init(struct v3d_dev *v3d)
+ {
+@@ -232,6 +325,19 @@ v3d_sched_init(struct v3d_dev *v3d)
+ drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+ return ret;
+ }
++
++ ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
++ &v3d_tfu_sched_ops,
++ hw_jobs_limit, job_hang_limit,
++ msecs_to_jiffies(hang_limit_ms),
++ "v3d_tfu");
++ if (ret) {
++ dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
++ ret);
++ drm_sched_fini(&v3d->queue[V3D_RENDER].sched);
++ drm_sched_fini(&v3d->queue[V3D_BIN].sched);
++ return ret;
++ }
+
+ return 0;
+ }
+--- a/drivers/gpu/drm/v3d/v3d_trace.h
++++ b/drivers/gpu/drm/v3d/v3d_trace.h
+@@ -42,6 +42,26 @@ TRACE_EVENT(v3d_submit_cl,
+ __entry->ctnqea)
+ );
+
++TRACE_EVENT(v3d_submit_tfu,
++ TP_PROTO(struct drm_device *dev,
++ uint64_t seqno),
++ TP_ARGS(dev, seqno),
++
++ TP_STRUCT__entry(
++ __field(u32, dev)
++ __field(u64, seqno)
++ ),
++
++ TP_fast_assign(
++ __entry->dev = dev->primary->index;
++ __entry->seqno = seqno;
++ ),
++
++ TP_printk("dev=%u, seqno=%llu",
++ __entry->dev,
++ __entry->seqno)
++);
++
+ TRACE_EVENT(v3d_reset_begin,
+ TP_PROTO(struct drm_device *dev),
+ TP_ARGS(dev),
+--- a/include/uapi/drm/v3d_drm.h
++++ b/include/uapi/drm/v3d_drm.h
+@@ -36,6 +36,7 @@ extern "C" {
+ #define DRM_V3D_MMAP_BO 0x03
+ #define DRM_V3D_GET_PARAM 0x04
+ #define DRM_V3D_GET_BO_OFFSET 0x05
++#define DRM_V3D_SUBMIT_TFU 0x06
+
+ #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
+ #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
+@@ -43,6 +44,7 @@ extern "C" {
+ #define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
+ #define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
+ #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
++#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
+
+ /**
+ * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
+@@ -169,6 +171,7 @@ enum drm_v3d_param {
+ DRM_V3D_PARAM_V3D_CORE0_IDENT0,
+ DRM_V3D_PARAM_V3D_CORE0_IDENT1,
+ DRM_V3D_PARAM_V3D_CORE0_IDENT2,
++ DRM_V3D_PARAM_SUPPORTS_TFU,
+ };
+
+ struct drm_v3d_get_param {
+@@ -187,6 +190,28 @@ struct drm_v3d_get_bo_offset {
+ __u32 offset;
+ };
+
++struct drm_v3d_submit_tfu {
++ __u32 icfg;
++ __u32 iia;
++ __u32 iis;
++ __u32 ica;
++ __u32 iua;
++ __u32 ioa;
++ __u32 ios;
++ __u32 coef[4];
++ /* First handle is the output BO, following are other inputs.
++ * 0 for unused.
++ */
++ __u32 bo_handles[4];
++ /* sync object to block on before running the TFU job. Each TFU
++ * job will execute in the order submitted to its FD. Synchronization
++ * against rendering jobs requires using sync objects.
++ */
++ __u32 in_sync;
++ /* Sync object to signal when the TFU job is done. */
++ __u32 out_sync;
++};
++
+ #if defined(__cplusplus)
+ }
+ #endif
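
For reviewers who want to exercise the new interface, here is a minimal userspace sketch of driving DRM_IOCTL_V3D_SUBMIT_TFU. It is not part of the patch: the BO and syncobj handles and the TFU register values are placeholders that a real user-mode driver (e.g. Mesa's v3d backend) would derive from the actual image layout, and the uapi header include path depends on how the headers are installed.

#include <string.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/v3d_drm.h>	/* the uapi header added by this patch; path may vary */

/* Submit one TFU conversion job. The register values (icfg, iia, iis,
 * ioa, ios) below are placeholders for whatever the image layout
 * requires; the kernel ORs V3D_TFU_ICFG_IOC into icfg itself so the
 * job raises an interrupt when it completes. Leaving coef[0] zero
 * (no V3D_TFU_COEF0_USECOEF) keeps the default YUV coefficients.
 */
static int submit_tfu(int fd, __u32 out_bo, __u32 in_bo,
		      __u32 in_syncobj, __u32 out_syncobj)
{
	struct drm_v3d_submit_tfu args;

	memset(&args, 0, sizeof(args));
	args.icfg = 0;			/* placeholder format/mipmap config */
	args.iia = 0;			/* input image address within in_bo */
	args.iis = 0;			/* input image stride */
	args.ioa = 0;			/* output image address within out_bo */
	args.ios = 0;			/* output image size */
	args.bo_handles[0] = out_bo;	/* first handle is the output BO */
	args.bo_handles[1] = in_bo;	/* remaining handles are inputs, 0 = unused */
	args.in_sync = in_syncobj;	/* 0 = no fence to wait on */
	args.out_sync = out_syncobj;	/* signaled when the conversion is done */

	if (ioctl(fd, DRM_IOCTL_V3D_SUBMIT_TFU, &args) != 0) {
		perror("DRM_IOCTL_V3D_SUBMIT_TFU");
		return -1;
	}
	return 0;
}

Before submitting, userspace can query DRM_V3D_PARAM_SUPPORTS_TFU through the existing DRM_IOCTL_V3D_GET_PARAM to detect kernels that predate this patch.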