From 3643e31e7808587e5f4a2619bdd355848ac88737 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 4 Dec 2015 11:35:34 -0800 Subject: [PATCH 115/156] drm/vc4: Update a bunch of code to match upstream submission. This gets almost everything matching, except for the MSAA support and using generic PM domains. Signed-off-by: Eric Anholt --- drivers/gpu/drm/drm_gem_cma_helper.c | 13 +- drivers/gpu/drm/vc4/vc4_bo.c | 322 +++++++++++++++++------------ drivers/gpu/drm/vc4/vc4_crtc.c | 7 +- drivers/gpu/drm/vc4/vc4_drv.c | 6 +- drivers/gpu/drm/vc4/vc4_drv.h | 20 +- drivers/gpu/drm/vc4/vc4_gem.c | 24 ++- drivers/gpu/drm/vc4/vc4_irq.c | 5 +- drivers/gpu/drm/vc4/vc4_kms.c | 1 + drivers/gpu/drm/vc4/vc4_packet.h | 210 +++++++++---------- drivers/gpu/drm/vc4/vc4_qpu_defines.h | 308 ++++++++++++++------------- drivers/gpu/drm/vc4/vc4_render_cl.c | 4 +- drivers/gpu/drm/vc4/vc4_v3d.c | 10 +- drivers/gpu/drm/vc4/vc4_validate.c | 130 ++++++------ drivers/gpu/drm/vc4/vc4_validate_shaders.c | 66 +++--- include/drm/drmP.h | 8 +- 15 files changed, 598 insertions(+), 536 deletions(-) --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -58,15 +58,14 @@ __drm_gem_cma_create(struct drm_device * struct drm_gem_cma_object *cma_obj; struct drm_gem_object *gem_obj; int ret; - size_t obj_size = (drm->driver->gem_obj_size ? - drm->driver->gem_obj_size : - sizeof(*cma_obj)); - cma_obj = kzalloc(obj_size, GFP_KERNEL); - if (!cma_obj) + if (drm->driver->gem_create_object) + gem_obj = drm->driver->gem_create_object(drm, size); + else + gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL); + if (!gem_obj) return ERR_PTR(-ENOMEM); - - gem_obj = &cma_obj->base; + cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base); ret = drm_gem_object_init(drm, gem_obj, size); if (ret) --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -12,6 +12,10 @@ * access to system memory with no MMU in between. To support it, we * use the GEM CMA helper functions to allocate contiguous ranges of * physical memory for our BOs. + * + * Since the CMA allocator is very slow, we keep a cache of recently + * freed BOs around so that the kernel's allocation of objects for 3D + * rendering can return quickly. */ #include "vc4_drv.h" @@ -34,6 +38,36 @@ static void vc4_bo_stats_dump(struct vc4 vc4->bo_stats.size_cached / 1024); } +#ifdef CONFIG_DEBUG_FS +int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_bo_stats stats; + + /* Take a snapshot of the current stats with the lock held. 
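+	 * Taking a copy under the lock lets the seq_printf() calls
+	 * below run without bo_lock held.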
*/ + mutex_lock(&vc4->bo_lock); + stats = vc4->bo_stats; + mutex_unlock(&vc4->bo_lock); + + seq_printf(m, "num bos allocated: %d\n", + stats.num_allocated); + seq_printf(m, "size bos allocated: %dkb\n", + stats.size_allocated / 1024); + seq_printf(m, "num bos used: %d\n", + stats.num_allocated - stats.num_cached); + seq_printf(m, "size bos used: %dkb\n", + (stats.size_allocated - stats.size_cached) / 1024); + seq_printf(m, "num bos cached: %d\n", + stats.num_cached); + seq_printf(m, "size bos cached: %dkb\n", + stats.size_cached / 1024); + + return 0; +} +#endif + static uint32_t bo_page_index(size_t size) { return (size / PAGE_SIZE) - 1; @@ -81,8 +115,8 @@ static struct list_head *vc4_get_cache_l struct list_head *new_list; uint32_t i; - new_list = kmalloc(new_size * sizeof(struct list_head), - GFP_KERNEL); + new_list = kmalloc_array(new_size, sizeof(struct list_head), + GFP_KERNEL); if (!new_list) return NULL; @@ -90,7 +124,9 @@ static struct list_head *vc4_get_cache_l * head locations. */ for (i = 0; i < vc4->bo_cache.size_list_size; i++) { - struct list_head *old_list = &vc4->bo_cache.size_list[i]; + struct list_head *old_list = + &vc4->bo_cache.size_list[i]; + if (list_empty(old_list)) INIT_LIST_HEAD(&new_list[i]); else @@ -122,11 +158,60 @@ void vc4_bo_cache_purge(struct drm_devic mutex_unlock(&vc4->bo_lock); } -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size) +static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, + uint32_t size) { struct vc4_dev *vc4 = to_vc4_dev(dev); - uint32_t size = roundup(unaligned_size, PAGE_SIZE); uint32_t page_index = bo_page_index(size); + struct vc4_bo *bo = NULL; + + size = roundup(size, PAGE_SIZE); + + mutex_lock(&vc4->bo_lock); + if (page_index >= vc4->bo_cache.size_list_size) + goto out; + + if (list_empty(&vc4->bo_cache.size_list[page_index])) + goto out; + + bo = list_first_entry(&vc4->bo_cache.size_list[page_index], + struct vc4_bo, size_head); + vc4_bo_remove_from_cache(bo); + kref_init(&bo->base.base.refcount); + +out: + mutex_unlock(&vc4->bo_lock); + return bo; +} + +/** + * vc4_gem_create_object - Implementation of driver->gem_create_object. + * + * This lets the CMA helpers allocate object structs for us, and keep + * our BO stats correct. + */ +struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_bo *bo; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + mutex_lock(&vc4->bo_lock); + vc4->bo_stats.num_allocated++; + vc4->bo_stats.size_allocated += size; + mutex_unlock(&vc4->bo_lock); + + return &bo->base.base; +} + +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, + bool from_cache) +{ + size_t size = roundup(unaligned_size, PAGE_SIZE); + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_gem_cma_object *cma_obj; int pass; @@ -134,18 +219,12 @@ struct vc4_bo *vc4_bo_create(struct drm_ return NULL; /* First, try to get a vc4_bo from the kernel BO cache. 
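 	 * Cached BOs are recycled without being zeroed, so only
 	 * in-kernel callers that never leak the old contents to
 	 * userspace pass from_cache == true.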
*/ - mutex_lock(&vc4->bo_lock); - if (page_index < vc4->bo_cache.size_list_size && - !list_empty(&vc4->bo_cache.size_list[page_index])) { - struct vc4_bo *bo = - list_first_entry(&vc4->bo_cache.size_list[page_index], - struct vc4_bo, size_head); - vc4_bo_remove_from_cache(bo); - mutex_unlock(&vc4->bo_lock); - kref_init(&bo->base.base.refcount); - return bo; + if (from_cache) { + struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size); + + if (bo) + return bo; } - mutex_unlock(&vc4->bo_lock); /* Otherwise, make a new BO. */ for (pass = 0; ; pass++) { @@ -179,9 +258,6 @@ struct vc4_bo *vc4_bo_create(struct drm_ } } - vc4->bo_stats.num_allocated++; - vc4->bo_stats.size_allocated += size; - return to_vc4_bo(&cma_obj->base); } @@ -199,7 +275,7 @@ int vc4_dumb_create(struct drm_file *fil if (args->size < args->pitch * args->height) args->size = args->pitch * args->height; - bo = vc4_bo_create(dev, args->size); + bo = vc4_bo_create(dev, args->size, false); if (!bo) return -ENOMEM; @@ -209,8 +285,8 @@ int vc4_dumb_create(struct drm_file *fil return ret; } -static void -vc4_bo_cache_free_old(struct drm_device *dev) +/* Must be called with bo_lock held. */ +static void vc4_bo_cache_free_old(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long expire_time = jiffies - msecs_to_jiffies(1000); @@ -313,15 +389,77 @@ vc4_prime_export(struct drm_device *dev, return drm_gem_prime_export(dev, obj, flags); } -int -vc4_create_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) +int vc4_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; + + gem_obj = vma->vm_private_data; + bo = to_vc4_bo(gem_obj); + + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); + return -EINVAL; + } + + /* + * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map + * the whole buffer. + */ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_pgoff = 0; + + ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma, + bo->base.vaddr, bo->base.paddr, + vma->vm_end - vma->vm_start); + if (ret) + drm_gem_vm_close(vma); + + return ret; +} + +int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); + return -EINVAL; + } + + return drm_gem_cma_prime_mmap(obj, vma); +} + +void *vc4_prime_vmap(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + if (bo->validated_shader) { + DRM_ERROR("mmaping of shader BOs not allowed.\n"); + return ERR_PTR(-EINVAL); + } + + return drm_gem_cma_prime_vmap(obj); +} + +int vc4_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) { struct drm_vc4_create_bo *args = data; struct vc4_bo *bo = NULL; int ret; - bo = vc4_bo_create(dev, args->size); + /* + * We can't allocate from the BO cache, because the BOs don't + * get zeroed, and that might leak data between users. 
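+	 * (Shader BOs may still use the cache: vc4_create_shader_bo_ioctl()
+	 * overwrites the whole buffer, clearing any tail it doesn't
+	 * copy into.)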
+ */ + bo = vc4_bo_create(dev, args->size, false); if (!bo) return -ENOMEM; @@ -331,6 +469,25 @@ vc4_create_bo_ioctl(struct drm_device *d return ret; } +int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -EINVAL; + } + + /* The mmap offset was set up at BO allocation time. */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + drm_gem_object_unreference_unlocked(gem_obj); + return 0; +} + int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -355,7 +512,7 @@ vc4_create_shader_bo_ioctl(struct drm_de return -EINVAL; } - bo = vc4_bo_create(dev, args->size); + bo = vc4_bo_create(dev, args->size, true); if (!bo) return -ENOMEM; @@ -364,6 +521,11 @@ vc4_create_shader_bo_ioctl(struct drm_de args->size); if (ret != 0) goto fail; + /* Clear the rest of the memory from allocating from the BO + * cache. + */ + memset(bo->base.vaddr + args->size, 0, + bo->base.base.size - args->size); bo->validated_shader = vc4_validate_shader(&bo->base); if (!bo->validated_shader) { @@ -382,85 +544,6 @@ vc4_create_shader_bo_ioctl(struct drm_de return ret; } -int -vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_vc4_mmap_bo *args = data; - struct drm_gem_object *gem_obj; - - gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -EINVAL; - } - - /* The mmap offset was set up at BO allocation time. */ - args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); - - drm_gem_object_unreference(gem_obj); - return 0; -} - -int vc4_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_gem_object *gem_obj; - struct vc4_bo *bo; - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - - gem_obj = vma->vm_private_data; - bo = to_vc4_bo(gem_obj); - - if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { - DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); - return -EINVAL; - } - - /* - * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the - * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map - * the whole buffer. 
- */ - vma->vm_flags &= ~VM_PFNMAP; - vma->vm_pgoff = 0; - - ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma, - bo->base.vaddr, bo->base.paddr, - vma->vm_end - vma->vm_start); - if (ret) - drm_gem_vm_close(vma); - - return ret; -} - -int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - struct vc4_bo *bo = to_vc4_bo(obj); - - if (bo->validated_shader) { - DRM_ERROR("mmaping of shader BOs not allowed.\n"); - return -EINVAL; - } - - return drm_gem_cma_prime_mmap(obj, vma); -} - -void *vc4_prime_vmap(struct drm_gem_object *obj) -{ - struct vc4_bo *bo = to_vc4_bo(obj); - - if (bo->validated_shader) { - DRM_ERROR("mmaping of shader BOs not allowed.\n"); - return ERR_PTR(-EINVAL); - } - - return drm_gem_cma_prime_vmap(obj); -} - void vc4_bo_cache_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -472,7 +555,7 @@ void vc4_bo_cache_init(struct drm_device INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work); setup_timer(&vc4->bo_cache.time_timer, vc4_bo_cache_time_timer, - (unsigned long) dev); + (unsigned long)dev); } void vc4_bo_cache_destroy(struct drm_device *dev) @@ -489,28 +572,3 @@ void vc4_bo_cache_destroy(struct drm_dev vc4_bo_stats_dump(vc4); } } - -#ifdef CONFIG_DEBUG_FS -int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) -{ - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_bo_stats stats; - - mutex_lock(&vc4->bo_lock); - stats = vc4->bo_stats; - mutex_unlock(&vc4->bo_lock); - - seq_printf(m, "num bos allocated: %d\n", stats.num_allocated); - seq_printf(m, "size bos allocated: %dkb\n", stats.size_allocated / 1024); - seq_printf(m, "num bos used: %d\n", (stats.num_allocated - - stats.num_cached)); - seq_printf(m, "size bos used: %dkb\n", (stats.size_allocated - - stats.size_cached) / 1024); - seq_printf(m, "num bos cached: %d\n", stats.num_cached); - seq_printf(m, "size bos cached: %dkb\n", stats.size_cached / 1024); - - return 0; -} -#endif --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -501,6 +501,7 @@ vc4_async_page_flip_complete(struct vc4_ vc4_plane_async_set_fb(plane, flip_state->fb); if (flip_state->event) { unsigned long flags; + spin_lock_irqsave(&dev->event_lock, flags); drm_crtc_send_vblank_event(crtc, flip_state->event); spin_unlock_irqrestore(&dev->event_lock, flags); @@ -562,9 +563,9 @@ static int vc4_async_page_flip(struct dr } static int vc4_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t flags) + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) { if (flags & DRM_MODE_PAGE_FLIP_ASYNC) return vc4_async_page_flip(crtc, fb, event, flags); --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -81,7 +81,8 @@ static const struct drm_ioctl_desc vc4_d DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, + DRM_ROOT_ONLY), }; static struct drm_driver vc4_drm_driver = { @@ -107,6 +108,7 @@ static struct drm_driver vc4_drm_driver .debugfs_cleanup = vc4_debugfs_cleanup, #endif + .gem_create_object = vc4_create_object, .gem_free_object = vc4_free_object, .gem_vm_ops = 
&drm_gem_cma_vm_ops, @@ -128,8 +130,6 @@ static struct drm_driver vc4_drm_driver .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls), .fops = &vc4_drm_fops, - //.gem_obj_size = sizeof(struct vc4_bo), - .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -72,6 +72,9 @@ struct vc4_dev { * job_done_work. */ struct list_head job_done_list; + /* Spinlock used to synchronize the job_list and seqno + * accesses between the IRQ handler and GEM ioctls. + */ spinlock_t job_lock; wait_queue_head_t job_wait_queue; struct work_struct job_done_work; @@ -318,8 +321,7 @@ struct vc4_texture_sample_info { * and validate the shader state record's uniforms that define the texture * samples. */ -struct vc4_validated_shader_info -{ +struct vc4_validated_shader_info { uint32_t uniforms_size; uint32_t uniforms_src_size; uint32_t num_texture_samples; @@ -355,8 +357,10 @@ struct vc4_validated_shader_info #define wait_for(COND, MS) _wait_for(COND, MS, 1) /* vc4_bo.c */ +struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); void vc4_free_object(struct drm_gem_object *gem_obj); -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size); +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, + bool from_cache); int vc4_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); @@ -432,7 +436,8 @@ struct drm_plane *vc4_plane_init(struct enum drm_plane_type type); u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist); u32 vc4_plane_dlist_size(struct drm_plane_state *state); -void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb); +void vc4_plane_async_set_fb(struct drm_plane *plane, + struct drm_framebuffer *fb); /* vc4_v3d.c */ extern struct platform_driver vc4_v3d_driver; @@ -450,9 +455,6 @@ vc4_validate_bin_cl(struct drm_device *d int vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); -struct vc4_validated_shader_info * -vc4_validate_shader(struct drm_gem_cma_object *shader_obj); - bool vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex, enum vc4_bo_mode mode, @@ -464,3 +466,7 @@ bool vc4_check_tex_size(struct vc4_exec_ struct drm_gem_cma_object *fbo, uint32_t offset, uint8_t tiling_format, uint32_t width, uint32_t height, uint8_t cpp); + +/* vc4_validate_shader.c */ +struct vc4_validated_shader_info * +vc4_validate_shader(struct drm_gem_cma_object *shader_obj); --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -53,9 +53,8 @@ vc4_free_hang_state(struct drm_device *d unsigned int i; mutex_lock(&dev->struct_mutex); - for (i = 0; i < state->user_state.bo_count; i++) { + for (i = 0; i < state->user_state.bo_count; i++) drm_gem_object_unreference(state->bo[i]); - } mutex_unlock(&dev->struct_mutex); kfree(state); @@ -65,10 +64,10 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_vc4_get_hang_state *get_state = data; + struct drm_vc4_get_hang_state *get_state = data; struct drm_vc4_get_hang_state_bo *bo_state; struct vc4_hang_state *kernel_state; - struct drm_vc4_get_hang_state *state; + struct drm_vc4_get_hang_state *state; struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long irqflags; u32 i; @@ -107,6 +106,7 @@ vc4_get_hang_state_ioctl(struct drm_devi for (i = 0; i < state->bo_count; i++) { struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]); u32 handle; + ret = drm_gem_handle_create(file_priv, 
kernel_state->bo[i], &handle); @@ -124,7 +124,7 @@ vc4_get_hang_state_ioctl(struct drm_devi state->bo_count * sizeof(*bo_state)); kfree(bo_state); - err_free: +err_free: vc4_free_hang_state(dev, kernel_state); @@ -578,7 +578,7 @@ vc4_get_bcl(struct drm_device *dev, stru goto fail; } - bo = vc4_bo_create(dev, exec_size); + bo = vc4_bo_create(dev, exec_size, true); if (!bo) { DRM_ERROR("Couldn't allocate BO for binning\n"); ret = PTR_ERR(exec->exec_bo); @@ -668,6 +668,7 @@ vc4_job_handle_completed(struct vc4_dev static void vc4_seqno_cb_work(struct work_struct *work) { struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); + cb->func(cb); } @@ -717,6 +718,7 @@ vc4_wait_for_seqno_ioctl_helper(struct d if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { uint64_t delta = jiffies_to_nsecs(jiffies - start); + if (*timeout_ns >= delta) *timeout_ns -= delta; } @@ -750,9 +752,10 @@ vc4_wait_bo_ioctl(struct drm_device *dev } bo = to_vc4_bo(gem_obj); - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, &args->timeout_ns); + ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, + &args->timeout_ns); - drm_gem_object_unreference(gem_obj); + drm_gem_object_unreference_unlocked(gem_obj); return ret; } @@ -793,7 +796,8 @@ vc4_submit_cl_ioctl(struct drm_device *d if (ret) goto fail; } else { - exec->ct0ca = exec->ct0ea = 0; + exec->ct0ca = 0; + exec->ct0ea = 0; } ret = vc4_get_rcl(dev, exec); @@ -831,7 +835,7 @@ vc4_gem_init(struct drm_device *dev) INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); setup_timer(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, - (unsigned long) dev); + (unsigned long)dev); INIT_WORK(&vc4->job_done_work, vc4_job_done_work); } --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -56,7 +56,7 @@ vc4_overflow_mem_work(struct work_struct struct drm_device *dev = vc4->dev; struct vc4_bo *bo; - bo = vc4_bo_create(dev, 256 * 1024); + bo = vc4_bo_create(dev, 256 * 1024, true); if (!bo) { DRM_ERROR("Couldn't allocate binner overflow mem\n"); return; @@ -87,9 +87,8 @@ vc4_overflow_mem_work(struct work_struct spin_unlock_irqrestore(&vc4->job_lock, irqflags); } - if (vc4->overflow_mem) { + if (vc4->overflow_mem) drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - } vc4->overflow_mem = bo; V3D_WRITE(V3D_BPOA, bo->base.paddr); --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -132,6 +132,7 @@ static int vc4_atomic_commit(struct drm_ struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(new_state->fb, 0); struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + wait_seqno = max(bo->seqno, wait_seqno); } } --- a/drivers/gpu/drm/vc4/vc4_packet.h +++ b/drivers/gpu/drm/vc4/vc4_packet.h @@ -27,60 +27,60 @@ #include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */ enum vc4_packet { - VC4_PACKET_HALT = 0, - VC4_PACKET_NOP = 1, + VC4_PACKET_HALT = 0, + VC4_PACKET_NOP = 1, - VC4_PACKET_FLUSH = 4, - VC4_PACKET_FLUSH_ALL = 5, - VC4_PACKET_START_TILE_BINNING = 6, - VC4_PACKET_INCREMENT_SEMAPHORE = 7, - VC4_PACKET_WAIT_ON_SEMAPHORE = 8, - - VC4_PACKET_BRANCH = 16, - VC4_PACKET_BRANCH_TO_SUB_LIST = 17, - - VC4_PACKET_STORE_MS_TILE_BUFFER = 24, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, - VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, - VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, - VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, - VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, - - VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, - VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, - - VC4_PACKET_COMPRESSED_PRIMITIVE = 48, - 
VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, - - VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, - - VC4_PACKET_GL_SHADER_STATE = 64, - VC4_PACKET_NV_SHADER_STATE = 65, - VC4_PACKET_VG_SHADER_STATE = 66, - - VC4_PACKET_CONFIGURATION_BITS = 96, - VC4_PACKET_FLAT_SHADE_FLAGS = 97, - VC4_PACKET_POINT_SIZE = 98, - VC4_PACKET_LINE_WIDTH = 99, - VC4_PACKET_RHT_X_BOUNDARY = 100, - VC4_PACKET_DEPTH_OFFSET = 101, - VC4_PACKET_CLIP_WINDOW = 102, - VC4_PACKET_VIEWPORT_OFFSET = 103, - VC4_PACKET_Z_CLIPPING = 104, - VC4_PACKET_CLIPPER_XY_SCALING = 105, - VC4_PACKET_CLIPPER_Z_SCALING = 106, - - VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, - VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, - VC4_PACKET_CLEAR_COLORS = 114, - VC4_PACKET_TILE_COORDINATES = 115, - - /* Not an actual hardware packet -- this is what we use to put - * references to GEM bos in the command stream, since we need the u32 - * int the actual address packet in order to store the offset from the - * start of the BO. - */ - VC4_PACKET_GEM_HANDLES = 254, + VC4_PACKET_FLUSH = 4, + VC4_PACKET_FLUSH_ALL = 5, + VC4_PACKET_START_TILE_BINNING = 6, + VC4_PACKET_INCREMENT_SEMAPHORE = 7, + VC4_PACKET_WAIT_ON_SEMAPHORE = 8, + + VC4_PACKET_BRANCH = 16, + VC4_PACKET_BRANCH_TO_SUB_LIST = 17, + + VC4_PACKET_STORE_MS_TILE_BUFFER = 24, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, + + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, + + VC4_PACKET_COMPRESSED_PRIMITIVE = 48, + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, + + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, + + VC4_PACKET_GL_SHADER_STATE = 64, + VC4_PACKET_NV_SHADER_STATE = 65, + VC4_PACKET_VG_SHADER_STATE = 66, + + VC4_PACKET_CONFIGURATION_BITS = 96, + VC4_PACKET_FLAT_SHADE_FLAGS = 97, + VC4_PACKET_POINT_SIZE = 98, + VC4_PACKET_LINE_WIDTH = 99, + VC4_PACKET_RHT_X_BOUNDARY = 100, + VC4_PACKET_DEPTH_OFFSET = 101, + VC4_PACKET_CLIP_WINDOW = 102, + VC4_PACKET_VIEWPORT_OFFSET = 103, + VC4_PACKET_Z_CLIPPING = 104, + VC4_PACKET_CLIPPER_XY_SCALING = 105, + VC4_PACKET_CLIPPER_Z_SCALING = 106, + + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, + VC4_PACKET_CLEAR_COLORS = 114, + VC4_PACKET_TILE_COORDINATES = 115, + + /* Not an actual hardware packet -- this is what we use to put + * references to GEM bos in the command stream, since we need the u32 + * int the actual address packet in order to store the offset from the + * start of the BO. 
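+	 * The bin CL validator records the handles carried here and
+	 * uses them to patch real bus addresses into the packets that
+	 * follow.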
+ */ + VC4_PACKET_GEM_HANDLES = 254, } __attribute__ ((__packed__)); #define VC4_PACKET_HALT_SIZE 1 @@ -148,10 +148,10 @@ enum vc4_packet { * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address) */ -#define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3) -#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2) -#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1) -#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0) +#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0) /** @} */ @@ -160,10 +160,10 @@ enum vc4_packet { * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL */ -#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15) -#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14) -#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13) -#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12) +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15) +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14) +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13) +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12) #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 @@ -201,28 +201,28 @@ enum vc4_packet { #define VC4_INDEX_BUFFER_U16 (1 << 4) /* This flag is only present in NV shader state. */ -#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3) -#define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2) -#define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1) -#define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0) +#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3) +#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2) +#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1) +#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0) /** @{ byte 2 of config bits. */ -#define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1) -#define VC4_CONFIG_BITS_EARLY_Z (1 << 0) +#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1) +#define VC4_CONFIG_BITS_EARLY_Z BIT(0) /** @} */ /** @{ byte 1 of config bits. */ -#define VC4_CONFIG_BITS_Z_UPDATE (1 << 7) +#define VC4_CONFIG_BITS_Z_UPDATE BIT(7) /** same values in this 3-bit field as PIPE_FUNC_* */ #define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4 -#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3) +#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3) #define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1) #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1) #define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1) #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1) -#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0) +#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0) /** @} */ /** @{ byte 0 of config bits. 
*/ @@ -230,15 +230,15 @@ enum vc4_packet { #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6) #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6) -#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4) -#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3) -#define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2) -#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1) -#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0) +#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4) +#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3) +#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2) +#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1) +#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0) /** @} */ /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ -#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7) +#define VC4_BIN_CONFIG_DB_NON_MS BIT(7) #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 @@ -254,17 +254,17 @@ enum vc4_packet { #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 -#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2) -#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1) -#define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0) +#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2) +#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1) +#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0) /** @} */ /** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */ -#define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12) -#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11) -#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10) -#define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9) -#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8) +#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12) +#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11) +#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10) +#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9) +#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8) /** The values of the field are VC4_TILING_FORMAT_* */ #define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) @@ -280,8 +280,8 @@ enum vc4_packet { #define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 #define VC4_RENDER_CONFIG_FORMAT_BGR565 2 -#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1) -#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0) +#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1) +#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0) #define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4) #define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4) @@ -291,24 +291,24 @@ enum vc4_packet { #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0) enum vc4_texture_data_type { - VC4_TEXTURE_TYPE_RGBA8888 = 0, - VC4_TEXTURE_TYPE_RGBX8888 = 1, - VC4_TEXTURE_TYPE_RGBA4444 = 2, - VC4_TEXTURE_TYPE_RGBA5551 = 3, - VC4_TEXTURE_TYPE_RGB565 = 4, - VC4_TEXTURE_TYPE_LUMINANCE = 5, - VC4_TEXTURE_TYPE_ALPHA = 6, - VC4_TEXTURE_TYPE_LUMALPHA = 7, - VC4_TEXTURE_TYPE_ETC1 = 8, - VC4_TEXTURE_TYPE_S16F = 9, - VC4_TEXTURE_TYPE_S8 = 10, - VC4_TEXTURE_TYPE_S16 = 11, - VC4_TEXTURE_TYPE_BW1 = 12, - VC4_TEXTURE_TYPE_A4 = 13, - VC4_TEXTURE_TYPE_A1 = 14, - VC4_TEXTURE_TYPE_RGBA64 = 15, - VC4_TEXTURE_TYPE_RGBA32R = 16, - VC4_TEXTURE_TYPE_YUV422R = 17, + VC4_TEXTURE_TYPE_RGBA8888 = 0, + VC4_TEXTURE_TYPE_RGBX8888 = 1, + VC4_TEXTURE_TYPE_RGBA4444 = 2, + VC4_TEXTURE_TYPE_RGBA5551 = 3, + VC4_TEXTURE_TYPE_RGB565 = 4, + VC4_TEXTURE_TYPE_LUMINANCE = 5, + VC4_TEXTURE_TYPE_ALPHA = 6, + VC4_TEXTURE_TYPE_LUMALPHA = 7, + VC4_TEXTURE_TYPE_ETC1 = 8, + VC4_TEXTURE_TYPE_S16F = 9, + VC4_TEXTURE_TYPE_S8 = 10, + VC4_TEXTURE_TYPE_S16 = 
11, + VC4_TEXTURE_TYPE_BW1 = 12, + VC4_TEXTURE_TYPE_A4 = 13, + VC4_TEXTURE_TYPE_A1 = 14, + VC4_TEXTURE_TYPE_RGBA64 = 15, + VC4_TEXTURE_TYPE_RGBA32R = 16, + VC4_TEXTURE_TYPE_YUV422R = 17, }; #define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h @@ -25,194 +25,190 @@ #define VC4_QPU_DEFINES_H enum qpu_op_add { - QPU_A_NOP, - QPU_A_FADD, - QPU_A_FSUB, - QPU_A_FMIN, - QPU_A_FMAX, - QPU_A_FMINABS, - QPU_A_FMAXABS, - QPU_A_FTOI, - QPU_A_ITOF, - QPU_A_ADD = 12, - QPU_A_SUB, - QPU_A_SHR, - QPU_A_ASR, - QPU_A_ROR, - QPU_A_SHL, - QPU_A_MIN, - QPU_A_MAX, - QPU_A_AND, - QPU_A_OR, - QPU_A_XOR, - QPU_A_NOT, - QPU_A_CLZ, - QPU_A_V8ADDS = 30, - QPU_A_V8SUBS = 31, + QPU_A_NOP, + QPU_A_FADD, + QPU_A_FSUB, + QPU_A_FMIN, + QPU_A_FMAX, + QPU_A_FMINABS, + QPU_A_FMAXABS, + QPU_A_FTOI, + QPU_A_ITOF, + QPU_A_ADD = 12, + QPU_A_SUB, + QPU_A_SHR, + QPU_A_ASR, + QPU_A_ROR, + QPU_A_SHL, + QPU_A_MIN, + QPU_A_MAX, + QPU_A_AND, + QPU_A_OR, + QPU_A_XOR, + QPU_A_NOT, + QPU_A_CLZ, + QPU_A_V8ADDS = 30, + QPU_A_V8SUBS = 31, }; enum qpu_op_mul { - QPU_M_NOP, - QPU_M_FMUL, - QPU_M_MUL24, - QPU_M_V8MULD, - QPU_M_V8MIN, - QPU_M_V8MAX, - QPU_M_V8ADDS, - QPU_M_V8SUBS, + QPU_M_NOP, + QPU_M_FMUL, + QPU_M_MUL24, + QPU_M_V8MULD, + QPU_M_V8MIN, + QPU_M_V8MAX, + QPU_M_V8ADDS, + QPU_M_V8SUBS, }; enum qpu_raddr { - QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ - /* 0-31 are the plain regfile a or b fields */ - QPU_R_UNIF = 32, - QPU_R_VARY = 35, - QPU_R_ELEM_QPU = 38, - QPU_R_NOP, - QPU_R_XY_PIXEL_COORD = 41, - QPU_R_MS_REV_FLAGS = 41, - QPU_R_VPM = 48, - QPU_R_VPM_LD_BUSY, - QPU_R_VPM_LD_WAIT, - QPU_R_MUTEX_ACQUIRE, + QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ + /* 0-31 are the plain regfile a or b fields */ + QPU_R_UNIF = 32, + QPU_R_VARY = 35, + QPU_R_ELEM_QPU = 38, + QPU_R_NOP, + QPU_R_XY_PIXEL_COORD = 41, + QPU_R_MS_REV_FLAGS = 41, + QPU_R_VPM = 48, + QPU_R_VPM_LD_BUSY, + QPU_R_VPM_LD_WAIT, + QPU_R_MUTEX_ACQUIRE, }; enum qpu_waddr { - /* 0-31 are the plain regfile a or b fields */ - QPU_W_ACC0 = 32, /* aka r0 */ - QPU_W_ACC1, - QPU_W_ACC2, - QPU_W_ACC3, - QPU_W_TMU_NOSWAP, - QPU_W_ACC5, - QPU_W_HOST_INT, - QPU_W_NOP, - QPU_W_UNIFORMS_ADDRESS, - QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */ - QPU_W_MS_FLAGS = 42, - QPU_W_REV_FLAG = 42, - QPU_W_TLB_STENCIL_SETUP = 43, - QPU_W_TLB_Z, - QPU_W_TLB_COLOR_MS, - QPU_W_TLB_COLOR_ALL, - QPU_W_TLB_ALPHA_MASK, - QPU_W_VPM, - QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */ - QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */ - QPU_W_MUTEX_RELEASE, - QPU_W_SFU_RECIP, - QPU_W_SFU_RECIPSQRT, - QPU_W_SFU_EXP, - QPU_W_SFU_LOG, - QPU_W_TMU0_S, - QPU_W_TMU0_T, - QPU_W_TMU0_R, - QPU_W_TMU0_B, - QPU_W_TMU1_S, - QPU_W_TMU1_T, - QPU_W_TMU1_R, - QPU_W_TMU1_B, + /* 0-31 are the plain regfile a or b fields */ + QPU_W_ACC0 = 32, /* aka r0 */ + QPU_W_ACC1, + QPU_W_ACC2, + QPU_W_ACC3, + QPU_W_TMU_NOSWAP, + QPU_W_ACC5, + QPU_W_HOST_INT, + QPU_W_NOP, + QPU_W_UNIFORMS_ADDRESS, + QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */ + QPU_W_MS_FLAGS = 42, + QPU_W_REV_FLAG = 42, + QPU_W_TLB_STENCIL_SETUP = 43, + QPU_W_TLB_Z, + QPU_W_TLB_COLOR_MS, + QPU_W_TLB_COLOR_ALL, + QPU_W_TLB_ALPHA_MASK, + QPU_W_VPM, + QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */ + QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */ + QPU_W_MUTEX_RELEASE, + QPU_W_SFU_RECIP, + QPU_W_SFU_RECIPSQRT, + QPU_W_SFU_EXP, + QPU_W_SFU_LOG, + QPU_W_TMU0_S, + QPU_W_TMU0_T, + QPU_W_TMU0_R, + 
QPU_W_TMU0_B, + QPU_W_TMU1_S, + QPU_W_TMU1_T, + QPU_W_TMU1_R, + QPU_W_TMU1_B, }; enum qpu_sig_bits { - QPU_SIG_SW_BREAKPOINT, - QPU_SIG_NONE, - QPU_SIG_THREAD_SWITCH, - QPU_SIG_PROG_END, - QPU_SIG_WAIT_FOR_SCOREBOARD, - QPU_SIG_SCOREBOARD_UNLOCK, - QPU_SIG_LAST_THREAD_SWITCH, - QPU_SIG_COVERAGE_LOAD, - QPU_SIG_COLOR_LOAD, - QPU_SIG_COLOR_LOAD_END, - QPU_SIG_LOAD_TMU0, - QPU_SIG_LOAD_TMU1, - QPU_SIG_ALPHA_MASK_LOAD, - QPU_SIG_SMALL_IMM, - QPU_SIG_LOAD_IMM, - QPU_SIG_BRANCH + QPU_SIG_SW_BREAKPOINT, + QPU_SIG_NONE, + QPU_SIG_THREAD_SWITCH, + QPU_SIG_PROG_END, + QPU_SIG_WAIT_FOR_SCOREBOARD, + QPU_SIG_SCOREBOARD_UNLOCK, + QPU_SIG_LAST_THREAD_SWITCH, + QPU_SIG_COVERAGE_LOAD, + QPU_SIG_COLOR_LOAD, + QPU_SIG_COLOR_LOAD_END, + QPU_SIG_LOAD_TMU0, + QPU_SIG_LOAD_TMU1, + QPU_SIG_ALPHA_MASK_LOAD, + QPU_SIG_SMALL_IMM, + QPU_SIG_LOAD_IMM, + QPU_SIG_BRANCH }; enum qpu_mux { - /* hardware mux values */ - QPU_MUX_R0, - QPU_MUX_R1, - QPU_MUX_R2, - QPU_MUX_R3, - QPU_MUX_R4, - QPU_MUX_R5, - QPU_MUX_A, - QPU_MUX_B, + /* hardware mux values */ + QPU_MUX_R0, + QPU_MUX_R1, + QPU_MUX_R2, + QPU_MUX_R3, + QPU_MUX_R4, + QPU_MUX_R5, + QPU_MUX_A, + QPU_MUX_B, - /* non-hardware mux values */ - QPU_MUX_IMM, + /* non-hardware mux values */ + QPU_MUX_IMM, }; enum qpu_cond { - QPU_COND_NEVER, - QPU_COND_ALWAYS, - QPU_COND_ZS, - QPU_COND_ZC, - QPU_COND_NS, - QPU_COND_NC, - QPU_COND_CS, - QPU_COND_CC, + QPU_COND_NEVER, + QPU_COND_ALWAYS, + QPU_COND_ZS, + QPU_COND_ZC, + QPU_COND_NS, + QPU_COND_NC, + QPU_COND_CS, + QPU_COND_CC, }; enum qpu_pack_mul { - QPU_PACK_MUL_NOP, - QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */ - QPU_PACK_MUL_8A, - QPU_PACK_MUL_8B, - QPU_PACK_MUL_8C, - QPU_PACK_MUL_8D, + QPU_PACK_MUL_NOP, + /* replicated to each 8 bits of the 32-bit dst. */ + QPU_PACK_MUL_8888 = 3, + QPU_PACK_MUL_8A, + QPU_PACK_MUL_8B, + QPU_PACK_MUL_8C, + QPU_PACK_MUL_8D, }; enum qpu_pack_a { - QPU_PACK_A_NOP, - /* convert to 16 bit float if float input, or to int16. */ - QPU_PACK_A_16A, - QPU_PACK_A_16B, - /* replicated to each 8 bits of the 32-bit dst. */ - QPU_PACK_A_8888, - /* Convert to 8-bit unsigned int. */ - QPU_PACK_A_8A, - QPU_PACK_A_8B, - QPU_PACK_A_8C, - QPU_PACK_A_8D, - - /* Saturating variants of the previous instructions. */ - QPU_PACK_A_32_SAT, /* int-only */ - QPU_PACK_A_16A_SAT, /* int or float */ - QPU_PACK_A_16B_SAT, - QPU_PACK_A_8888_SAT, - QPU_PACK_A_8A_SAT, - QPU_PACK_A_8B_SAT, - QPU_PACK_A_8C_SAT, - QPU_PACK_A_8D_SAT, + QPU_PACK_A_NOP, + /* convert to 16 bit float if float input, or to int16. */ + QPU_PACK_A_16A, + QPU_PACK_A_16B, + /* replicated to each 8 bits of the 32-bit dst. */ + QPU_PACK_A_8888, + /* Convert to 8-bit unsigned int. */ + QPU_PACK_A_8A, + QPU_PACK_A_8B, + QPU_PACK_A_8C, + QPU_PACK_A_8D, + + /* Saturating variants of the previous instructions. 
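+	 * These clamp the result to the destination type's range
+	 * instead of wrapping.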
*/ + QPU_PACK_A_32_SAT, /* int-only */ + QPU_PACK_A_16A_SAT, /* int or float */ + QPU_PACK_A_16B_SAT, + QPU_PACK_A_8888_SAT, + QPU_PACK_A_8A_SAT, + QPU_PACK_A_8B_SAT, + QPU_PACK_A_8C_SAT, + QPU_PACK_A_8D_SAT, }; enum qpu_unpack_r4 { - QPU_UNPACK_R4_NOP, - QPU_UNPACK_R4_F16A_TO_F32, - QPU_UNPACK_R4_F16B_TO_F32, - QPU_UNPACK_R4_8D_REP, - QPU_UNPACK_R4_8A, - QPU_UNPACK_R4_8B, - QPU_UNPACK_R4_8C, - QPU_UNPACK_R4_8D, -}; - -#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) -/* Using the GNU statement expression extension */ -#define QPU_SET_FIELD(value, field) \ - ({ \ - uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ - assert((fieldval & ~ field ## _MASK) == 0); \ - fieldval & field ## _MASK; \ - }) + QPU_UNPACK_R4_NOP, + QPU_UNPACK_R4_F16A_TO_F32, + QPU_UNPACK_R4_F16B_TO_F32, + QPU_UNPACK_R4_8D_REP, + QPU_UNPACK_R4_8A, + QPU_UNPACK_R4_8B, + QPU_UNPACK_R4_8C, + QPU_UNPACK_R4_8D, +}; + +#define QPU_MASK(high, low) \ + ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low)) -#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) +#define QPU_GET_FIELD(word, field) \ + ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) #define QPU_SIG_SHIFT 60 #define QPU_SIG_MASK QPU_MASK(63, 60) --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -63,7 +63,6 @@ static inline void rcl_u32(struct vc4_rc setup->next_offset += 4; } - /* * Emits a no-op STORE_TILE_BUFFER_GENERAL. * @@ -217,7 +216,7 @@ static int vc4_create_rcl_bo(struct drm_ } size += xtiles * ytiles * loop_body_size; - setup->rcl = &vc4_bo_create(dev, size)->base; + setup->rcl = &vc4_bo_create(dev, size, true)->base; if (!setup->rcl) return -ENOMEM; list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head, @@ -256,6 +255,7 @@ static int vc4_create_rcl_bo(struct drm_ for (x = min_x_tile; x <= max_x_tile; x++) { bool first = (x == min_x_tile && y == min_y_tile); bool last = (x == max_x_tile && y == max_y_tile); + emit_tile(exec, setup, x, y, first, last); } } --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -125,7 +125,7 @@ int vc4_v3d_debugfs_regs(struct seq_file int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) { - struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t ident1 = V3D_READ(V3D_IDENT1); @@ -133,11 +133,13 @@ int vc4_v3d_debugfs_ident(struct seq_fil uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS); uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS); - seq_printf(m, "Revision: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_REV)); + seq_printf(m, "Revision: %d\n", + VC4_GET_FIELD(ident1, V3D_IDENT1_REV)); seq_printf(m, "Slices: %d\n", nslc); seq_printf(m, "TMUs: %d\n", nslc * tups); seq_printf(m, "QPUs: %d\n", nslc * qups); - seq_printf(m, "Semaphores: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM)); + seq_printf(m, "Semaphores: %d\n", + VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM)); return 0; } @@ -218,7 +220,7 @@ static int vc4_v3d_bind(struct device *d } static void vc4_v3d_unbind(struct device *dev, struct device *master, - void *data) + void *data) { struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = to_vc4_dev(drm); --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -48,7 +48,6 @@ void *validated, \ void *untrusted - /** Return the width in pixels of a 64-byte 
microtile. */ static uint32_t utile_width(int cpp) @@ -192,7 +191,7 @@ vc4_check_tex_size(struct vc4_exec_info if (size + offset < size || size + offset > fbo->base.size) { - DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n", + DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n", width, height, aligned_width, aligned_height, size, offset, fbo->base.size); @@ -278,7 +277,7 @@ validate_indexed_prim_list(VALIDATE_ARGS if (offset > ib->base.size || (ib->base.size - offset) / index_size < length) { - DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n", + DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n", offset, length, index_size, ib->base.size); return -EINVAL; } @@ -377,6 +376,7 @@ static int validate_tile_binning_config(VALIDATE_ARGS) { struct drm_device *dev = exec->exec_bo->base.dev; + struct vc4_bo *tile_bo; uint8_t flags; uint32_t tile_state_size, tile_alloc_size; uint32_t tile_count; @@ -438,12 +438,12 @@ validate_tile_binning_config(VALIDATE_AR */ tile_alloc_size += 1024 * 1024; - exec->tile_bo = &vc4_bo_create(dev, exec->tile_alloc_offset + - tile_alloc_size)->base; + tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, + true); + exec->tile_bo = &tile_bo->base; if (!exec->tile_bo) return -ENOMEM; - list_add_tail(&to_vc4_bo(&exec->tile_bo->base)->unref_head, - &exec->unref_list); + list_add_tail(&tile_bo->unref_head, &exec->unref_list); /* tile alloc address. */ *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + @@ -463,8 +463,8 @@ validate_gem_handles(VALIDATE_ARGS) return 0; } -#define VC4_DEFINE_PACKET(packet, name, func) \ - [packet] = { packet ## _SIZE, name, func } +#define VC4_DEFINE_PACKET(packet, func) \ + [packet] = { packet ## _SIZE, #packet, func } static const struct cmd_info { uint16_t len; @@ -472,42 +472,43 @@ static const struct cmd_info { int (*func)(struct vc4_exec_info *exec, void *validated, void *untrusted); } cmd_info[] = { - VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all), - VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning), - VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore), - - VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list), - - VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive), - - /* This is only used by clipped primitives (packets 48 and 49), which - * we don't support parsing yet. 
- */ - VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state), - VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state), - - VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, validate_flush_all), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, + validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, + validate_increment_semaphore), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, + validate_indexed_prim_list), + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, + validate_gl_array_primitive), + + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, validate_nv_shader_state), + + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code drop. 
*/ - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL), - VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config), + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, + validate_tile_binning_config), - VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles), + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles), }; int @@ -526,7 +527,7 @@ vc4_validate_bin_cl(struct drm_device *d u8 cmd = *(uint8_t *)src_pkt; const struct cmd_info *info; - if (cmd > ARRAY_SIZE(cmd_info)) { + if (cmd >= ARRAY_SIZE(cmd_info)) { DRM_ERROR("0x%08x: packet %d out of bounds\n", src_offset, cmd); return -EINVAL; @@ -539,11 +540,6 @@ vc4_validate_bin_cl(struct drm_device *d return -EINVAL; } -#if 0 - DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n", - src_offset, cmd, info->name, info->len); -#endif - if (src_offset + info->len > len) { DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x " "exceeds bounds (0x%08x)\n", @@ -558,8 +554,7 @@ vc4_validate_bin_cl(struct drm_device *d if (info->func && info->func(exec, dst_pkt + 1, src_pkt + 1)) { - DRM_ERROR("0x%08x: packet %d (%s) failed to " - "validate\n", + DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n", src_offset, cmd, info->name); return -EINVAL; } @@ -618,12 +613,14 @@ reloc_tex(struct vc4_exec_info *exec, if (sample->is_direct) { uint32_t remaining_size = tex->base.size - p0; + if (p0 > tex->base.size - 4) { DRM_ERROR("UBO offset greater than UBO size\n"); goto fail; } if (p1 > remaining_size - 4) { - DRM_ERROR("UBO clamp would allow reads outside of UBO\n"); + DRM_ERROR("UBO clamp would allow reads " + "outside of UBO\n"); goto fail; } *validated_p0 = tex->paddr + p0; @@ -786,7 +783,7 @@ validate_shader_rec(struct drm_device *d struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8]; uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size; int i; - struct vc4_validated_shader_info *validated_shader; + struct vc4_validated_shader_info *shader; if (state->packet == VC4_PACKET_NV_SHADER_STATE) { relocs = nv_relocs; @@ -841,12 +838,12 @@ validate_shader_rec(struct drm_device *d else mode = VC4_MODE_RENDER; - if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) { + if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) return false; - } } for (i = 0; i < nr_fixed_relocs; i++) { + struct vc4_bo *vc4_bo; uint32_t o = relocs[i].offset; uint32_t src_offset = *(uint32_t *)(pkt_u + o); uint32_t *texture_handles_u; @@ -858,34 +855,34 @@ validate_shader_rec(struct drm_device *d switch (relocs[i].type) { case RELOC_CODE: if (src_offset != 0) { - DRM_ERROR("Shaders must be at offset 0 of " - "the BO.\n"); + DRM_ERROR("Shaders must be at offset 0 " + "of the BO.\n"); goto fail; } - validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader; - if (!validated_shader) + vc4_bo = to_vc4_bo(&bo[i]->base); + shader = vc4_bo->validated_shader; + if (!shader) goto fail; - if (validated_shader->uniforms_src_size > - exec->uniforms_size) { + if (shader->uniforms_src_size > exec->uniforms_size) { DRM_ERROR("Uniforms src buffer overflow\n"); goto fail; } texture_handles_u = exec->uniforms_u; uniform_data_u = (texture_handles_u + - validated_shader->num_texture_samples); + shader->num_texture_samples); memcpy(exec->uniforms_v, uniform_data_u, - validated_shader->uniforms_size); + shader->uniforms_size); for (tex = 0; - tex < validated_shader->num_texture_samples; + tex < 
shader->num_texture_samples; tex++) { if (!reloc_tex(exec, uniform_data_u, - &validated_shader->texture_samples[tex], + &shader->texture_samples[tex], texture_handles_u[tex])) { goto fail; } @@ -893,9 +890,9 @@ validate_shader_rec(struct drm_device *d *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; - exec->uniforms_u += validated_shader->uniforms_src_size; - exec->uniforms_v += validated_shader->uniforms_size; - exec->uniforms_p += validated_shader->uniforms_size; + exec->uniforms_u += shader->uniforms_src_size; + exec->uniforms_v += shader->uniforms_size; + exec->uniforms_p += shader->uniforms_size; break; @@ -926,7 +923,8 @@ validate_shader_rec(struct drm_device *d max_index = ((vbo->base.size - offset - attr_size) / stride); if (state->max_index > max_index) { - DRM_ERROR("primitives use index %d out of supplied %d\n", + DRM_ERROR("primitives use index %d out of " + "supplied %d\n", state->max_index, max_index); return -EINVAL; } --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -24,24 +24,16 @@ /** * DOC: Shader validator for VC4. * - * The VC4 has no IOMMU between it and system memory. So, a user with access - * to execute shaders could escalate privilege by overwriting system memory - * (using the VPM write address register in the general-purpose DMA mode) or - * reading system memory it shouldn't (reading it as a texture, or uniform - * data, or vertex data). + * The VC4 has no IOMMU between it and system memory, so a user with + * access to execute shaders could escalate privilege by overwriting + * system memory (using the VPM write address register in the + * general-purpose DMA mode) or reading system memory it shouldn't + * (reading it as a texture, or uniform data, or vertex data). * - * This walks over a shader starting from some offset within a BO, ensuring - * that its accesses are appropriately bounded, and recording how many texture - * accesses are made and where so that we can do relocations for them in the + * This walks over a shader BO, ensuring that its accesses are + * appropriately bounded, and recording how many texture accesses are + * made and where so that we can do relocations for them in the * uniform stream. - * - * The kernel API has shaders stored in user-mapped BOs. The BOs will be - * forcibly unmapped from the process before validation, and any cache of - * validated state will be flushed if the mapping is faulted back in. - * - * Storing the shaders in BOs means that the validation process will be slow - * due to uncached reads, but since shaders are long-lived and shader BOs are - * never actually modified, this shouldn't be a problem. 
*/ #include "vc4_drv.h" @@ -70,7 +62,6 @@ waddr_to_live_reg_index(uint32_t waddr, else return waddr; } else if (waddr <= QPU_W_ACC3) { - return 64 + waddr - QPU_W_ACC0; } else { return ~0; @@ -85,15 +76,14 @@ raddr_add_a_to_live_reg_index(uint64_t i uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); - if (add_a == QPU_MUX_A) { + if (add_a == QPU_MUX_A) return raddr_a; - } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) { + else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) return 32 + raddr_b; - } else if (add_a <= QPU_MUX_R3) { + else if (add_a <= QPU_MUX_R3) return 64 + add_a; - } else { + else return ~0; - } } static bool @@ -111,9 +101,9 @@ is_tmu_write(uint32_t waddr) } static bool -record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader, - struct vc4_shader_validation_state *validation_state, - int tmu) +record_texture_sample(struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + int tmu) { uint32_t s = validated_shader->num_texture_samples; int i; @@ -226,8 +216,8 @@ check_tmu_write(uint64_t inst, validated_shader->uniforms_size += 4; if (submit) { - if (!record_validated_texture_sample(validated_shader, - validation_state, tmu)) { + if (!record_texture_sample(validated_shader, + validation_state, tmu)) { return false; } @@ -238,10 +228,10 @@ check_tmu_write(uint64_t inst, } static bool -check_register_write(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, - struct vc4_shader_validation_state *validation_state, - bool is_mul) +check_reg_write(uint64_t inst, + struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + bool is_mul) { uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : @@ -297,7 +287,7 @@ check_register_write(uint64_t inst, return true; case QPU_W_TLB_STENCIL_SETUP: - return true; + return true; } return true; @@ -360,7 +350,7 @@ track_live_clamps(uint64_t inst, } validation_state->live_max_clamp_regs[lri_add] = true; - } if (op_add == QPU_A_MIN) { + } else if (op_add == QPU_A_MIN) { /* Track live clamps of a value clamped to a minimum of 0 and * a maximum of some uniform's offset. 
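 		 * The direct-TMU checks rely on this: an address built as
 		 * uniform base plus a clamped offset is provably bounded.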
*/ @@ -392,8 +382,10 @@ check_instruction_writes(uint64_t inst, return false; } - ok = (check_register_write(inst, validated_shader, validation_state, false) && - check_register_write(inst, validated_shader, validation_state, true)); + ok = (check_reg_write(inst, validated_shader, validation_state, + false) && + check_reg_write(inst, validated_shader, validation_state, + true)); track_live_clamps(inst, validated_shader, validation_state); @@ -441,7 +433,7 @@ vc4_validate_shader(struct drm_gem_cma_o shader = shader_obj->vaddr; max_ip = shader_obj->base.size / sizeof(uint64_t); - validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL); + validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); if (!validated_shader) return NULL; @@ -497,7 +489,7 @@ vc4_validate_shader(struct drm_gem_cma_o if (ip == max_ip) { DRM_ERROR("shader failed to terminate before " - "shader BO end at %d\n", + "shader BO end at %zd\n", shader_obj->base.size); goto fail; } --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -585,6 +585,13 @@ struct drm_driver { int (*gem_open_object) (struct drm_gem_object *, struct drm_file *); void (*gem_close_object) (struct drm_gem_object *, struct drm_file *); + /** + * Hook for allocating the GEM object struct, for use by core + * helpers. + */ + struct drm_gem_object *(*gem_create_object)(struct drm_device *dev, + size_t size); + /* prime: */ /* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */ int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv, @@ -639,7 +646,6 @@ struct drm_driver { u32 driver_features; int dev_priv_size; - size_t gem_obj_size; const struct drm_ioctl_desc *ioctls; int num_ioctls; const struct file_operations *fops;
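
For reviewers following the ioctl reshuffle above, here is a minimal
userspace sketch of the resulting allocate-and-map flow. It is
illustrative only, assuming the vc4 UAPI header shipped with this series
(struct drm_vc4_create_bo, struct drm_vc4_mmap_bo and the
DRM_IOCTL_VC4_* numbers), with error handling trimmed:

/* Minimal sketch, not part of the patch: allocate a BO and map it. */
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include "vc4_drm.h"

static void *vc4_bo_alloc_and_map(int fd, size_t size, uint32_t *handle)
{
	struct drm_vc4_create_bo create;
	struct drm_vc4_mmap_bo map;
	void *ptr;

	/* vc4_create_bo_ioctl() always allocates fresh (zeroed) CMA
	 * memory; the BO cache is reserved for kernel-internal BOs.
	 */
	memset(&create, 0, sizeof(create));
	create.size = size;
	if (drmIoctl(fd, DRM_IOCTL_VC4_CREATE_BO, &create) != 0)
		return NULL;
	*handle = create.handle;

	/* vc4_mmap_bo_ioctl() only reports the fake mmap offset that
	 * was set up at BO allocation time.
	 */
	memset(&map, 0, sizeof(map));
	map.handle = create.handle;
	if (drmIoctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map) != 0)
		return NULL;

	/* The mapping itself goes through vc4_mmap(), which refuses
	 * writable mappings of validated shader BOs.
	 */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, (off_t)map.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}

Shader BOs instead go through vc4_create_shader_bo_ioctl() and can never
be mapped writable, since the program must not change after validation.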