Diffstat (limited to 'target/linux/brcm2708/patches-4.14/950-0174-drm-vc4-Add-the-DRM_IOCTL_VC4_GEM_MADVISE-ioctl.patch')
-rw-r--r-- | target/linux/brcm2708/patches-4.14/950-0174-drm-vc4-Add-the-DRM_IOCTL_VC4_GEM_MADVISE-ioctl.patch | 870
1 file changed, 870 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.14/950-0174-drm-vc4-Add-the-DRM_IOCTL_VC4_GEM_MADVISE-ioctl.patch b/target/linux/brcm2708/patches-4.14/950-0174-drm-vc4-Add-the-DRM_IOCTL_VC4_GEM_MADVISE-ioctl.patch
new file mode 100644
index 0000000000..3d10eb11fb
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.14/950-0174-drm-vc4-Add-the-DRM_IOCTL_VC4_GEM_MADVISE-ioctl.patch
@@ -0,0 +1,870 @@
+From 4264bba50d050577580cc6309524e3d92959fff2 Mon Sep 17 00:00:00 2001
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+Date: Thu, 19 Oct 2017 14:57:48 +0200
+Subject: [PATCH 174/454] drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl
+
+This ioctl will allow us to purge inactive userspace buffers when the
+system is running out of contiguous memory.
+
+For now, the purge logic is rather dumb in that it does not try to
+release only the amount of BO needed to meet the last CMA alloc request
+but instead purges all objects placed in the purgeable pool as soon as
+we experience a CMA allocation failure.
+
+Note that the in-kernel BO cache is always purged before the purgeable
+cache because those objects are known to be unused while objects marked
+as purgeable by a userspace application/library might have to be
+restored when they are marked back as unpurgeable, which can be
+expensive.
+
+Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Signed-off-by: Eric Anholt <eric@anholt.net>
+Reviewed-by: Eric Anholt <eric@anholt.net>
+Link: https://patchwork.freedesktop.org/patch/msgid/20171019125748.3152-1-boris.brezillon@free-electrons.com
+(cherry picked from commit b9f19259b84dc648f207a46f3581d15eeaedf4b6)
+---
+ drivers/gpu/drm/vc4/vc4_bo.c    | 287 +++++++++++++++++++++++++++++++-
+ drivers/gpu/drm/vc4/vc4_drv.c   |  10 +-
+ drivers/gpu/drm/vc4/vc4_drv.h   |  30 ++++
+ drivers/gpu/drm/vc4/vc4_gem.c   | 156 ++++++++++++++++-
+ drivers/gpu/drm/vc4/vc4_plane.c |  20 +++
+ include/uapi/drm/vc4_drm.h      |  19 +++
+ 6 files changed, 507 insertions(+), 15 deletions(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_bo.c
++++ b/drivers/gpu/drm/vc4/vc4_bo.c
+@@ -53,6 +53,17 @@ static void vc4_bo_stats_dump(struct vc4
+ 			 vc4->bo_labels[i].size_allocated / 1024,
+ 			 vc4->bo_labels[i].num_allocated);
+ 	}
++
++	mutex_lock(&vc4->purgeable.lock);
++	if (vc4->purgeable.num)
++		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
++			 vc4->purgeable.size / 1024, vc4->purgeable.num);
++
++	if (vc4->purgeable.purged_num)
++		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO",
++			 vc4->purgeable.purged_size / 1024,
++			 vc4->purgeable.purged_num);
++	mutex_unlock(&vc4->purgeable.lock);
+ }
+ 
+ #ifdef CONFIG_DEBUG_FS
+@@ -75,6 +86,17 @@ int vc4_bo_stats_debugfs(struct seq_file
+ 	}
+ 	mutex_unlock(&vc4->bo_lock);
+ 
++	mutex_lock(&vc4->purgeable.lock);
++	if (vc4->purgeable.num)
++		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache",
++			   vc4->purgeable.size / 1024, vc4->purgeable.num);
++
++	if (vc4->purgeable.purged_num)
++		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO",
++			   vc4->purgeable.purged_size / 1024,
++			   vc4->purgeable.purged_num);
++	mutex_unlock(&vc4->purgeable.lock);
++
+ 	return 0;
+ }
+ #endif
+@@ -248,6 +270,109 @@ static void vc4_bo_cache_purge(struct dr
+ 	mutex_unlock(&vc4->bo_lock);
+ }
+ 
++void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo)
++{
++	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
++
++	mutex_lock(&vc4->purgeable.lock);
++	list_add_tail(&bo->size_head, &vc4->purgeable.list);
++	vc4->purgeable.num++;
++	vc4->purgeable.size += bo->base.base.size;
++	mutex_unlock(&vc4->purgeable.lock);
++}
++
++static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo)
++{
++	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
++
++	/* list_del_init() is used here because the caller might release
++	 * the purgeable lock in order to acquire the madv one and update the
++	 * madv status.
++	 * During this short period of time a user might decide to mark
++	 * the BO as unpurgeable, and if bo->madv is set to
++	 * VC4_MADV_DONTNEED it will try to remove the BO from the
++	 * purgeable list which will fail if the ->next/prev fields
++	 * are set to LIST_POISON1/LIST_POISON2 (which is what
++	 * list_del() does).
++	 * Re-initializing the list element guarantees that list_del()
++	 * will work correctly even if it's a NOP.
++	 */
++	list_del_init(&bo->size_head);
++	vc4->purgeable.num--;
++	vc4->purgeable.size -= bo->base.base.size;
++}
++
++void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo)
++{
++	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
++
++	mutex_lock(&vc4->purgeable.lock);
++	vc4_bo_remove_from_purgeable_pool_locked(bo);
++	mutex_unlock(&vc4->purgeable.lock);
++}
++
++static void vc4_bo_purge(struct drm_gem_object *obj)
++{
++	struct vc4_bo *bo = to_vc4_bo(obj);
++	struct drm_device *dev = obj->dev;
++
++	WARN_ON(!mutex_is_locked(&bo->madv_lock));
++	WARN_ON(bo->madv != VC4_MADV_DONTNEED);
++
++	drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
++
++	dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr);
++	bo->base.vaddr = NULL;
++	bo->madv = __VC4_MADV_PURGED;
++}
++
++static void vc4_bo_userspace_cache_purge(struct drm_device *dev)
++{
++	struct vc4_dev *vc4 = to_vc4_dev(dev);
++
++	mutex_lock(&vc4->purgeable.lock);
++	while (!list_empty(&vc4->purgeable.list)) {
++		struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list,
++						     struct vc4_bo, size_head);
++		struct drm_gem_object *obj = &bo->base.base;
++		size_t purged_size = 0;
++
++		vc4_bo_remove_from_purgeable_pool_locked(bo);
++
++		/* Release the purgeable lock while we're purging the BO so
++		 * that other people can continue inserting things in the
++		 * purgeable pool without having to wait for all BOs to be
++		 * purged.
++		 */
++		mutex_unlock(&vc4->purgeable.lock);
++		mutex_lock(&bo->madv_lock);
++
++		/* Since we released the purgeable pool lock before acquiring
++		 * the BO madv one, the user may have marked the BO as WILLNEED
++		 * and re-used it in the meantime.
++		 * Before purging the BO we need to make sure
++		 * - it is still marked as DONTNEED
++		 * - it has not been re-inserted in the purgeable list
++		 * - it is not used by HW blocks
++		 * If one of these conditions is not met, just skip the entry.
++		 */
++		if (bo->madv == VC4_MADV_DONTNEED &&
++		    list_empty(&bo->size_head) &&
++		    !refcount_read(&bo->usecnt)) {
++			purged_size = bo->base.base.size;
++			vc4_bo_purge(obj);
++		}
++		mutex_unlock(&bo->madv_lock);
++		mutex_lock(&vc4->purgeable.lock);
++
++		if (purged_size) {
++			vc4->purgeable.purged_size += purged_size;
++			vc4->purgeable.purged_num++;
++		}
++	}
++	mutex_unlock(&vc4->purgeable.lock);
++}
++
+ static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
+ 					    uint32_t size,
+ 					    enum vc4_kernel_bo_type type)
+@@ -294,6 +419,9 @@ struct drm_gem_object *vc4_create_object
+ 	if (!bo)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	bo->madv = VC4_MADV_WILLNEED;
++	refcount_set(&bo->usecnt, 0);
++	mutex_init(&bo->madv_lock);
+ 	mutex_lock(&vc4->bo_lock);
+ 	bo->label = VC4_BO_TYPE_KERNEL;
+ 	vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
+@@ -331,16 +459,38 @@ struct vc4_bo *vc4_bo_create(struct drm_
+ 		 * CMA allocations we've got laying around and try again.
+ 		 */
+ 		vc4_bo_cache_purge(dev);
++		cma_obj = drm_gem_cma_create(dev, size);
++	}
+ 
++	if (IS_ERR(cma_obj)) {
++		/*
++		 * Still not enough CMA memory, purge the userspace BO
++		 * cache and retry.
++		 * This is sub-optimal since we purge the whole userspace
++		 * BO cache which forces user that want to re-use the BO to
++		 * restore its initial content.
++		 * Ideally, we should purge entries one by one and retry
++		 * after each to see if CMA allocation succeeds. Or even
++		 * better, try to find an entry with at least the same
++		 * size.
++		 */
++		vc4_bo_userspace_cache_purge(dev);
+ 		cma_obj = drm_gem_cma_create(dev, size);
+-		if (IS_ERR(cma_obj)) {
+-			DRM_ERROR("Failed to allocate from CMA:\n");
+-			vc4_bo_stats_dump(vc4);
+-			return ERR_PTR(-ENOMEM);
+-		}
++	}
++
++	if (IS_ERR(cma_obj)) {
++		DRM_ERROR("Failed to allocate from CMA:\n");
++		vc4_bo_stats_dump(vc4);
++		return ERR_PTR(-ENOMEM);
+ 	}
+ 	bo = to_vc4_bo(&cma_obj->base);
+ 
++	/* By default, BOs do not support the MADV ioctl. This will be enabled
++	 * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB
++	 * BOs).
++	 */
++	bo->madv = __VC4_MADV_NOTSUPP;
++
+ 	mutex_lock(&vc4->bo_lock);
+ 	vc4_bo_set_label(&cma_obj->base, type);
+ 	mutex_unlock(&vc4->bo_lock);
+@@ -366,6 +516,8 @@ int vc4_dumb_create(struct drm_file *fil
+ 	if (IS_ERR(bo))
+ 		return PTR_ERR(bo);
+ 
++	bo->madv = VC4_MADV_WILLNEED;
++
+ 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+ 	drm_gem_object_put_unlocked(&bo->base.base);
+ 
+@@ -404,6 +556,12 @@ void vc4_free_object(struct drm_gem_obje
+ 	struct vc4_bo *bo = to_vc4_bo(gem_bo);
+ 	struct list_head *cache_list;
+ 
++	/* Remove the BO from the purgeable list. */
++	mutex_lock(&bo->madv_lock);
++	if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt))
++		vc4_bo_remove_from_purgeable_pool(bo);
++	mutex_unlock(&bo->madv_lock);
++
+ 	mutex_lock(&vc4->bo_lock);
+ 	/* If the object references someone else's memory, we can't cache it.
+ 	 */
+@@ -419,7 +577,8 @@ void vc4_free_object(struct drm_gem_obje
+ 	}
+ 
+ 	/* If this object was partially constructed but CMA allocation
+-	 * had failed, just free it.
++	 * had failed, just free it. Can also happen when the BO has been
++	 * purged.
+ 	 */
+ 	if (!bo->base.vaddr) {
+ 		vc4_bo_destroy(bo);
+@@ -439,6 +598,10 @@ void vc4_free_object(struct drm_gem_obje
+ 		bo->validated_shader = NULL;
+ 	}
+ 
++	/* Reset madv and usecnt before adding the BO to the cache. */
++	bo->madv = __VC4_MADV_NOTSUPP;
++	refcount_set(&bo->usecnt, 0);
++
+ 	bo->t_format = false;
+ 	bo->free_time = jiffies;
+ 	list_add(&bo->size_head, cache_list);
+@@ -463,6 +626,56 @@ static void vc4_bo_cache_time_work(struc
+ 	mutex_unlock(&vc4->bo_lock);
+ }
+ 
++int vc4_bo_inc_usecnt(struct vc4_bo *bo)
++{
++	int ret;
++
++	/* Fast path: if the BO is already retained by someone, no need to
++	 * check the madv status.
++	 */
++	if (refcount_inc_not_zero(&bo->usecnt))
++		return 0;
++
++	mutex_lock(&bo->madv_lock);
++	switch (bo->madv) {
++	case VC4_MADV_WILLNEED:
++		refcount_inc(&bo->usecnt);
++		ret = 0;
++		break;
++	case VC4_MADV_DONTNEED:
++		/* We shouldn't use a BO marked as purgeable if at least
++		 * someone else retained its content by incrementing usecnt.
++		 * Luckily the BO hasn't been purged yet, but something wrong
++		 * is happening here. Just throw an error instead of
++		 * authorizing this use case.
++		 */
++	case __VC4_MADV_PURGED:
++		/* We can't use a purged BO. */
++	default:
++		/* Invalid madv value. */
++		ret = -EINVAL;
++		break;
++	}
++	mutex_unlock(&bo->madv_lock);
++
++	return ret;
++}
++
++void vc4_bo_dec_usecnt(struct vc4_bo *bo)
++{
++	/* Fast path: if the BO is still retained by someone, no need to test
++	 * the madv value.
++	 */
++	if (refcount_dec_not_one(&bo->usecnt))
++		return;
++
++	mutex_lock(&bo->madv_lock);
++	if (refcount_dec_and_test(&bo->usecnt) &&
++	    bo->madv == VC4_MADV_DONTNEED)
++		vc4_bo_add_to_purgeable_pool(bo);
++	mutex_unlock(&bo->madv_lock);
++}
++
+ static void vc4_bo_cache_time_timer(unsigned long data)
+ {
+ 	struct drm_device *dev = (struct drm_device *)data;
+@@ -482,18 +695,52 @@ struct dma_buf *
+ vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
+ {
+ 	struct vc4_bo *bo = to_vc4_bo(obj);
++	struct dma_buf *dmabuf;
++	int ret;
+ 
+ 	if (bo->validated_shader) {
+ 		DRM_DEBUG("Attempting to export shader BO\n");
+ 		return ERR_PTR(-EINVAL);
+ 	}
+ 
+-	return drm_gem_prime_export(dev, obj, flags);
++	/* Note: as soon as the BO is exported it becomes unpurgeable, because
++	 * noone ever decrements the usecnt even if the reference held by the
++	 * exported BO is released. This shouldn't be a problem since we don't
++	 * expect exported BOs to be marked as purgeable.
++	 */
++	ret = vc4_bo_inc_usecnt(bo);
++	if (ret) {
++		DRM_ERROR("Failed to increment BO usecnt\n");
++		return ERR_PTR(ret);
++	}
++
++	dmabuf = drm_gem_prime_export(dev, obj, flags);
++	if (IS_ERR(dmabuf))
++		vc4_bo_dec_usecnt(bo);
++
++	return dmabuf;
++}
++
++int vc4_fault(struct vm_fault *vmf)
++{
++	struct vm_area_struct *vma = vmf->vma;
++	struct drm_gem_object *obj = vma->vm_private_data;
++	struct vc4_bo *bo = to_vc4_bo(obj);
++
++	/* The only reason we would end up here is when user-space accesses
++	 * BO's memory after it's been purged.
++	 */
++	mutex_lock(&bo->madv_lock);
++	WARN_ON(bo->madv != __VC4_MADV_PURGED);
++	mutex_unlock(&bo->madv_lock);
++
++	return VM_FAULT_SIGBUS;
+ }
+ 
+ int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
+ {
+ 	struct drm_gem_object *gem_obj;
++	unsigned long vm_pgoff;
+ 	struct vc4_bo *bo;
+ 	int ret;
+ 
+@@ -509,16 +756,36 @@ int vc4_mmap(struct file *filp, struct v
+ 		return -EINVAL;
+ 	}
+ 
++	if (bo->madv != VC4_MADV_WILLNEED) {
++		DRM_DEBUG("mmaping of %s BO not allowed\n",
++			  bo->madv == VC4_MADV_DONTNEED ?
++			  "purgeable" : "purged");
++		return -EINVAL;
++	}
++
+ 	/*
+ 	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+ 	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+ 	 * the whole buffer.
+ 	 */
+ 	vma->vm_flags &= ~VM_PFNMAP;
+-	vma->vm_pgoff = 0;
+ 
++	/* This ->vm_pgoff dance is needed to make all parties happy:
++	 * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated
++	 *   mem-region, hence the need to set it to zero (the value set by
++	 *   the DRM core is a virtual offset encoding the GEM object-id)
++	 * - the mmap() core logic needs ->vm_pgoff to be restored to its
++	 *   initial value before returning from this function because it
++	 *   encodes the offset of this GEM in the dev->anon_inode pseudo-file
++	 *   and this information will be used when we invalidate userspace
++	 *   mappings with drm_vma_node_unmap() (called from vc4_gem_purge()).
++	 */
++	vm_pgoff = vma->vm_pgoff;
++	vma->vm_pgoff = 0;
+ 	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
+ 			  bo->base.paddr, vma->vm_end - vma->vm_start);
++	vma->vm_pgoff = vm_pgoff;
++
+ 	if (ret)
+ 		drm_gem_vm_close(vma);
+ 
+@@ -582,6 +849,8 @@ int vc4_create_bo_ioctl(struct drm_devic
+ 	if (IS_ERR(bo))
+ 		return PTR_ERR(bo);
+ 
++	bo->madv = VC4_MADV_WILLNEED;
++
+ 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+ 	drm_gem_object_put_unlocked(&bo->base.base);
+ 
+@@ -635,6 +904,8 @@ vc4_create_shader_bo_ioctl(struct drm_de
+ 	if (IS_ERR(bo))
+ 		return PTR_ERR(bo);
+ 
++	bo->madv = VC4_MADV_WILLNEED;
++
+ 	if (copy_from_user(bo->base.vaddr,
+ 			   (void __user *)(uintptr_t)args->data,
+ 			   args->size)) {
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -100,6 +100,7 @@ static int vc4_get_param_ioctl(struct dr
+ 	case DRM_VC4_PARAM_SUPPORTS_ETC1:
+ 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
+ 	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
++	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
+ 		args->value = true;
+ 		break;
+ 	default:
+@@ -117,6 +118,12 @@ static void vc4_lastclose(struct drm_dev
+ 		drm_fbdev_cma_restore_mode(vc4->fbdev);
+ }
+ 
++static const struct vm_operations_struct vc4_vm_ops = {
++	.fault = vc4_fault,
++	.open = drm_gem_vm_open,
++	.close = drm_gem_vm_close,
++};
++
+ static const struct file_operations vc4_drm_fops = {
+ 	.owner = THIS_MODULE,
+ 	.open = drm_open,
+@@ -142,6 +149,7 @@ static const struct drm_ioctl_desc vc4_d
+ 	DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
+ 	DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
+ 	DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
++	DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
+ };
+ 
+ static struct drm_driver vc4_drm_driver = {
+@@ -166,7 +174,7 @@ static struct drm_driver vc4_drm_driver
+ 
+ 	.gem_create_object = vc4_create_object,
+ 	.gem_free_object_unlocked = vc4_free_object,
+-	.gem_vm_ops = &drm_gem_cma_vm_ops,
++	.gem_vm_ops = &vc4_vm_ops,
+ 
+ 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -77,6 +77,19 @@ struct vc4_dev {
+ 	/* Protects bo_cache and bo_labels. */
+ 	struct mutex bo_lock;
+ 
++	/* Purgeable BO pool. All BOs in this pool can have their memory
++	 * reclaimed if the driver is unable to allocate new BOs. We also
++	 * keep stats related to the purge mechanism here.
++	 */
++	struct {
++		struct list_head list;
++		unsigned int num;
++		size_t size;
++		unsigned int purged_num;
++		size_t purged_size;
++		struct mutex lock;
++	} purgeable;
++
+ 	uint64_t dma_fence_context;
+ 
+ 	/* Sequence number for the last job queued in bin_job_list.
+@@ -195,6 +208,16 @@ struct vc4_bo {
+ 	 * for user-allocated labels.
+ 	 */
+ 	int label;
++
++	/* Count the number of active users. This is needed to determine
++	 * whether we can move the BO to the purgeable list or not (when the BO
++	 * is used by the GPU or the display engine we can't purge it).
++	 */
++	refcount_t usecnt;
++
++	/* Store purgeable/purged state here */
++	u32 madv;
++	struct mutex madv_lock;
+ };
+ 
+ static inline struct vc4_bo *
+@@ -506,6 +529,7 @@ int vc4_get_hang_state_ioctl(struct drm_
+ 			     struct drm_file *file_priv);
+ int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+ 		       struct drm_file *file_priv);
++int vc4_fault(struct vm_fault *vmf);
+ int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+ struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
+ int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+@@ -516,6 +540,10 @@ void *vc4_prime_vmap(struct drm_gem_obje
+ int vc4_bo_cache_init(struct drm_device *dev);
+ void vc4_bo_cache_destroy(struct drm_device *dev);
+ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
++int vc4_bo_inc_usecnt(struct vc4_bo *bo);
++void vc4_bo_dec_usecnt(struct vc4_bo *bo);
++void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
++void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
+ 
+ /* vc4_crtc.c */
+ extern struct platform_driver vc4_crtc_driver;
+@@ -564,6 +592,8 @@ void vc4_job_handle_completed(struct vc4
+ int vc4_queue_seqno_cb(struct drm_device *dev,
+ 		       struct vc4_seqno_cb *cb, uint64_t seqno,
+ 		       void (*func)(struct vc4_seqno_cb *cb));
++int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
++			  struct drm_file *file_priv);
+ 
+ /* vc4_hdmi.c */
+ extern struct platform_driver vc4_hdmi_driver;
+--- a/drivers/gpu/drm/vc4/vc4_gem.c
++++ b/drivers/gpu/drm/vc4/vc4_gem.c
+@@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *d
+ 			continue;
+ 
+ 		for (j = 0; j < exec[i]->bo_count; j++) {
++			bo = to_vc4_bo(&exec[i]->bo[j]->base);
++
++			/* Retain BOs just in case they were marked purgeable.
++			 * This prevents the BO from being purged before
++			 * someone had a chance to dump the hang state.
++			 */
++			WARN_ON(!refcount_read(&bo->usecnt));
++			refcount_inc(&bo->usecnt);
+ 			drm_gem_object_get(&exec[i]->bo[j]->base);
+ 			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
+ 		}
+ 
+ 		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
++			/* No need to retain BOs coming from the ->unref_list
++			 * because they are naturally unpurgeable.
++			 */
+ 			drm_gem_object_get(&bo->base.base);
+ 			kernel_state->bo[k++] = &bo->base.base;
+ 		}
+@@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *d
+ 	state->fdbgs = V3D_READ(V3D_FDBGS);
+ 	state->errstat = V3D_READ(V3D_ERRSTAT);
+ 
++	/* We need to turn purgeable BOs into unpurgeable ones so that
++	 * userspace has a chance to dump the hang state before the kernel
++	 * decides to purge those BOs.
++	 * Note that BO consistency at dump time cannot be guaranteed. For
++	 * example, if the owner of these BOs decides to re-use them or mark
++	 * them purgeable again there's nothing we can do to prevent it.
++	 */
++	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
++		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
++
++		if (bo->madv == __VC4_MADV_NOTSUPP)
++			continue;
++
++		mutex_lock(&bo->madv_lock);
++		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
++			bo->madv = VC4_MADV_WILLNEED;
++		refcount_dec(&bo->usecnt);
++		mutex_unlock(&bo->madv_lock);
++	}
++
+ 	spin_lock_irqsave(&vc4->job_lock, irqflags);
+ 	if (vc4->hang_state) {
+ 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+@@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev,
+  * The command validator needs to reference BOs by their index within
+  * the submitted job's BO list. This does the validation of the job's
+  * BO list and reference counting for the lifetime of the job.
+- *
+- * Note that this function doesn't need to unreference the BOs on
+- * failure, because that will happen at vc4_complete_exec() time.
+  */
+ static int
+ vc4_cl_lookup_bos(struct drm_device *dev,
+@@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev
+ 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+ 				  i, handles[i]);
+ 			ret = -EINVAL;
+-			spin_unlock(&file_priv->table_lock);
+-			goto fail;
++			break;
+ 		}
++
+ 		drm_gem_object_get(bo);
+ 		exec->bo[i] = (struct drm_gem_cma_object *)bo;
+ 	}
+ 	spin_unlock(&file_priv->table_lock);
+ 
++	if (ret)
++		goto fail_put_bo;
++
++	for (i = 0; i < exec->bo_count; i++) {
++		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
++		if (ret)
++			goto fail_dec_usecnt;
++	}
++
++	kvfree(handles);
++	return 0;
++
++fail_dec_usecnt:
++	/* Decrease usecnt on acquired objects.
++	 * We cannot rely on vc4_complete_exec() to release resources here,
++	 * because vc4_complete_exec() has no information about which BO has
++	 * had its ->usecnt incremented.
++	 * To make things easier we just free everything explicitly and set
++	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
++	 * step.
++	 */
++	for (i-- ; i >= 0; i--)
++		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
++
++fail_put_bo:
++	/* Release any reference to acquired objects. */
++	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
++		drm_gem_object_put_unlocked(&exec->bo[i]->base);
++
+ fail:
+ 	kvfree(handles);
++	kvfree(exec->bo);
++	exec->bo = NULL;
+ 	return ret;
+ }
+ 
+@@ -835,8 +894,12 @@ vc4_complete_exec(struct drm_device *dev
+ 	}
+ 
+ 	if (exec->bo) {
+-		for (i = 0; i < exec->bo_count; i++)
++		for (i = 0; i < exec->bo_count; i++) {
++			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
++
++			vc4_bo_dec_usecnt(bo);
+ 			drm_gem_object_put_unlocked(&exec->bo[i]->base);
++		}
+ 		kvfree(exec->bo);
+ 	}
+ 
+@@ -1100,6 +1163,9 @@ vc4_gem_init(struct drm_device *dev)
+ 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+ 
+ 	mutex_init(&vc4->power_lock);
++
++	INIT_LIST_HEAD(&vc4->purgeable.list);
++	mutex_init(&vc4->purgeable.lock);
+ }
+ 
+ void
+@@ -1123,3 +1189,81 @@ vc4_gem_destroy(struct drm_device *dev)
+ 	if (vc4->hang_state)
+ 		vc4_free_hang_state(dev, vc4->hang_state);
+ }
++
++int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
++			  struct drm_file *file_priv)
++{
++	struct drm_vc4_gem_madvise *args = data;
++	struct drm_gem_object *gem_obj;
++	struct vc4_bo *bo;
++	int ret;
++
++	switch (args->madv) {
++	case VC4_MADV_DONTNEED:
++	case VC4_MADV_WILLNEED:
++		break;
++	default:
++		return -EINVAL;
++	}
++
++	if (args->pad != 0)
++		return -EINVAL;
++
++	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
++	if (!gem_obj) {
++		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
++		return -ENOENT;
++	}
++
++	bo = to_vc4_bo(gem_obj);
++
++	/* Only BOs exposed to userspace can be purged. */
++	if (bo->madv == __VC4_MADV_NOTSUPP) {
++		DRM_DEBUG("madvise not supported on this BO\n");
++		ret = -EINVAL;
++		goto out_put_gem;
++	}
++
++	/* Not sure it's safe to purge imported BOs. Let's just assume it's
++	 * not until proven otherwise.
++	 */
++	if (gem_obj->import_attach) {
++		DRM_DEBUG("madvise not supported on imported BOs\n");
++		ret = -EINVAL;
++		goto out_put_gem;
++	}
++
++	mutex_lock(&bo->madv_lock);
++
++	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
++	    !refcount_read(&bo->usecnt)) {
++		/* If the BO is about to be marked as purgeable, is not used
++		 * and is not already purgeable or purged, add it to the
++		 * purgeable list.
++		 */
++		vc4_bo_add_to_purgeable_pool(bo);
++	} else if (args->madv == VC4_MADV_WILLNEED &&
++		   bo->madv == VC4_MADV_DONTNEED &&
++		   !refcount_read(&bo->usecnt)) {
++		/* The BO has not been purged yet, just remove it from
++		 * the purgeable list.
++		 */
++		vc4_bo_remove_from_purgeable_pool(bo);
++	}
++
++	/* Save the purged state. */
++	args->retained = bo->madv != __VC4_MADV_PURGED;
++
++	/* Update internal madv state only if the bo was not purged. */
++	if (bo->madv != __VC4_MADV_PURGED)
++		bo->madv = args->madv;
++
++	mutex_unlock(&bo->madv_lock);
++
++	ret = 0;
++
++out_put_gem:
++	drm_gem_object_put_unlocked(gem_obj);
++
++	return ret;
++}
+--- a/drivers/gpu/drm/vc4/vc4_plane.c
++++ b/drivers/gpu/drm/vc4/vc4_plane.c
+@@ -23,6 +23,7 @@
+ #include <drm/drm_fb_cma_helper.h>
+ #include <drm/drm_plane_helper.h>
+ 
++#include "uapi/drm/vc4_drm.h"
+ #include "vc4_drv.h"
+ #include "vc4_regs.h"
+ 
+@@ -776,21 +777,40 @@ static int vc4_prepare_fb(struct drm_pla
+ {
+ 	struct vc4_bo *bo;
+ 	struct dma_fence *fence;
++	int ret;
+ 
+ 	if ((plane->state->fb == state->fb) || !state->fb)
+ 		return 0;
+ 
+ 	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
++
++	ret = vc4_bo_inc_usecnt(bo);
++	if (ret)
++		return ret;
++
+ 	fence = reservation_object_get_excl_rcu(bo->resv);
+ 	drm_atomic_set_fence_for_plane(state, fence);
+ 
+ 	return 0;
+ }
+ 
++static void vc4_cleanup_fb(struct drm_plane *plane,
++			   struct drm_plane_state *state)
++{
++	struct vc4_bo *bo;
++
++	if (plane->state->fb == state->fb || !state->fb)
++		return;
++
++	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
++	vc4_bo_dec_usecnt(bo);
++}
++
+ static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
+ 	.atomic_check = vc4_plane_atomic_check,
+ 	.atomic_update = vc4_plane_atomic_update,
+ 	.prepare_fb = vc4_prepare_fb,
++	.cleanup_fb = vc4_cleanup_fb,
+ };
+ 
+ static void vc4_plane_destroy(struct drm_plane *plane)
+--- a/include/uapi/drm/vc4_drm.h
++++ b/include/uapi/drm/vc4_drm.h
+@@ -41,6 +41,7 @@ extern "C" {
+ #define DRM_VC4_SET_TILING                        0x08
+ #define DRM_VC4_GET_TILING                        0x09
+ #define DRM_VC4_LABEL_BO                          0x0a
++#define DRM_VC4_GEM_MADVISE                       0x0b
+ 
+ #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+ #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+@@ -53,6 +54,7 @@ extern "C" {
+ #define DRM_IOCTL_VC4_SET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
+ #define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
+ #define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
++#define DRM_IOCTL_VC4_GEM_MADVISE         DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
+ 
+ struct drm_vc4_submit_rcl_surface {
+ 	__u32 hindex; /* Handle index, or ~0 if not present. */
+@@ -305,6 +307,7 @@ struct drm_vc4_get_hang_state {
+ #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
+ #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
+ #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
++#define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
+ 
+ struct drm_vc4_get_param {
+ 	__u32 param;
+@@ -333,6 +336,22 @@ struct drm_vc4_label_bo {
+ 	__u64 name;
+ };
+ 
++/*
++ * States prefixed with '__' are internal states and cannot be passed to the
++ * DRM_IOCTL_VC4_GEM_MADVISE ioctl.
++ */
++#define VC4_MADV_WILLNEED			0
++#define VC4_MADV_DONTNEED			1
++#define __VC4_MADV_PURGED			2
++#define __VC4_MADV_NOTSUPP			3
++
++struct drm_vc4_gem_madvise {
++	__u32 handle;
++	__u32 madv;
++	__u32 retained;
++	__u32 pad;
++};
++
+ #if defined(__cplusplus)
+ }
+ #endif
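For readers who want to exercise the new interface, the sketch below shows how a userspace BO cache might drive DRM_IOCTL_VC4_GEM_MADVISE against the uapi added by this patch, probing DRM_VC4_PARAM_SUPPORTS_MADVISE first. This is an illustrative, untested sketch, not code from the patch: the helper names are made up, error handling is minimal, and the include path for the uapi header is an assumption (kernel headers install it as <drm/vc4_drm.h>; libdrm ships its own copy).

/* Hypothetical helpers around the VC4_GEM_MADVISE uapi added above.
 * Assumptions: drm_fd is an open vc4 DRM fd and handle is a valid GEM
 * handle owned by that fd.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>	/* include path is an assumption */

/* Returns 1 if the running kernel implements VC4_GEM_MADVISE. */
static int vc4_supports_madvise(int drm_fd)
{
	struct drm_vc4_get_param p;

	memset(&p, 0, sizeof(p));
	p.param = DRM_VC4_PARAM_SUPPORTS_MADVISE;
	if (ioctl(drm_fd, DRM_IOCTL_VC4_GET_PARAM, &p) < 0)
		return 0;	/* older kernel: param unknown */
	return p.value != 0;
}

/* Mark a BO VC4_MADV_DONTNEED when it goes idle in a cache, and
 * VC4_MADV_WILLNEED before reusing it.  On success, *retained is 0 if
 * the kernel purged the BO's storage while it was marked DONTNEED.
 * Returns 0 on success, -1 on error (errno set by ioctl()).
 */
static int vc4_bo_madvise(int drm_fd, uint32_t handle, uint32_t madv,
			  uint32_t *retained)
{
	struct drm_vc4_gem_madvise args;

	memset(&args, 0, sizeof(args));	/* args.pad must be zero */
	args.handle = handle;
	args.madv = madv;

	if (ioctl(drm_fd, DRM_IOCTL_VC4_GEM_MADVISE, &args) < 0)
		return -1;

	if (retained)
		*retained = args.retained;
	return 0;
}

The retained flag is the heart of the contract: when a buffer is flipped back to WILLNEED, retained == 0 tells the caller that the kernel reclaimed the backing CMA memory in the meantime, so the contents must be regenerated rather than reused.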