aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/brcm2708/patches-4.19/950-0550-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/brcm2708/patches-4.19/950-0550-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch')
-rw-r--r-- target/linux/brcm2708/patches-4.19/950-0550-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch 1206
1 files changed, 1206 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.19/950-0550-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch b/target/linux/brcm2708/patches-4.19/950-0550-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch
new file mode 100644
index 0000000000..105309b439
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.19/950-0550-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch
@@ -0,0 +1,1206 @@
+From 275f4673d8c0601e5dbb16e743187d264e7dbed6 Mon Sep 17 00:00:00 2001
+From: Dave Stevenson <dave.stevenson@raspberrypi.org>
+Date: Fri, 21 Dec 2018 16:50:53 +0000
+Subject: [PATCH] staging: vc-sm-cma: Add in allocation for VPU
+ requests.
+
+Module has to change from tristate to bool as all CMA functions
+are boolean.
+
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
+---
+ .../staging/vc04_services/vc-sm-cma/Kconfig | 4 +-
+ .../staging/vc04_services/vc-sm-cma/Makefile | 2 +-
+ .../staging/vc04_services/vc-sm-cma/vc_sm.c | 642 +++++++++++++++---
+ .../staging/vc04_services/vc-sm-cma/vc_sm.h | 30 +-
+ .../vc04_services/vc-sm-cma/vc_sm_cma.c | 99 +++
+ .../vc04_services/vc-sm-cma/vc_sm_cma.h | 39 ++
+ .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.c | 10 +
+ .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.h | 4 +
+ .../vc04_services/vc-sm-cma/vc_sm_defs.h | 2 +
+ 9 files changed, 723 insertions(+), 109 deletions(-)
+ create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.c
+ create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.h
+
+--- a/drivers/staging/vc04_services/vc-sm-cma/Kconfig
++++ b/drivers/staging/vc04_services/vc-sm-cma/Kconfig
+@@ -1,6 +1,6 @@
+ config BCM_VC_SM_CMA
+- tristate "VideoCore Shared Memory (CMA) driver"
+- depends on BCM2835_VCHIQ
++ bool "VideoCore Shared Memory (CMA) driver"
++ depends on BCM2835_VCHIQ && DMA_CMA
+ select RBTREE
+ select DMA_SHARED_BUFFER
+ help
+--- a/drivers/staging/vc04_services/vc-sm-cma/Makefile
++++ b/drivers/staging/vc04_services/vc-sm-cma/Makefile
+@@ -3,6 +3,6 @@ ccflags-y += -Idrivers/staging/vc04_serv
+ ccflags-y += -D__VCCOREVER__=0
+
+ vc-sm-cma-$(CONFIG_BCM_VC_SM_CMA) := \
+- vc_sm.o vc_sm_cma_vchi.o
++ vc_sm.o vc_sm_cma_vchi.o vc_sm_cma.o
+
+ obj-$(CONFIG_BCM_VC_SM_CMA) += vc-sm-cma.o
+--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
+@@ -9,10 +9,21 @@
+ * and taking some code for CMA/dmabuf handling from the Android Ion
+ * driver (Google/Linaro).
+ *
+- * This is cut down version to only support import of dma_bufs from
+- * other kernel drivers. A more complete implementation of the old
+- * vmcs_sm functionality can follow later.
+ *
++ * This driver has 3 main uses:
++ * 1) Allocating buffers for the kernel or userspace that can be shared with the
++ * VPU.
++ * 2) Importing dmabufs from elsewhere for sharing with the VPU.
++ * 3) Allocating buffers for use by the VPU.
++ *
++ * In the first and second cases the native handle is a dmabuf. Releasing the
++ * resource inherently comes from releasing the dmabuf, and this will trigger
++ * unmapping on the VPU. The underlying allocation and our buffer structure are
++ * retained until the VPU has confirmed that it has finished with it.
++ *
++ * For the VPU allocations the VPU is responsible for triggering the release,
++ * and therefore the released message decrements the dma_buf refcount (with the
++ * VPU mapping having already been marked as released).
+ */
+
+ /* ---- Include Files ----------------------------------------------------- */
+@@ -39,6 +50,7 @@
+ #include "vc_sm_cma_vchi.h"
+
+ #include "vc_sm.h"
++#include "vc_sm_cma.h"
+ #include "vc_sm_knl.h"
+
+ /* ---- Private Constants and Types --------------------------------------- */
+@@ -72,6 +84,7 @@ struct sm_state_t {
+ struct platform_device *pdev;
+
+ struct sm_instance *sm_handle; /* Handle for videocore service. */
++ struct cma *cma_heap;
+
+ spinlock_t kernelid_map_lock; /* Spinlock protecting kernelid_map */
+ struct idr kernelid_map;
+@@ -80,6 +93,7 @@ struct sm_state_t {
+ struct list_head buffer_list; /* List of buffer. */
+
+ struct vc_sm_privdata_t *data_knl; /* Kernel internal data tracking. */
++ struct vc_sm_privdata_t *vpu_allocs; /* All allocations from the VPU */
+ struct dentry *dir_root; /* Debug fs entries root. */
+ struct sm_pde_t dir_state; /* Debug fs entries state sub-tree. */
+
+@@ -89,6 +103,12 @@ struct sm_state_t {
+ u32 int_trans_id; /* Interrupted transaction. */
+ };
+
++struct vc_sm_dma_buf_attachment {
++ struct device *dev;
++ struct sg_table *table;
++ struct list_head list;
++};
++
+ /* ---- Private Variables ----------------------------------------------- */
+
+ static struct sm_state_t *sm_state;
+@@ -172,12 +192,14 @@ static int vc_sm_cma_global_state_show(s
+ resource->size);
+ seq_printf(s, " DMABUF %p\n",
+ resource->dma_buf);
+- seq_printf(s, " ATTACH %p\n",
+- resource->attach);
++ if (resource->imported) {
++ seq_printf(s, " ATTACH %p\n",
++ resource->import.attach);
++ seq_printf(s, " SGT %p\n",
++ resource->import.sgt);
++ }
+ seq_printf(s, " SG_TABLE %p\n",
+ resource->sg_table);
+- seq_printf(s, " SGT %p\n",
+- resource->sgt);
+ seq_printf(s, " DMA_ADDR %pad\n",
+ &resource->dma_addr);
+ seq_printf(s, " VC_HANDLE %08x\n",
+@@ -209,17 +231,33 @@ static void vc_sm_add_resource(struct vc
+ }
+
+ /*
+- * Release an allocation.
+- * All refcounting is done via the dma buf object.
++ * Cleans up imported dmabuf.
+ */
+-static void vc_sm_release_resource(struct vc_sm_buffer *buffer, int force)
++static void vc_sm_clean_up_dmabuf(struct vc_sm_buffer *buffer)
+ {
+- mutex_lock(&sm_state->map_lock);
+- mutex_lock(&buffer->lock);
++ if (!buffer->imported)
++ return;
+
+- pr_debug("[%s]: buffer %p (name %s, size %zu)\n",
+- __func__, buffer, buffer->name, buffer->size);
++ /* Handle cleaning up imported dmabufs */
++ mutex_lock(&buffer->lock);
++ if (buffer->import.sgt) {
++ dma_buf_unmap_attachment(buffer->import.attach,
++ buffer->import.sgt,
++ DMA_BIDIRECTIONAL);
++ buffer->import.sgt = NULL;
++ }
++ if (buffer->import.attach) {
++ dma_buf_detach(buffer->dma_buf, buffer->import.attach);
++ buffer->import.attach = NULL;
++ }
++ mutex_unlock(&buffer->lock);
++}
+
++/*
++ * Instructs VPU to decrement the refcount on a buffer.
++ */
++static void vc_sm_vpu_free(struct vc_sm_buffer *buffer)
++{
+ if (buffer->vc_handle && buffer->vpu_state == VPU_MAPPED) {
+ struct vc_sm_free_t free = { buffer->vc_handle, 0 };
+ int status = vc_sm_cma_vchi_free(sm_state->sm_handle, &free,
+@@ -230,17 +268,32 @@ static void vc_sm_release_resource(struc
+ }
+
+ if (sm_state->require_released_callback) {
+- /* Need to wait for the VPU to confirm the free */
++ /* Need to wait for the VPU to confirm the free. */
+
+ /* Retain a reference on this until the VPU has
+ * released it
+ */
+ buffer->vpu_state = VPU_UNMAPPING;
+- goto defer;
++ } else {
++ buffer->vpu_state = VPU_NOT_MAPPED;
++ buffer->vc_handle = 0;
+ }
+- buffer->vpu_state = VPU_NOT_MAPPED;
+- buffer->vc_handle = 0;
+ }
++}
++
++/*
++ * Release an allocation.
++ * All refcounting is done via the dma buf object.
++ *
++ * Must be called with the mutex held. The function will either release the
++ * mutex (if deferring the release) or destroy it. The caller must therefore not
++ * reuse the buffer on return.
++ */
++static void vc_sm_release_resource(struct vc_sm_buffer *buffer)
++{
++ pr_debug("[%s]: buffer %p (name %s, size %zu)\n",
++ __func__, buffer, buffer->name, buffer->size);
++
+ if (buffer->vc_handle) {
+ /* We've sent the unmap request but not had the response. */
+ pr_err("[%s]: Waiting for VPU unmap response on %p\n",
+@@ -248,45 +301,43 @@ static void vc_sm_release_resource(struc
+ goto defer;
+ }
+ if (buffer->in_use) {
+- /* Don't release dmabuf here - we await the release */
++ /* dmabuf still in use - we await the release */
+ pr_err("[%s]: buffer %p is still in use\n",
+ __func__, buffer);
+ goto defer;
+ }
+
+- /* Handle cleaning up imported dmabufs */
+- if (buffer->sgt) {
+- dma_buf_unmap_attachment(buffer->attach, buffer->sgt,
+- DMA_BIDIRECTIONAL);
+- buffer->sgt = NULL;
+- }
+- if (buffer->attach) {
+- dma_buf_detach(buffer->dma_buf, buffer->attach);
+- buffer->attach = NULL;
+- }
+-
+- /* Release the dma_buf (whether ours or imported) */
+- if (buffer->import_dma_buf) {
+- dma_buf_put(buffer->import_dma_buf);
+- buffer->import_dma_buf = NULL;
+- buffer->dma_buf = NULL;
+- } else if (buffer->dma_buf) {
+- dma_buf_put(buffer->dma_buf);
+- buffer->dma_buf = NULL;
++ /* Release the allocation (whether imported dmabuf or CMA allocation) */
++ if (buffer->imported) {
++ pr_debug("%s: Release imported dmabuf %p\n", __func__,
++ buffer->import.dma_buf);
++ if (buffer->import.dma_buf)
++ dma_buf_put(buffer->import.dma_buf);
++ else
++ pr_err("%s: Imported dmabuf already been put for buf %p\n",
++ __func__, buffer);
++ buffer->import.dma_buf = NULL;
++ } else {
++ if (buffer->sg_table) {
++ /* Our own allocation that we need to dma_unmap_sg */
++ dma_unmap_sg(&sm_state->pdev->dev,
++ buffer->sg_table->sgl,
++ buffer->sg_table->nents,
++ DMA_BIDIRECTIONAL);
++ }
++ pr_debug("%s: Release our allocation\n", __func__);
++ vc_sm_cma_buffer_free(&buffer->alloc);
++ pr_debug("%s: Release our allocation - done\n", __func__);
+ }
+
+- if (buffer->sg_table && !buffer->import_dma_buf) {
+- /* Our own allocation that we need to dma_unmap_sg */
+- dma_unmap_sg(&sm_state->pdev->dev, buffer->sg_table->sgl,
+- buffer->sg_table->nents, DMA_BIDIRECTIONAL);
+- }
+
+- /* Free the local resource. Start by removing it from the list */
+- buffer->private = NULL;
++ /* Free our buffer. Start by removing it from the list */
++ mutex_lock(&sm_state->map_lock);
+ list_del(&buffer->global_buffer_list);
++ mutex_unlock(&sm_state->map_lock);
+
++ pr_debug("%s: Release our allocation - done\n", __func__);
+ mutex_unlock(&buffer->lock);
+- mutex_unlock(&sm_state->map_lock);
+
+ mutex_destroy(&buffer->lock);
+
+@@ -295,7 +346,7 @@ static void vc_sm_release_resource(struc
+
+ defer:
+ mutex_unlock(&buffer->lock);
+- mutex_unlock(&sm_state->map_lock);
++ return;
+ }
+
+ /* Create support for private data tracking. */
+@@ -317,16 +368,267 @@ static struct vc_sm_privdata_t *vc_sm_cm
+ return file_data;
+ }
+
++/* Clone @table for a new attachment; returns ERR_PTR(-ENOMEM) on failure. */
++static struct sg_table *dup_sg_table(struct sg_table *table)
++{
++	struct sg_table *new_table;
++	int ret, i;
++	struct scatterlist *sg, *new_sg;
++
++	new_table = kzalloc(sizeof(*new_table), GFP_KERNEL);
++	if (!new_table)
++		return ERR_PTR(-ENOMEM);
++
++	ret = sg_alloc_table(new_table, table->nents, GFP_KERNEL);
++	if (ret) {
++		kfree(new_table);
++		return ERR_PTR(-ENOMEM);
++	}
++	/* Copy each entry, zeroing dma_address in the copy, not in the source */
++	new_sg = new_table->sgl;
++	for_each_sg(table->sgl, sg, table->nents, i) {
++		memcpy(new_sg, sg, sizeof(*sg));
++		new_sg->dma_address = 0;
++		new_sg = sg_next(new_sg);
++	}
++	return new_table;
++}
++
++static void free_duped_table(struct sg_table *table)
++{
++ sg_free_table(table);
++ kfree(table);
++}
++
++/* Dma buf operations for use with our own allocations */
++
++/* dma_buf attach op: give the new attachment its own copy of the sg table. */
++static int vc_sm_dma_buf_attach(struct dma_buf *dmabuf,
++				struct dma_buf_attachment *attachment)
++{
++	struct vc_sm_dma_buf_attachment *a;
++	struct sg_table *table;
++	struct vc_sm_buffer *buf = dmabuf->priv;
++
++	a = kzalloc(sizeof(*a), GFP_KERNEL);
++	if (!a)
++		return -ENOMEM;
++
++	table = dup_sg_table(buf->sg_table);
++	if (IS_ERR(table)) {
++		kfree(a);
++		return -ENOMEM;
++	}
++
++	a->table = table;
++	/* The cpu_access ops pass a->dev to dma_sync_sg_for_*() */
++	a->dev = attachment->dev;
++	INIT_LIST_HEAD(&a->list);
++	attachment->priv = a;
++
++	mutex_lock(&buf->lock);
++	list_add(&a->list, &buf->attachments);
++	mutex_unlock(&buf->lock);
++	pr_debug("%s dmabuf %p attachment %p\n", __func__, dmabuf, attachment);
++	return 0;
++}
++
++static void vc_sm_dma_buf_detatch(struct dma_buf *dmabuf,
++ struct dma_buf_attachment *attachment)
++{
++ struct vc_sm_dma_buf_attachment *a = attachment->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
++
++ pr_debug("%s dmabuf %p attachment %p\n", __func__, dmabuf, attachment);
++ free_duped_table(a->table);
++ mutex_lock(&buf->lock);
++ list_del(&a->list);
++ mutex_unlock(&buf->lock);
++
++ kfree(a);
++}
++
++static struct sg_table *vc_sm_map_dma_buf(struct dma_buf_attachment *attachment,
++ enum dma_data_direction direction)
++{
++ struct vc_sm_dma_buf_attachment *a = attachment->priv;
++ struct sg_table *table;
++
++ table = a->table;
++
++ if (!dma_map_sg(attachment->dev, table->sgl, table->nents,
++ direction))
++ return ERR_PTR(-ENOMEM);
++
++ pr_debug("%s attachment %p\n", __func__, attachment);
++ return table;
++}
++
++static void vc_sm_unmap_dma_buf(struct dma_buf_attachment *attachment,
++ struct sg_table *table,
++ enum dma_data_direction direction)
++{
++ pr_debug("%s attachment %p\n", __func__, attachment);
++ dma_unmap_sg(attachment->dev, table->sgl, table->nents, direction);
++}
++
++static int vc_sm_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
++{
++ struct vc_sm_buffer *buf = dmabuf->priv;
++ struct sg_table *table = buf->sg_table;
++ unsigned long addr = vma->vm_start;
++ unsigned long offset = vma->vm_pgoff * PAGE_SIZE;
++ struct scatterlist *sg;
++ int i;
++ int ret = 0;
++
++ pr_debug("%s dmabuf %p, buf %p, vm_start %08lX\n", __func__, dmabuf,
++ buf, addr);
++
++ mutex_lock(&buf->lock);
++
++ /* now map it to userspace */
++ for_each_sg(table->sgl, sg, table->nents, i) {
++ struct page *page = sg_page(sg);
++ unsigned long remainder = vma->vm_end - addr;
++ unsigned long len = sg->length;
++
++ if (offset >= sg->length) {
++ offset -= sg->length;
++ continue;
++ } else if (offset) {
++ page += offset / PAGE_SIZE;
++ len = sg->length - offset;
++ offset = 0;
++ }
++ len = min(len, remainder);
++ ret = remap_pfn_range(vma, addr, page_to_pfn(page), len,
++ vma->vm_page_prot);
++ if (ret)
++ break;
++ addr += len;
++ if (addr >= vma->vm_end)
++ break;
++ }
++ mutex_unlock(&buf->lock);
++
++ if (ret)
++ pr_err("%s: failure mapping buffer to userspace\n",
++ __func__);
++
++ return ret;
++}
++
++static void vc_sm_dma_buf_release(struct dma_buf *dmabuf)
++{
++ struct vc_sm_buffer *buffer;
++
++ if (!dmabuf)
++ return;
++
++ buffer = (struct vc_sm_buffer *)dmabuf->priv;
++
++ mutex_lock(&buffer->lock);
++
++ pr_debug("%s dmabuf %p, buffer %p\n", __func__, dmabuf, buffer);
++
++ buffer->in_use = 0;
++
++ /* Unmap on the VPU */
++ vc_sm_vpu_free(buffer);
++ pr_debug("%s vpu_free done\n", __func__);
++
++ /* Unmap our dma_buf object (the vc_sm_buffer remains until released
++ * on the VPU).
++ */
++ vc_sm_clean_up_dmabuf(buffer);
++ pr_debug("%s clean_up dmabuf done\n", __func__);
++
++ vc_sm_release_resource(buffer);
++ pr_debug("%s done\n", __func__);
++}
++
++static int vc_sm_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
++ enum dma_data_direction direction)
++{
++ struct vc_sm_buffer *buf;
++ struct vc_sm_dma_buf_attachment *a;
++
++ if (!dmabuf)
++ return -EFAULT;
++
++ buf = dmabuf->priv;
++ if (!buf)
++ return -EFAULT;
++
++ mutex_lock(&buf->lock);
++
++ list_for_each_entry(a, &buf->attachments, list) {
++ dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
++ direction);
++ }
++ mutex_unlock(&buf->lock);
++
++ return 0;
++}
++
++static int vc_sm_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
++ enum dma_data_direction direction)
++{
++ struct vc_sm_buffer *buf;
++ struct vc_sm_dma_buf_attachment *a;
++
++ if (!dmabuf)
++ return -EFAULT;
++ buf = dmabuf->priv;
++ if (!buf)
++ return -EFAULT;
++
++ mutex_lock(&buf->lock);
++
++ list_for_each_entry(a, &buf->attachments, list) {
++ dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
++ direction);
++ }
++ mutex_unlock(&buf->lock);
++
++ return 0;
++}
++
++static void *vc_sm_dma_buf_kmap(struct dma_buf *dmabuf, unsigned long offset)
++{
++ /* FIXME */
++ return NULL;
++}
++
++static void vc_sm_dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long offset,
++ void *ptr)
++{
++ /* FIXME */
++}
++
++static const struct dma_buf_ops dma_buf_ops = {
++ .map_dma_buf = vc_sm_map_dma_buf,
++ .unmap_dma_buf = vc_sm_unmap_dma_buf,
++ .mmap = vc_sm_dmabuf_mmap,
++ .release = vc_sm_dma_buf_release,
++ .attach = vc_sm_dma_buf_attach,
++ .detach = vc_sm_dma_buf_detatch,
++ .begin_cpu_access = vc_sm_dma_buf_begin_cpu_access,
++ .end_cpu_access = vc_sm_dma_buf_end_cpu_access,
++ .map = vc_sm_dma_buf_kmap,
++ .unmap = vc_sm_dma_buf_kunmap,
++};
+ /* Dma_buf operations for chaining through to an imported dma_buf */
+ static
+ int vc_sm_import_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attachment)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return -EINVAL;
+- return res->import_dma_buf->ops->attach(res->import_dma_buf,
++ return buf->import.dma_buf->ops->attach(buf->import.dma_buf,
+ attachment);
+ }
+
+@@ -334,22 +636,23 @@ static
+ void vc_sm_import_dma_buf_detatch(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attachment)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return;
+- res->import_dma_buf->ops->detach(res->import_dma_buf, attachment);
++ buf->import.dma_buf->ops->detach(buf->import.dma_buf, attachment);
+ }
+
+ static
+ struct sg_table *vc_sm_import_map_dma_buf(struct dma_buf_attachment *attachment,
+ enum dma_data_direction direction)
+ {
+- struct vc_sm_buffer *res = attachment->dmabuf->priv;
++ struct vc_sm_buffer *buf = attachment->dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return NULL;
+- return res->import_dma_buf->ops->map_dma_buf(attachment, direction);
++ return buf->import.dma_buf->ops->map_dma_buf(attachment,
++ direction);
+ }
+
+ static
+@@ -357,87 +660,88 @@ void vc_sm_import_unmap_dma_buf(struct d
+ struct sg_table *table,
+ enum dma_data_direction direction)
+ {
+- struct vc_sm_buffer *res = attachment->dmabuf->priv;
++ struct vc_sm_buffer *buf = attachment->dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return;
+- res->import_dma_buf->ops->unmap_dma_buf(attachment, table, direction);
++ buf->import.dma_buf->ops->unmap_dma_buf(attachment, table, direction);
+ }
+
+ static
+ int vc_sm_import_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- pr_debug("%s: mmap dma_buf %p, res %p, imported db %p\n", __func__,
+- dmabuf, res, res->import_dma_buf);
+- if (!res->import_dma_buf) {
++ pr_debug("%s: mmap dma_buf %p, buf %p, imported db %p\n", __func__,
++ dmabuf, buf, buf->import.dma_buf);
++ if (!buf->imported) {
+ pr_err("%s: mmap dma_buf %p- not an imported buffer\n",
+ __func__, dmabuf);
+ return -EINVAL;
+ }
+- return res->import_dma_buf->ops->mmap(res->import_dma_buf, vma);
++ return buf->import.dma_buf->ops->mmap(buf->import.dma_buf, vma);
+ }
+
+ static
+ void vc_sm_import_dma_buf_release(struct dma_buf *dmabuf)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+ pr_debug("%s: Relasing dma_buf %p\n", __func__, dmabuf);
+- if (!res->import_dma_buf)
++ if (!buf->imported) /* bail out before the lock is taken */
+ return;
+
+- res->in_use = 0;
++ mutex_lock(&buf->lock); /* dropped by vc_sm_release_resource() */
++ buf->in_use = 0;
+
+- vc_sm_release_resource(res, 0);
++ vc_sm_vpu_free(buf);
++
++ vc_sm_release_resource(buf);
+ }
+
+ static
+ void *vc_sm_import_dma_buf_kmap(struct dma_buf *dmabuf,
+ unsigned long offset)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return NULL;
+- return res->import_dma_buf->ops->map(res->import_dma_buf,
+- offset);
++ return buf->import.dma_buf->ops->map(buf->import.dma_buf, offset);
+ }
+
+ static
+ void vc_sm_import_dma_buf_kunmap(struct dma_buf *dmabuf,
+ unsigned long offset, void *ptr)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return;
+- res->import_dma_buf->ops->unmap(res->import_dma_buf,
+- offset, ptr);
++ buf->import.dma_buf->ops->unmap(buf->import.dma_buf, offset, ptr);
+ }
+
+ static
+ int vc_sm_import_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+ enum dma_data_direction direction)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return -EINVAL;
+- return res->import_dma_buf->ops->begin_cpu_access(res->import_dma_buf,
+- direction);
++ return buf->import.dma_buf->ops->begin_cpu_access(buf->import.dma_buf,
++ direction);
+ }
+
+ static
+ int vc_sm_import_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+ enum dma_data_direction direction)
+ {
+- struct vc_sm_buffer *res = dmabuf->priv;
++ struct vc_sm_buffer *buf = dmabuf->priv;
+
+- if (!res->import_dma_buf)
++ if (!buf->imported)
+ return -EINVAL;
+- return res->import_dma_buf->ops->end_cpu_access(res->import_dma_buf,
++ return buf->import.dma_buf->ops->end_cpu_access(buf->import.dma_buf,
+ direction);
+ }
+
+@@ -516,9 +820,8 @@ vc_sm_cma_import_dmabuf_internal(struct
+ memcpy(import.name, VC_SM_RESOURCE_NAME_DEFAULT,
+ sizeof(VC_SM_RESOURCE_NAME_DEFAULT));
+
+- pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u\n",
+- __func__, import.name, import.type, &dma_addr,
+- import.size);
++ pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u.\n",
++ __func__, import.name, import.type, &dma_addr, import.size);
+
+ /* Allocate the videocore buffer. */
+ status = vc_sm_cma_vchi_import(sm_state->sm_handle, &import, &result,
+@@ -548,12 +851,14 @@ vc_sm_cma_import_dmabuf_internal(struct
+ buffer->size = import.size;
+ buffer->vpu_state = VPU_MAPPED;
+
+- buffer->import_dma_buf = dma_buf;
++ buffer->imported = 1;
++ buffer->import.dma_buf = dma_buf;
+
+- buffer->attach = attach;
+- buffer->sgt = sgt;
++ buffer->import.attach = attach;
++ buffer->import.sgt = sgt;
+ buffer->dma_addr = dma_addr;
+ buffer->in_use = 1;
++ buffer->kernel_id = import.kernel_id;
+
+ /*
+ * We're done - we need to export a new dmabuf chaining through most
+@@ -594,6 +899,91 @@ error:
+ return ret;
+ }
+
++/* Allocate CMA memory for the VPU. Sets *ret_buffer and returns 0 on success. */
++static int vc_sm_cma_vpu_alloc(u32 size, uint32_t align, const char *name,
++			       u32 mem_handle, struct vc_sm_buffer **ret_buffer)
++{
++	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
++	struct vc_sm_buffer *buffer = NULL;
++	int aligned_size;
++	int ret = 0;
++
++	/* Align to the user requested align and then to a page boundary */
++	aligned_size = ALIGN(size, align);
++	aligned_size = PAGE_ALIGN(aligned_size);
++
++	if (!aligned_size)
++		return -EINVAL;
++
++	/* Allocate local buffer to track this allocation. */
++	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
++	if (!buffer)
++		return -ENOMEM;
++
++	mutex_init(&buffer->lock);
++
++	if (vc_sm_cma_buffer_allocate(sm_state->cma_heap, &buffer->alloc,
++				      aligned_size)) {
++		pr_err("[%s]: cma alloc of %d bytes failed\n",
++		       __func__, aligned_size);
++		ret = -ENOMEM;
++		goto error;
++	}
++	buffer->sg_table = buffer->alloc.sg_table;
++	pr_debug("[%s]: cma alloc of %d bytes success\n",
++		 __func__, aligned_size);
++
++	if (dma_map_sg(&sm_state->pdev->dev, buffer->sg_table->sgl,
++		       buffer->sg_table->nents, DMA_BIDIRECTIONAL) <= 0) {
++		pr_err("[%s]: dma_map_sg failed\n", __func__);
++		ret = -ENOMEM;
++		goto error;
++	}
++
++	INIT_LIST_HEAD(&buffer->attachments);
++	/* Bounded copy that always leaves buffer->name NUL-terminated */
++	strscpy(buffer->name, name, sizeof(buffer->name));
++
++	exp_info.ops = &dma_buf_ops;
++	exp_info.size = aligned_size;
++	exp_info.flags = O_RDWR;
++	exp_info.priv = buffer;
++
++	buffer->dma_buf = dma_buf_export(&exp_info);
++	if (IS_ERR(buffer->dma_buf)) {
++		ret = PTR_ERR(buffer->dma_buf);
++		goto error_unmap;
++	}
++	buffer->dma_addr = (uint32_t)sg_dma_address(buffer->sg_table->sgl);
++	if ((buffer->dma_addr & 0xC0000000) != 0xC0000000) {
++		pr_err("%s: Expecting an uncached alias for dma_addr %pad\n",
++		       __func__, &buffer->dma_addr);
++		buffer->dma_addr |= 0xC0000000;
++	}
++	buffer->private = sm_state->vpu_allocs;
++
++	buffer->vc_handle = mem_handle;
++	buffer->vpu_state = VPU_MAPPED;
++	buffer->vpu_allocated = 1;
++	buffer->size = size;
++	/* ID sent with our message so the release reply can find the buffer */
++	buffer->kernel_id = get_kernel_id(buffer);
++	vc_sm_add_resource(sm_state->vpu_allocs, buffer);
++
++	*ret_buffer = buffer;
++	return 0;
++
++error_unmap:
++	dma_unmap_sg(&sm_state->pdev->dev, buffer->sg_table->sgl,
++		     buffer->sg_table->nents, DMA_BIDIRECTIONAL);
++error:
++	/* Never listed or locked: vc_sm_release_resource() must not be used */
++	vc_sm_cma_buffer_free(&buffer->alloc);
++	mutex_destroy(&buffer->lock);
++	kfree(buffer);
++	return ret;
++}
++
+ static void
+ vc_sm_vpu_event(struct sm_instance *instance, struct vc_sm_result_t *reply,
+ int reply_len)
+@@ -612,21 +1002,61 @@ vc_sm_vpu_event(struct sm_instance *inst
+ struct vc_sm_released *release = (struct vc_sm_released *)reply;
+ struct vc_sm_buffer *buffer =
+ lookup_kernel_id(release->kernel_id);
++ if (!buffer) {
++ pr_err("%s: VC released a buffer that is already released, kernel_id %d\n",
++ __func__, release->kernel_id);
++ break;
++ }
++ mutex_lock(&buffer->lock);
+
+- /*
+- * FIXME: Need to check buffer is still valid and allocated
+- * before continuing
+- */
+ pr_debug("%s: Released addr %08x, size %u, id %08x, mem_handle %08x\n",
+ __func__, release->addr, release->size,
+ release->kernel_id, release->vc_handle);
+- mutex_lock(&buffer->lock);
++
+ buffer->vc_handle = 0;
+ buffer->vpu_state = VPU_NOT_MAPPED;
+- mutex_unlock(&buffer->lock);
+ free_kernel_id(release->kernel_id);
+
+- vc_sm_release_resource(buffer, 0);
++ if (buffer->vpu_allocated) {
++ /* VPU allocation, so release the dmabuf which will
++ * trigger the clean up.
++ */
++ mutex_unlock(&buffer->lock);
++ dma_buf_put(buffer->dma_buf);
++ } else {
++ vc_sm_release_resource(buffer);
++ }
++ }
++ break;
++ case VC_SM_MSG_TYPE_VC_MEM_REQUEST:
++ {
++ struct vc_sm_buffer *buffer = NULL;
++ struct vc_sm_vc_mem_request *req =
++ (struct vc_sm_vc_mem_request *)reply;
++ struct vc_sm_vc_mem_request_result reply;
++ int ret;
++
++ pr_debug("%s: Request %u bytes of memory, align %d name %s, trans_id %08x\n",
++ __func__, req->size, req->align, req->name,
++ req->trans_id);
++ ret = vc_sm_cma_vpu_alloc(req->size, req->align, req->name,
++ req->vc_handle, &buffer);
++
++ reply.trans_id = req->trans_id;
++ if (!ret) {
++ reply.addr = buffer->dma_addr;
++ reply.kernel_id = buffer->kernel_id;
++ pr_debug("%s: Allocated resource buffer %p, addr %pad\n",
++ __func__, buffer, &buffer->dma_addr);
++ } else {
++ pr_err("%s: Allocation failed size %u, name %s, vc_handle %u\n",
++ __func__, req->size, req->name, req->vc_handle);
++ reply.addr = 0;
++ reply.kernel_id = 0;
++ }
++ vc_sm_vchi_client_vc_mem_req_reply(sm_state->sm_handle, &reply,
++ &sm_state->int_trans_id);
++ break;
+ }
+ break;
+ default:
+@@ -645,6 +1075,14 @@ static void vc_sm_connected_init(void)
+
+ pr_info("[%s]: start\n", __func__);
+
++ if (vc_sm_cma_add_heaps(&sm_state->cma_heap) ||
++ !sm_state->cma_heap) {
++ pr_err("[%s]: failed to initialise CMA heaps\n",
++ __func__);
++ ret = -EIO;
++ goto err_free_mem;
++ }
++
+ /*
+ * Initialize and create a VCHI connection for the shared memory service
+ * running on videocore.
+@@ -696,7 +1134,7 @@ static void vc_sm_connected_init(void)
+ goto err_remove_shared_memory;
+ }
+
+- version.version = 1;
++ version.version = 2;
+ ret = vc_sm_cma_vchi_client_version(sm_state->sm_handle, &version,
+ &version_result,
+ &sm_state->int_trans_id);
+@@ -768,7 +1206,7 @@ static int bcm2835_vc_sm_cma_remove(stru
+ int vc_sm_cma_int_handle(void *handle)
+ {
+ struct dma_buf *dma_buf = (struct dma_buf *)handle;
+- struct vc_sm_buffer *res;
++ struct vc_sm_buffer *buf;
+
+ /* Validate we can work with this device. */
+ if (!sm_state || !handle) {
+@@ -776,8 +1214,8 @@ int vc_sm_cma_int_handle(void *handle)
+ return 0;
+ }
+
+- res = (struct vc_sm_buffer *)dma_buf->priv;
+- return res->vc_handle;
++ buf = (struct vc_sm_buffer *)dma_buf->priv;
++ return buf->vc_handle;
+ }
+ EXPORT_SYMBOL_GPL(vc_sm_cma_int_handle);
+
+@@ -804,7 +1242,7 @@ EXPORT_SYMBOL_GPL(vc_sm_cma_free);
+ int vc_sm_cma_import_dmabuf(struct dma_buf *src_dmabuf, void **handle)
+ {
+ struct dma_buf *new_dma_buf;
+- struct vc_sm_buffer *res;
++ struct vc_sm_buffer *buf;
+ int ret;
+
+ /* Validate we can work with this device. */
+@@ -818,7 +1256,7 @@ int vc_sm_cma_import_dmabuf(struct dma_b
+
+ if (!ret) {
+ pr_debug("%s: imported to ptr %p\n", __func__, new_dma_buf);
+- res = (struct vc_sm_buffer *)new_dma_buf->priv;
++ buf = (struct vc_sm_buffer *)new_dma_buf->priv;
+
+ /* Assign valid handle at this time.*/
+ *handle = new_dma_buf;
+--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
+@@ -21,6 +21,8 @@
+ #include <linux/types.h>
+ #include <linux/miscdevice.h>
+
++#include "vc_sm_cma.h"
++
+ #define VC_SM_MAX_NAME_LEN 32
+
+ enum vc_sm_vpu_mapping_state {
+@@ -29,31 +31,51 @@ enum vc_sm_vpu_mapping_state {
+ VPU_UNMAPPING
+ };
+
++struct vc_sm_imported {
++ struct dma_buf *dma_buf;
++ struct dma_buf_attachment *attach;
++ struct sg_table *sgt;
++};
++
+ struct vc_sm_buffer {
+ struct list_head global_buffer_list; /* Global list of buffers. */
+
++ /* Index in the kernel_id idr so that we can find the
++ * vc_sm_buffer again when servicing the VCHI reply.
++ */
++ int kernel_id;
++
+ size_t size;
+
+ /* Lock over all the following state for this buffer */
+ struct mutex lock;
+- struct sg_table *sg_table;
+ struct list_head attachments;
+
+ char name[VC_SM_MAX_NAME_LEN];
+
+ int in_use:1; /* Kernel is still using this resource */
++ int imported:1; /* Imported dmabuf */
++
++ struct sg_table *sg_table;
+
+ enum vc_sm_vpu_mapping_state vpu_state;
+ u32 vc_handle; /* VideoCore handle for this buffer */
++ int vpu_allocated; /*
++ * The VPU made this allocation. Release the
++ * local dma_buf when the VPU releases the
++ * resource.
++ */
+
+ /* DMABUF related fields */
+- struct dma_buf *import_dma_buf;
+ struct dma_buf *dma_buf;
+- struct dma_buf_attachment *attach;
+- struct sg_table *sgt;
+ dma_addr_t dma_addr;
+
+ struct vc_sm_privdata_t *private;
++
++ union {
++ struct vc_sm_cma_alloc_data alloc;
++ struct vc_sm_imported import;
++ };
+ };
+
+ #endif
+--- /dev/null
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.c
+@@ -0,0 +1,99 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * VideoCore Shared Memory CMA allocator
++ *
++ * Copyright: 2018, Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Android ION allocator
++ * Copyright (C) Linaro 2012
++ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
++ *
++ */
++
++#include <linux/slab.h>
++#include <linux/errno.h>
++#include <linux/err.h>
++#include <linux/cma.h>
++#include <linux/scatterlist.h>
++
++#include "vc_sm_cma.h"
++
++/* CMA heap operations functions */
++/* Allocate @len bytes from @cma_heap and describe them in @buffer; 0 or -ENOMEM. */
++int vc_sm_cma_buffer_allocate(struct cma *cma_heap,
++			      struct vc_sm_cma_alloc_data *buffer,
++			      unsigned long len)
++{
++	/* len should already be page aligned */
++	unsigned long num_pages = len / PAGE_SIZE;
++	struct sg_table *table;
++	struct page *pages;
++
++	/* Last argument is cma_alloc()'s bool no_warn in 4.19, not a gfp mask */
++	pages = cma_alloc(cma_heap, num_pages, 0, false);
++	if (!pages)
++		return -ENOMEM;
++
++	table = kmalloc(sizeof(*table), GFP_KERNEL);
++	if (!table)
++		goto err;
++
++	/* One entry is enough: the whole allocation goes into a single sg */
++	if (sg_alloc_table(table, 1, GFP_KERNEL))
++		goto free_mem;
++
++	sg_set_page(table->sgl, pages, len, 0);
++	buffer->priv_virt = pages;
++	buffer->sg_table = table;
++	buffer->cma_heap = cma_heap;
++	buffer->num_pages = num_pages;
++	return 0;
++
++free_mem:
++	kfree(table);
++err:
++	cma_release(cma_heap, pages, num_pages);
++	return -ENOMEM;
++}
++
++void vc_sm_cma_buffer_free(struct vc_sm_cma_alloc_data *buffer)
++{
++ struct cma *cma_heap = buffer->cma_heap;
++ struct page *pages = buffer->priv_virt;
++
++ /* release memory */
++ if (cma_heap)
++ cma_release(cma_heap, pages, buffer->num_pages);
++
++ /* release sg table */
++ if (buffer->sg_table) {
++ sg_free_table(buffer->sg_table);
++ kfree(buffer->sg_table);
++ buffer->sg_table = NULL;
++ }
++}
++
++/* cma_for_each_area() callback: keep the first CMA area, ignore any others. */
++static int __vc_sm_cma_add_heaps(struct cma *cma, void *priv)
++{
++	struct cma **heap = (struct cma **)priv;
++	const char *name = cma_get_name(cma);
++
++	if (!(*heap)) {
++		phys_addr_t phys_addr = cma_get_base(cma);
++
++		pr_debug("%s: Adding cma heap %s (start %pap, size %lu) for use by vcsm\n",
++			 __func__, name, &phys_addr, cma_get_size(cma));
++		*heap = cma;
++	} else {
++		pr_err("%s: Ignoring heap %s as already set\n",
++		       __func__, name);
++	}
++	return 0;
++}
++
++int vc_sm_cma_add_heaps(struct cma **cma_heap)
++{
++ cma_for_each_area(__vc_sm_cma_add_heaps, cma_heap);
++ return 0;
++}
+--- /dev/null
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.h
+@@ -0,0 +1,39 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++/*
++ * VideoCore Shared Memory CMA allocator
++ *
++ * Copyright: 2018, Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Android ION allocator
++ * Copyright (C) Linaro 2012
++ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
++ *
++ * This software is licensed under the terms of the GNU General Public
++ * License version 2, as published by the Free Software Foundation, and
++ * may be copied, distributed, and modified under those terms.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ */
++#ifndef VC_SM_CMA_H
++#define VC_SM_CMA_H
++
++struct vc_sm_cma_alloc_data {
++ struct cma *cma_heap;
++ unsigned long num_pages;
++ void *priv_virt;
++ struct sg_table *sg_table;
++};
++
++int vc_sm_cma_buffer_allocate(struct cma *cma_heap,
++ struct vc_sm_cma_alloc_data *buffer,
++ unsigned long len);
++void vc_sm_cma_buffer_free(struct vc_sm_cma_alloc_data *buffer);
++
++int vc_sm_cma_add_heaps(struct cma **cma_heap);
++
++#endif
+--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
+@@ -500,3 +500,13 @@ int vc_sm_cma_vchi_client_version(struct
+ msg, sizeof(*msg), NULL, 0,
+ cur_trans_id, 0);
+ }
++
++/* Reply to a VPU memory request (VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY). */
++int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
++				       struct vc_sm_vc_mem_request_result *msg,
++				       uint32_t *cur_trans_id)
++{
++	return vc_sm_cma_vchi_send_msg(handle,
++			VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY,
++			msg, sizeof(*msg), NULL, 0, cur_trans_id, 0);
++}
+--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
+@@ -56,4 +56,8 @@ int vc_sm_cma_vchi_client_version(struct
+ struct vc_sm_result_t *result,
+ u32 *cur_trans_id);
+
++int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
++ struct vc_sm_vc_mem_request_result *msg,
++ uint32_t *cur_trans_id);
++
+ #endif /* __VC_SM_CMA_VCHI_H__INCLUDED__ */
+--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
+@@ -264,6 +264,8 @@ struct vc_sm_vc_mem_request {
+ u32 align;
+ /* resource name (for easier tracking) */
+ char name[VC_SM_RESOURCE_NAME];
++ /* VPU handle for the resource */
++ u32 vc_handle;
+ };
+
+ /* Response from the kernel to provide the VPU with some memory */