diff options
Diffstat (limited to 'target/linux/bcm27xx/patches-5.4/950-0502-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch')
-rw-r--r-- | target/linux/bcm27xx/patches-5.4/950-0502-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch | 4341 |
1 files changed, 0 insertions, 4341 deletions
diff --git a/target/linux/bcm27xx/patches-5.4/950-0502-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch b/target/linux/bcm27xx/patches-5.4/950-0502-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch deleted file mode 100644 index 134a685f0e..0000000000 --- a/target/linux/bcm27xx/patches-5.4/950-0502-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch +++ /dev/null @@ -1,4341 +0,0 @@ -From 82bbd353e2dc364bf37e6f0b91890cb432b1a72f Mon Sep 17 00:00:00 2001 -From: John Cox <jc@kynesim.co.uk> -Date: Thu, 5 Mar 2020 18:30:41 +0000 -Subject: [PATCH] staging: media: Add Raspberry Pi V4L2 H265 decoder - -This driver is for the HEVC/H265 decoder block on the Raspberry -Pi 4, and conforms to the V4L2 stateless decoder API. - -Signed-off-by: John Cox <jc@kynesim.co.uk> ---- - drivers/staging/media/Kconfig | 2 + - drivers/staging/media/Makefile | 1 + - drivers/staging/media/rpivid/Kconfig | 16 + - drivers/staging/media/rpivid/Makefile | 5 + - drivers/staging/media/rpivid/rpivid.c | 432 ++++ - drivers/staging/media/rpivid/rpivid.h | 181 ++ - drivers/staging/media/rpivid/rpivid_dec.c | 79 + - drivers/staging/media/rpivid/rpivid_dec.h | 19 + - drivers/staging/media/rpivid/rpivid_h265.c | 2275 +++++++++++++++++++ - drivers/staging/media/rpivid/rpivid_hw.c | 321 +++ - drivers/staging/media/rpivid/rpivid_hw.h | 300 +++ - drivers/staging/media/rpivid/rpivid_video.c | 593 +++++ - drivers/staging/media/rpivid/rpivid_video.h | 30 + - 14 files changed, 4256 insertions(+) - create mode 100644 drivers/staging/media/rpivid/Kconfig - create mode 100644 drivers/staging/media/rpivid/Makefile - create mode 100644 drivers/staging/media/rpivid/rpivid.c - create mode 100644 drivers/staging/media/rpivid/rpivid.h - create mode 100644 drivers/staging/media/rpivid/rpivid_dec.c - create mode 100644 drivers/staging/media/rpivid/rpivid_dec.h - create mode 100644 drivers/staging/media/rpivid/rpivid_h265.c - create mode 100644 drivers/staging/media/rpivid/rpivid_hw.c - create mode 100644 
drivers/staging/media/rpivid/rpivid_hw.h - create mode 100644 drivers/staging/media/rpivid/rpivid_video.c - create mode 100644 drivers/staging/media/rpivid/rpivid_video.h - ---- a/drivers/staging/media/Kconfig -+++ b/drivers/staging/media/Kconfig -@@ -30,6 +30,8 @@ source "drivers/staging/media/meson/vdec - - source "drivers/staging/media/omap4iss/Kconfig" - -+source "drivers/staging/media/rpivid/Kconfig" -+ - source "drivers/staging/media/sunxi/Kconfig" - - source "drivers/staging/media/tegra-vde/Kconfig" ---- a/drivers/staging/media/Makefile -+++ b/drivers/staging/media/Makefile -@@ -3,6 +3,7 @@ obj-$(CONFIG_VIDEO_ALLEGRO_DVT) += alleg - obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx/ - obj-$(CONFIG_VIDEO_MESON_VDEC) += meson/vdec/ - obj-$(CONFIG_VIDEO_OMAP4) += omap4iss/ -+obj-$(CONFIG_VIDEO_RPIVID) += rpivid/ - obj-$(CONFIG_VIDEO_SUNXI) += sunxi/ - obj-$(CONFIG_TEGRA_VDE) += tegra-vde/ - obj-$(CONFIG_VIDEO_HANTRO) += hantro/ ---- /dev/null -+++ b/drivers/staging/media/rpivid/Kconfig -@@ -0,0 +1,16 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+config VIDEO_RPIVID -+ tristate "Rpi H265 driver" -+ depends on VIDEO_DEV && VIDEO_V4L2 -+ depends on MEDIA_CONTROLLER -+ depends on OF -+ depends on MEDIA_CONTROLLER_REQUEST_API -+ select VIDEOBUF2_DMA_CONTIG -+ select V4L2_MEM2MEM_DEV -+ help -+ Support for the Rpi H265 h/w decoder. -+ -+ To compile this driver as a module, choose M here: the module -+ will be called rpivid-hevc. 
-+ ---- /dev/null -+++ b/drivers/staging/media/rpivid/Makefile -@@ -0,0 +1,5 @@ -+# SPDX-License-Identifier: GPL-2.0 -+obj-$(CONFIG_VIDEO_RPIVID) += rpivid-hevc.o -+ -+rpivid-hevc-y = rpivid.o rpivid_video.o rpivid_dec.o \ -+ rpivid_hw.o rpivid_h265.o ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid.c -@@ -0,0 +1,432 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#include <linux/platform_device.h> -+#include <linux/module.h> -+#include <linux/of.h> -+ -+#include <media/v4l2-device.h> -+#include <media/v4l2-ioctl.h> -+#include <media/v4l2-ctrls.h> -+#include <media/v4l2-mem2mem.h> -+ -+#include "rpivid.h" -+#include "rpivid_video.h" -+#include "rpivid_hw.h" -+#include "rpivid_dec.h" -+ -+/* -+ * Default /dev/videoN node number. -+ * Deliberately avoid the very low numbers as these are often taken by webcams -+ * etc, and simple apps tend to only go for /dev/video0. 
-+ */ -+static int video_nr = 19; -+module_param(video_nr, int, 0644); -+MODULE_PARM_DESC(video_nr, "decoder video device number"); -+ -+static const struct rpivid_control rpivid_ctrls[] = { -+ { -+ .cfg = { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS, -+ }, -+ .required = true, -+ }, -+ { -+ .cfg = { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS, -+ }, -+ .required = true, -+ }, -+ { -+ .cfg = { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX, -+ }, -+ .required = false, -+ }, -+ { -+ .cfg = { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, -+ }, -+ .required = true, -+ }, -+ { -+ .cfg = { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, -+ .max = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, -+ .def = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, -+ }, -+ .required = false, -+ }, -+ { -+ .cfg = { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, -+ .max = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, -+ .def = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, -+ }, -+ .required = false, -+ }, -+}; -+ -+#define rpivid_ctrls_COUNT ARRAY_SIZE(rpivid_ctrls) -+ -+void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id) -+{ -+ unsigned int i; -+ -+ for (i = 0; ctx->ctrls[i]; i++) -+ if (ctx->ctrls[i]->id == id) -+ return ctx->ctrls[i]->p_cur.p; -+ -+ return NULL; -+} -+ -+static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx) -+{ -+ struct v4l2_ctrl_handler *hdl = &ctx->hdl; -+ struct v4l2_ctrl *ctrl; -+ unsigned int ctrl_size; -+ unsigned int i; -+ -+ v4l2_ctrl_handler_init(hdl, rpivid_ctrls_COUNT); -+ if (hdl->error) { -+ v4l2_err(&dev->v4l2_dev, -+ "Failed to initialize control handler\n"); -+ return hdl->error; -+ } -+ -+ ctrl_size = sizeof(ctrl) * rpivid_ctrls_COUNT + 1; -+ -+ ctx->ctrls = kzalloc(ctrl_size, GFP_KERNEL); -+ if (!ctx->ctrls) -+ return -ENOMEM; -+ -+ for (i = 0; i < rpivid_ctrls_COUNT; i++) { -+ ctrl = v4l2_ctrl_new_custom(hdl, &rpivid_ctrls[i].cfg, -+ NULL); -+ if (hdl->error) { -+ v4l2_err(&dev->v4l2_dev, -+ "Failed to create new custom control id=%#x\n", -+ 
rpivid_ctrls[i].cfg.id); -+ -+ v4l2_ctrl_handler_free(hdl); -+ kfree(ctx->ctrls); -+ return hdl->error; -+ } -+ -+ ctx->ctrls[i] = ctrl; -+ } -+ -+ ctx->fh.ctrl_handler = hdl; -+ v4l2_ctrl_handler_setup(hdl); -+ -+ return 0; -+} -+ -+static int rpivid_request_validate(struct media_request *req) -+{ -+ struct media_request_object *obj; -+ struct v4l2_ctrl_handler *parent_hdl, *hdl; -+ struct rpivid_ctx *ctx = NULL; -+ struct v4l2_ctrl *ctrl_test; -+ unsigned int count; -+ unsigned int i; -+ -+ list_for_each_entry(obj, &req->objects, list) { -+ struct vb2_buffer *vb; -+ -+ if (vb2_request_object_is_buffer(obj)) { -+ vb = container_of(obj, struct vb2_buffer, req_obj); -+ ctx = vb2_get_drv_priv(vb->vb2_queue); -+ -+ break; -+ } -+ } -+ -+ if (!ctx) -+ return -ENOENT; -+ -+ count = vb2_request_buffer_cnt(req); -+ if (!count) { -+ v4l2_info(&ctx->dev->v4l2_dev, -+ "No buffer was provided with the request\n"); -+ return -ENOENT; -+ } else if (count > 1) { -+ v4l2_info(&ctx->dev->v4l2_dev, -+ "More than one buffer was provided with the request\n"); -+ return -EINVAL; -+ } -+ -+ parent_hdl = &ctx->hdl; -+ -+ hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl); -+ if (!hdl) { -+ v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n"); -+ return -ENOENT; -+ } -+ -+ for (i = 0; i < rpivid_ctrls_COUNT; i++) { -+ if (!rpivid_ctrls[i].required) -+ continue; -+ -+ ctrl_test = -+ v4l2_ctrl_request_hdl_ctrl_find(hdl, -+ rpivid_ctrls[i].cfg.id); -+ if (!ctrl_test) { -+ v4l2_info(&ctx->dev->v4l2_dev, -+ "Missing required codec control\n"); -+ return -ENOENT; -+ } -+ } -+ -+ v4l2_ctrl_request_hdl_put(hdl); -+ -+ return vb2_request_validate(req); -+} -+ -+static int rpivid_open(struct file *file) -+{ -+ struct rpivid_dev *dev = video_drvdata(file); -+ struct rpivid_ctx *ctx = NULL; -+ int ret; -+ -+ if (mutex_lock_interruptible(&dev->dev_mutex)) -+ return -ERESTARTSYS; -+ -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) { -+ mutex_unlock(&dev->dev_mutex); -+ return -ENOMEM; 
-+ } -+ -+ v4l2_fh_init(&ctx->fh, video_devdata(file)); -+ file->private_data = &ctx->fh; -+ ctx->dev = dev; -+ -+ ret = rpivid_init_ctrls(dev, ctx); -+ if (ret) -+ goto err_free; -+ -+ ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, -+ &rpivid_queue_init); -+ if (IS_ERR(ctx->fh.m2m_ctx)) { -+ ret = PTR_ERR(ctx->fh.m2m_ctx); -+ goto err_ctrls; -+ } -+ -+ /* The only bit of format info that we can guess now is H265 src -+ * Everything else we need more info for -+ */ -+ ctx->src_fmt.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT; -+ rpivid_prepare_src_format(&ctx->src_fmt); -+ -+ v4l2_fh_add(&ctx->fh); -+ -+ mutex_unlock(&dev->dev_mutex); -+ -+ return 0; -+ -+err_ctrls: -+ v4l2_ctrl_handler_free(&ctx->hdl); -+err_free: -+ kfree(ctx); -+ mutex_unlock(&dev->dev_mutex); -+ -+ return ret; -+} -+ -+static int rpivid_release(struct file *file) -+{ -+ struct rpivid_dev *dev = video_drvdata(file); -+ struct rpivid_ctx *ctx = container_of(file->private_data, -+ struct rpivid_ctx, fh); -+ -+ mutex_lock(&dev->dev_mutex); -+ -+ v4l2_fh_del(&ctx->fh); -+ v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); -+ -+ v4l2_ctrl_handler_free(&ctx->hdl); -+ kfree(ctx->ctrls); -+ -+ v4l2_fh_exit(&ctx->fh); -+ -+ kfree(ctx); -+ -+ mutex_unlock(&dev->dev_mutex); -+ -+ return 0; -+} -+ -+static const struct v4l2_file_operations rpivid_fops = { -+ .owner = THIS_MODULE, -+ .open = rpivid_open, -+ .release = rpivid_release, -+ .poll = v4l2_m2m_fop_poll, -+ .unlocked_ioctl = video_ioctl2, -+ .mmap = v4l2_m2m_fop_mmap, -+}; -+ -+static const struct video_device rpivid_video_device = { -+ .name = RPIVID_NAME, -+ .vfl_dir = VFL_DIR_M2M, -+ .fops = &rpivid_fops, -+ .ioctl_ops = &rpivid_ioctl_ops, -+ .minor = -1, -+ .release = video_device_release_empty, -+ .device_caps = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING, -+}; -+ -+static const struct v4l2_m2m_ops rpivid_m2m_ops = { -+ .device_run = rpivid_device_run, -+}; -+ -+static const struct media_device_ops rpivid_m2m_media_ops = { -+ .req_validate = 
rpivid_request_validate, -+ .req_queue = v4l2_m2m_request_queue, -+}; -+ -+static int rpivid_probe(struct platform_device *pdev) -+{ -+ struct rpivid_dev *dev; -+ struct video_device *vfd; -+ int ret; -+ -+ dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); -+ if (!dev) -+ return -ENOMEM; -+ -+ dev->vfd = rpivid_video_device; -+ dev->dev = &pdev->dev; -+ dev->pdev = pdev; -+ -+ ret = 0; -+ ret = rpivid_hw_probe(dev); -+ if (ret) { -+ dev_err(&pdev->dev, "Failed to probe hardware\n"); -+ return ret; -+ } -+ -+ dev->dec_ops = &rpivid_dec_ops_h265; -+ -+ mutex_init(&dev->dev_mutex); -+ -+ ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); -+ if (ret) { -+ dev_err(&pdev->dev, "Failed to register V4L2 device\n"); -+ return ret; -+ } -+ -+ vfd = &dev->vfd; -+ vfd->lock = &dev->dev_mutex; -+ vfd->v4l2_dev = &dev->v4l2_dev; -+ -+ snprintf(vfd->name, sizeof(vfd->name), "%s", rpivid_video_device.name); -+ video_set_drvdata(vfd, dev); -+ -+ dev->m2m_dev = v4l2_m2m_init(&rpivid_m2m_ops); -+ if (IS_ERR(dev->m2m_dev)) { -+ v4l2_err(&dev->v4l2_dev, -+ "Failed to initialize V4L2 M2M device\n"); -+ ret = PTR_ERR(dev->m2m_dev); -+ -+ goto err_v4l2; -+ } -+ -+ dev->mdev.dev = &pdev->dev; -+ strscpy(dev->mdev.model, RPIVID_NAME, sizeof(dev->mdev.model)); -+ strscpy(dev->mdev.bus_info, "platform:" RPIVID_NAME, -+ sizeof(dev->mdev.bus_info)); -+ -+ media_device_init(&dev->mdev); -+ dev->mdev.ops = &rpivid_m2m_media_ops; -+ dev->v4l2_dev.mdev = &dev->mdev; -+ -+ ret = video_register_device(vfd, VFL_TYPE_GRABBER, video_nr); -+ if (ret) { -+ v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); -+ goto err_m2m; -+ } -+ -+ v4l2_info(&dev->v4l2_dev, -+ "Device registered as /dev/video%d\n", vfd->num); -+ -+ ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd, -+ MEDIA_ENT_F_PROC_VIDEO_DECODER); -+ if (ret) { -+ v4l2_err(&dev->v4l2_dev, -+ "Failed to initialize V4L2 M2M media controller\n"); -+ goto err_video; -+ } -+ -+ ret = media_device_register(&dev->mdev); -+ 
if (ret) { -+ v4l2_err(&dev->v4l2_dev, "Failed to register media device\n"); -+ goto err_m2m_mc; -+ } -+ -+ platform_set_drvdata(pdev, dev); -+ -+ return 0; -+ -+err_m2m_mc: -+ v4l2_m2m_unregister_media_controller(dev->m2m_dev); -+err_video: -+ video_unregister_device(&dev->vfd); -+err_m2m: -+ v4l2_m2m_release(dev->m2m_dev); -+err_v4l2: -+ v4l2_device_unregister(&dev->v4l2_dev); -+ -+ return ret; -+} -+ -+static int rpivid_remove(struct platform_device *pdev) -+{ -+ struct rpivid_dev *dev = platform_get_drvdata(pdev); -+ -+ if (media_devnode_is_registered(dev->mdev.devnode)) { -+ media_device_unregister(&dev->mdev); -+ v4l2_m2m_unregister_media_controller(dev->m2m_dev); -+ media_device_cleanup(&dev->mdev); -+ } -+ -+ v4l2_m2m_release(dev->m2m_dev); -+ video_unregister_device(&dev->vfd); -+ v4l2_device_unregister(&dev->v4l2_dev); -+ -+ rpivid_hw_remove(dev); -+ -+ return 0; -+} -+ -+static const struct of_device_id rpivid_dt_match[] = { -+ { -+ .compatible = "raspberrypi,rpivid-vid-decoder", -+ }, -+ { /* sentinel */ } -+}; -+MODULE_DEVICE_TABLE(of, rpivid_dt_match); -+ -+static struct platform_driver rpivid_driver = { -+ .probe = rpivid_probe, -+ .remove = rpivid_remove, -+ .driver = { -+ .name = RPIVID_NAME, -+ .of_match_table = of_match_ptr(rpivid_dt_match), -+ }, -+}; -+module_platform_driver(rpivid_driver); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("John Cox <jc@kynesim.co.uk>"); -+MODULE_DESCRIPTION("Raspberry Pi HEVC V4L2 driver"); ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid.h -@@ -0,0 +1,181 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#ifndef _RPIVID_H_ -+#define _RPIVID_H_ -+ -+#include <linux/clk.h> 
-+#include <linux/platform_device.h> -+#include <media/v4l2-ctrls.h> -+#include <media/v4l2-device.h> -+#include <media/v4l2-mem2mem.h> -+#include <media/videobuf2-v4l2.h> -+#include <media/videobuf2-dma-contig.h> -+ -+#define OPT_DEBUG_POLL_IRQ 0 -+ -+#define RPIVID_NAME "rpivid" -+ -+#define RPIVID_CAPABILITY_UNTILED BIT(0) -+#define RPIVID_CAPABILITY_H265_DEC BIT(1) -+ -+#define RPIVID_QUIRK_NO_DMA_OFFSET BIT(0) -+ -+#define RPIVID_SRC_PIXELFORMAT_DEFAULT V4L2_PIX_FMT_HEVC_SLICE -+ -+enum rpivid_irq_status { -+ RPIVID_IRQ_NONE, -+ RPIVID_IRQ_ERROR, -+ RPIVID_IRQ_OK, -+}; -+ -+struct rpivid_control { -+ struct v4l2_ctrl_config cfg; -+ unsigned char required:1; -+}; -+ -+struct rpivid_h265_run { -+ const struct v4l2_ctrl_hevc_sps *sps; -+ const struct v4l2_ctrl_hevc_pps *pps; -+ const struct v4l2_ctrl_hevc_slice_params *slice_params; -+ const struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix; -+}; -+ -+struct rpivid_run { -+ struct vb2_v4l2_buffer *src; -+ struct vb2_v4l2_buffer *dst; -+ -+ struct rpivid_h265_run h265; -+}; -+ -+struct rpivid_buffer { -+ struct v4l2_m2m_buffer m2m_buf; -+}; -+ -+struct rpivid_dec_state; -+struct rpivid_dec_env; -+#define RPIVID_DEC_ENV_COUNT 3 -+ -+struct rpivid_gptr { -+ size_t size; -+ __u8 *ptr; -+ dma_addr_t addr; -+ unsigned long attrs; -+}; -+ -+struct rpivid_dev; -+typedef void (*rpivid_irq_callback)(struct rpivid_dev *dev, void *ctx); -+ -+struct rpivid_q_aux; -+#define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME -+ -+#define RPIVID_P2BUF_COUNT 2 -+ -+struct rpivid_ctx { -+ struct v4l2_fh fh; -+ struct rpivid_dev *dev; -+ -+ struct v4l2_pix_format src_fmt; -+ struct v4l2_pix_format dst_fmt; -+ int dst_fmt_set; -+ -+ struct v4l2_ctrl_handler hdl; -+ struct v4l2_ctrl **ctrls; -+ -+ /* Decode state - stateless decoder my *** */ -+ /* state contains stuff that is only needed in phase0 -+ * it could be held in dec_env but that would be wasteful -+ */ -+ struct rpivid_dec_state *state; -+ struct rpivid_dec_env *dec0; -+ -+ /* Spinlock 
protecting dec_free */ -+ spinlock_t dec_lock; -+ struct rpivid_dec_env *dec_free; -+ -+ struct rpivid_dec_env *dec_pool; -+ -+ /* Some of these should be in dev */ -+ struct rpivid_gptr bitbufs[1]; /* Will be 2 */ -+ struct rpivid_gptr cmdbufs[1]; /* Will be 2 */ -+ unsigned int p2idx; -+ atomic_t p2out; -+ struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT]; -+ struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT]; -+ -+ /* Spinlock protecting aux_free */ -+ spinlock_t aux_lock; -+ struct rpivid_q_aux *aux_free; -+ -+ struct rpivid_q_aux *aux_ents[RPIVID_AUX_ENT_COUNT]; -+ -+ unsigned int colmv_stride; -+ unsigned int colmv_picsize; -+}; -+ -+struct rpivid_dec_ops { -+ void (*setup)(struct rpivid_ctx *ctx, struct rpivid_run *run); -+ int (*start)(struct rpivid_ctx *ctx); -+ void (*stop)(struct rpivid_ctx *ctx); -+ void (*trigger)(struct rpivid_ctx *ctx); -+}; -+ -+struct rpivid_variant { -+ unsigned int capabilities; -+ unsigned int quirks; -+ unsigned int mod_rate; -+}; -+ -+struct rpivid_hw_irq_ent; -+ -+struct rpivid_hw_irq_ctrl { -+ /* Spinlock protecting claim and tail */ -+ spinlock_t lock; -+ struct rpivid_hw_irq_ent *claim; -+ struct rpivid_hw_irq_ent *tail; -+ -+ /* Ent for pending irq - also prevents sched */ -+ struct rpivid_hw_irq_ent *irq; -+ /* Non-zero => do not start a new job - outer layer sched pending */ -+ int no_sched; -+ /* Thread CB requested */ -+ bool thread_reqed; -+}; -+ -+struct rpivid_dev { -+ struct v4l2_device v4l2_dev; -+ struct video_device vfd; -+ struct media_device mdev; -+ struct media_pad pad[2]; -+ struct platform_device *pdev; -+ struct device *dev; -+ struct v4l2_m2m_dev *m2m_dev; -+ struct rpivid_dec_ops *dec_ops; -+ -+ /* Device file mutex */ -+ struct mutex dev_mutex; -+ -+ void __iomem *base_irq; -+ void __iomem *base_h265; -+ -+ struct clk *clock; -+ -+ struct rpivid_hw_irq_ctrl ic_active1; -+ struct rpivid_hw_irq_ctrl ic_active2; -+}; -+ -+extern struct rpivid_dec_ops rpivid_dec_ops_h265; -+ -+void 
*rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id); -+ -+#endif ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_dec.c -@@ -0,0 +1,79 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#include <media/v4l2-device.h> -+#include <media/v4l2-ioctl.h> -+#include <media/v4l2-event.h> -+#include <media/v4l2-mem2mem.h> -+ -+#include "rpivid.h" -+#include "rpivid_dec.h" -+ -+void rpivid_device_run(void *priv) -+{ -+ struct rpivid_ctx *ctx = priv; -+ struct rpivid_dev *dev = ctx->dev; -+ struct rpivid_run run = {}; -+ struct media_request *src_req; -+ -+ run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); -+ run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); -+ -+ if (!run.src || !run.dst) { -+ v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n", -+ __func__, run.src, run.dst); -+ /* We are stuffed - this probably won't dig us out of our -+ * current situation but it is better than nothing -+ */ -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_ERROR); -+ return; -+ } -+ -+ /* Apply request(s) controls if needed. 
*/ -+ src_req = run.src->vb2_buf.req_obj.req; -+ -+ if (src_req) -+ v4l2_ctrl_request_setup(src_req, &ctx->hdl); -+ -+ switch (ctx->src_fmt.pixelformat) { -+ case V4L2_PIX_FMT_HEVC_SLICE: -+ run.h265.sps = -+ rpivid_find_control_data(ctx, -+ V4L2_CID_MPEG_VIDEO_HEVC_SPS); -+ run.h265.pps = -+ rpivid_find_control_data(ctx, -+ V4L2_CID_MPEG_VIDEO_HEVC_PPS); -+ run.h265.slice_params = -+ rpivid_find_control_data(ctx, -+ V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS); -+ run.h265.scaling_matrix = -+ rpivid_find_control_data(ctx, -+ V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX); -+ break; -+ -+ default: -+ break; -+ } -+ -+ v4l2_m2m_buf_copy_metadata(run.src, run.dst, true); -+ -+ dev->dec_ops->setup(ctx, &run); -+ -+ /* Complete request(s) controls if needed. */ -+ -+ if (src_req) -+ v4l2_ctrl_request_complete(src_req, &ctx->hdl); -+ -+ dev->dec_ops->trigger(ctx); -+} ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_dec.h -@@ -0,0 +1,19 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#ifndef _RPIVID_DEC_H_ -+#define _RPIVID_DEC_H_ -+ -+void rpivid_device_run(void *priv); -+ -+#endif ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_h265.c -@@ -0,0 +1,2275 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#include <linux/delay.h> -+#include <linux/types.h> -+ -+#include <media/videobuf2-dma-contig.h> -+ 
-+#include "rpivid.h" -+#include "rpivid_hw.h" -+ -+#define DEBUG_TRACE_P1_CMD 0 -+#define DEBUG_TRACE_EXECUTION 0 -+ -+#if DEBUG_TRACE_EXECUTION -+#define xtrace_in(dev_, de_)\ -+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n", __func__,\ -+ (de_) == NULL ? -1 : (de_)->decode_order) -+#define xtrace_ok(dev_, de_)\ -+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: ok\n", __func__,\ -+ (de_) == NULL ? -1 : (de_)->decode_order) -+#define xtrace_fin(dev_, de_)\ -+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: finish\n", __func__,\ -+ (de_) == NULL ? -1 : (de_)->decode_order) -+#define xtrace_fail(dev_, de_)\ -+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: FAIL\n", __func__,\ -+ (de_) == NULL ? -1 : (de_)->decode_order) -+#else -+#define xtrace_in(dev_, de_) -+#define xtrace_ok(dev_, de_) -+#define xtrace_fin(dev_, de_) -+#define xtrace_fail(dev_, de_) -+#endif -+ -+enum hevc_slice_type { -+ HEVC_SLICE_B = 0, -+ HEVC_SLICE_P = 1, -+ HEVC_SLICE_I = 2, -+}; -+ -+enum hevc_layer { L0 = 0, L1 = 1 }; -+ -+static int gptr_alloc(struct rpivid_dev *const dev, struct rpivid_gptr *gptr, -+ size_t size, unsigned long attrs) -+{ -+ gptr->size = size; -+ gptr->attrs = attrs; -+ gptr->addr = 0; -+ gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, &gptr->addr, -+ GFP_KERNEL, gptr->attrs); -+ return !gptr->ptr ? 
-ENOMEM : 0; -+} -+ -+static void gptr_free(struct rpivid_dev *const dev, -+ struct rpivid_gptr *const gptr) -+{ -+ if (gptr->ptr) -+ dma_free_attrs(dev->dev, gptr->size, gptr->ptr, gptr->addr, -+ gptr->attrs); -+ gptr->size = 0; -+ gptr->ptr = NULL; -+ gptr->addr = 0; -+ gptr->attrs = 0; -+} -+ -+/* Realloc but do not copy */ -+static int gptr_realloc_new(struct rpivid_dev * const dev, -+ struct rpivid_gptr * const gptr, size_t size) -+{ -+ if (size == gptr->size) -+ return 0; -+ -+ if (gptr->ptr) -+ dma_free_attrs(dev->dev, gptr->size, gptr->ptr, -+ gptr->addr, gptr->attrs); -+ -+ gptr->addr = 0; -+ gptr->size = size; -+ gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, -+ &gptr->addr, GFP_KERNEL, gptr->attrs); -+ return gptr->ptr ? 0 : -ENOMEM; -+} -+ -+/* floor(log2(x)) */ -+static unsigned int log2_size(size_t x) -+{ -+ unsigned int n = 0; -+ -+ if (x & ~0xffff) { -+ n += 16; -+ x >>= 16; -+ } -+ if (x & ~0xff) { -+ n += 8; -+ x >>= 8; -+ } -+ if (x & ~0xf) { -+ n += 4; -+ x >>= 4; -+ } -+ if (x & ~3) { -+ n += 2; -+ x >>= 2; -+ } -+ return (x & ~1) ? n + 1 : n; -+} -+ -+static size_t round_up_size(const size_t x) -+{ -+ /* Admit no size < 256 */ -+ const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; -+ -+ return x >= (3 << n) ? 
4 << n : (3 << n); -+} -+ -+static size_t next_size(const size_t x) -+{ -+ return round_up_size(x + 1); -+} -+ -+#define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */ -+ -+#define AXI_BASE64 0 -+ -+#define PROB_BACKUP ((20 << 12) + (20 << 6) + (0 << 0)) -+#define PROB_RELOAD ((20 << 12) + (20 << 0) + (0 << 6)) -+ -+#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX -+ -+////////////////////////////////////////////////////////////////////////////// -+ -+struct rpi_cmd { -+ u32 addr; -+ u32 data; -+} __packed; -+ -+struct rpivid_q_aux { -+ unsigned int refcount; -+ unsigned int q_index; -+ struct rpivid_q_aux *next; -+ struct rpivid_gptr col; -+}; -+ -+////////////////////////////////////////////////////////////////////////////// -+ -+enum rpivid_decode_state { -+ RPIVID_DECODE_SLICE_START, -+ RPIVID_DECODE_SLICE_CONTINUE, -+ RPIVID_DECODE_ERROR_CONTINUE, -+ RPIVID_DECODE_ERROR_DONE, -+ RPIVID_DECODE_PHASE1, -+ RPIVID_DECODE_END, -+}; -+ -+struct rpivid_dec_env { -+ struct rpivid_ctx *ctx; -+ struct rpivid_dec_env *next; -+ -+ enum rpivid_decode_state state; -+ unsigned int decode_order; -+ int p1_status; /* P1 status - what to realloc */ -+ -+ struct rpivid_dec_env *phase_wait_q_next; -+ -+ struct rpi_cmd *cmd_fifo; -+ unsigned int cmd_len, cmd_max; -+ unsigned int num_slice_msgs; -+ unsigned int pic_width_in_ctbs_y; -+ unsigned int pic_height_in_ctbs_y; -+ unsigned int dpbno_col; -+ u32 reg_slicestart; -+ int collocated_from_l0_flag; -+ unsigned int wpp_entry_x; -+ unsigned int wpp_entry_y; -+ -+ u32 rpi_config2; -+ u32 rpi_framesize; -+ u32 rpi_currpoc; -+ -+ struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer -+ unsigned int frame_c_offset; -+ unsigned int frame_stride; -+ dma_addr_t frame_addr; -+ dma_addr_t ref_addrs[16]; -+ struct rpivid_q_aux *frame_aux; -+ struct rpivid_q_aux *col_aux; -+ -+ dma_addr_t pu_base_vc; -+ dma_addr_t coeff_base_vc; -+ u32 pu_stride; -+ u32 coeff_stride; -+ -+ struct rpivid_gptr *bit_copy_gptr; -+ size_t 
bit_copy_len; -+ struct rpivid_gptr *cmd_copy_gptr; -+ -+ u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3]; -+ u8 scaling_factors[NUM_SCALING_FACTORS]; -+ -+ struct rpivid_hw_irq_ent irq_ent; -+}; -+ -+#define member_size(type, member) sizeof(((type *)0)->member) -+ -+struct rpivid_dec_state { -+ struct v4l2_ctrl_hevc_sps sps; -+ struct v4l2_ctrl_hevc_pps pps; -+ -+ // Helper vars & tables derived from sps/pps -+ unsigned int log2_ctb_size; /* log2 width of a CTB */ -+ unsigned int ctb_width; /* Width in CTBs */ -+ unsigned int ctb_height; /* Height in CTBs */ -+ unsigned int ctb_size; /* Pic area in CTBs */ -+ unsigned int num_tile_columns; -+ unsigned int num_tile_rows; -+ u8 column_width[member_size(struct v4l2_ctrl_hevc_pps, -+ column_width_minus1)]; -+ u8 row_height[member_size(struct v4l2_ctrl_hevc_pps, -+ row_height_minus1)]; -+ -+ int *col_bd; -+ int *row_bd; -+ int *ctb_addr_rs_to_ts; -+ int *ctb_addr_ts_to_rs; -+ int *tile_id; -+ -+ // Aux starage for DPB -+ // Hold refs -+ struct rpivid_q_aux *ref_aux[HEVC_MAX_REFS]; -+ struct rpivid_q_aux *frame_aux; -+ -+ // Slice vars -+ unsigned int slice_idx; -+ bool frame_end; -+ bool slice_temporal_mvp; /* Slice flag but constant for frame */ -+ -+ // Temp vars per run - don't actually need to persist -+ u8 *src_buf; -+ dma_addr_t src_addr; -+ const struct v4l2_ctrl_hevc_slice_params *sh; -+ unsigned int nb_refs[2]; -+ unsigned int slice_qp; -+ unsigned int max_num_merge_cand; // 0 if I-slice -+ bool dependent_slice_segment_flag; -+}; -+ -+static inline int clip_int(const int x, const int lo, const int hi) -+{ -+ return x < lo ? lo : x > hi ? hi : x; -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Phase 1 command and bit FIFOs -+ -+#if DEBUG_TRACE_P1_CMD -+static int p1_z; -+#endif -+ -+// ???? 
u16 addr - put in u32 -+static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr, -+ const u32 data) -+{ -+ if (de->cmd_len == de->cmd_max) -+ de->cmd_fifo = -+ krealloc(de->cmd_fifo, -+ (de->cmd_max *= 2) * sizeof(struct rpi_cmd), -+ GFP_KERNEL); -+ de->cmd_fifo[de->cmd_len].addr = addr; -+ de->cmd_fifo[de->cmd_len].data = data; -+ -+#if DEBUG_TRACE_P1_CMD -+ if (++p1_z < 256) { -+ v4l2_info(&de->ctx->dev->v4l2_dev, "[%02x] %x %x\n", -+ de->cmd_len, addr, data); -+ } -+#endif -+ -+ return de->cmd_len++; -+} -+ -+static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num) -+{ -+ int i; -+ -+ for (i = 1; ctb >= bd[i]; i++) -+ ; // bd[] has num+1 elements; bd[0]=0; -+ return i - 1; -+} -+ -+static int ctb_to_slice_w_h(unsigned int ctb, int ctb_size, int width, -+ unsigned int *bd, int num) -+{ -+ if (ctb < bd[num - 1]) -+ return ctb_size; -+ else if (width % ctb_size) -+ return width % ctb_size; -+ else -+ return ctb_size; -+} -+ -+static void aux_q_free(struct rpivid_ctx *const ctx, -+ struct rpivid_q_aux *const aq) -+{ -+ struct rpivid_dev *const dev = ctx->dev; -+ -+ gptr_free(dev, &aq->col); -+ kfree(aq); -+} -+ -+static struct rpivid_q_aux *aux_q_alloc(struct rpivid_ctx *const ctx) -+{ -+ struct rpivid_dev *const dev = ctx->dev; -+ struct rpivid_q_aux *const aq = kzalloc(sizeof(*aq), GFP_KERNEL); -+ -+ if (!aq) -+ return NULL; -+ -+ aq->refcount = 1; -+ if (gptr_alloc(dev, &aq->col, ctx->colmv_picsize, -+ DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING)) -+ goto fail; -+ -+ return aq; -+ -+fail: -+ kfree(aq); -+ return NULL; -+} -+ -+static struct rpivid_q_aux *aux_q_new(struct rpivid_ctx *const ctx, -+ const unsigned int q_index) -+{ -+ struct rpivid_q_aux *aq; -+ unsigned long lockflags; -+ -+ spin_lock_irqsave(&ctx->aux_lock, lockflags); -+ aq = ctx->aux_free; -+ if (aq) { -+ ctx->aux_free = aq->next; -+ aq->next = NULL; -+ aq->refcount = 1; -+ } -+ spin_unlock_irqrestore(&ctx->aux_lock, lockflags); -+ -+ if (!aq) { -+ aq = 
aux_q_alloc(ctx); -+ if (!aq) -+ return NULL; -+ } -+ -+ aq->q_index = q_index; -+ ctx->aux_ents[q_index] = aq; -+ return aq; -+} -+ -+static struct rpivid_q_aux *aux_q_ref(struct rpivid_ctx *const ctx, -+ struct rpivid_q_aux *const aq) -+{ -+ if (aq) { -+ unsigned long lockflags; -+ -+ spin_lock_irqsave(&ctx->aux_lock, lockflags); -+ -+ ++aq->refcount; -+ -+ spin_unlock_irqrestore(&ctx->aux_lock, lockflags); -+ } -+ return aq; -+} -+ -+static void aux_q_release(struct rpivid_ctx *const ctx, -+ struct rpivid_q_aux **const paq) -+{ -+ struct rpivid_q_aux *const aq = *paq; -+ *paq = NULL; -+ -+ if (aq) { -+ unsigned long lockflags; -+ -+ spin_lock_irqsave(&ctx->aux_lock, lockflags); -+ -+ if (--aq->refcount == 0) { -+ aq->next = ctx->aux_free; -+ ctx->aux_free = aq; -+ ctx->aux_ents[aq->q_index] = NULL; -+ } -+ -+ spin_unlock_irqrestore(&ctx->aux_lock, lockflags); -+ } -+} -+ -+static void aux_q_init(struct rpivid_ctx *const ctx) -+{ -+ spin_lock_init(&ctx->aux_lock); -+ ctx->aux_free = NULL; -+} -+ -+static void aux_q_uninit(struct rpivid_ctx *const ctx) -+{ -+ struct rpivid_q_aux *aq; -+ -+ ctx->colmv_picsize = 0; -+ ctx->colmv_stride = 0; -+ while ((aq = ctx->aux_free) != NULL) { -+ ctx->aux_free = aq->next; -+ aux_q_free(ctx, aq); -+ } -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+ -+/* -+ * Initialisation process for context variables (CABAC init) -+ * see H.265 9.3.2.2 -+ * -+ * N.B. 
If comparing with FFmpeg note that this h/w uses slightly different -+ * offsets to FFmpegs array -+ */ -+ -+/* Actual number of values */ -+#define RPI_PROB_VALS 154U -+/* Rounded up as we copy words */ -+#define RPI_PROB_ARRAY_SIZE ((154 + 3) & ~3) -+ -+/* Initialiser values - see tables H.265 9-4 through 9-42 */ -+static const u8 prob_init[3][156] = { -+ { -+ 153, 200, 139, 141, 157, 154, 154, 154, 154, 154, 184, 154, 154, -+ 154, 184, 63, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, -+ 154, 154, 154, 153, 138, 138, 111, 141, 94, 138, 182, 154, 154, -+ 154, 140, 92, 137, 138, 140, 152, 138, 139, 153, 74, 149, 92, -+ 139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, 110, -+ 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111, -+ 79, 108, 123, 63, 110, 110, 124, 125, 140, 153, 125, 127, 140, -+ 109, 111, 143, 127, 111, 79, 108, 123, 63, 91, 171, 134, 141, -+ 138, 153, 136, 167, 152, 152, 139, 139, 111, 111, 125, 110, 110, -+ 94, 124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141, -+ 179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182, -+ 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, 0, 0, -+ }, -+ { -+ 153, 185, 107, 139, 126, 197, 185, 201, 154, 149, 154, 139, 154, -+ 154, 154, 152, 110, 122, 95, 79, 63, 31, 31, 153, 153, 168, -+ 140, 198, 79, 124, 138, 94, 153, 111, 149, 107, 167, 154, 154, -+ 154, 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136, -+ 153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, 125, -+ 110, 94, 110, 95, 79, 125, 111, 110, 78, 110, 111, 111, 95, -+ 94, 108, 123, 108, 125, 110, 94, 110, 95, 79, 125, 111, 110, -+ 78, 110, 111, 111, 95, 94, 108, 123, 108, 121, 140, 61, 154, -+ 107, 167, 91, 122, 107, 167, 139, 139, 155, 154, 139, 153, 139, -+ 123, 123, 63, 153, 166, 183, 140, 136, 153, 154, 166, 183, 140, -+ 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123, -+ 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0, -+ }, -+ { -+ 153, 160, 107, 139, 
126, 197, 185, 201, 154, 134, 154, 139, 154, -+ 154, 183, 152, 154, 137, 95, 79, 63, 31, 31, 153, 153, 168, -+ 169, 198, 79, 224, 167, 122, 153, 111, 149, 92, 167, 154, 154, -+ 154, 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136, -+ 153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, 125, -+ 110, 124, 110, 95, 94, 125, 111, 111, 79, 125, 126, 111, 111, -+ 79, 108, 123, 93, 125, 110, 124, 110, 95, 94, 125, 111, 111, -+ 79, 125, 126, 111, 111, 79, 108, 123, 93, 121, 140, 61, 154, -+ 107, 167, 91, 107, 107, 167, 139, 139, 170, 154, 139, 153, 139, -+ 123, 123, 63, 124, 166, 183, 140, 136, 153, 154, 166, 183, 140, -+ 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138, -+ 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0, -+ }, -+}; -+ -+static void write_prob(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) -+{ -+ u8 dst[RPI_PROB_ARRAY_SIZE]; -+ -+ const unsigned int init_type = -+ ((s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT) != 0 && -+ s->sh->slice_type != HEVC_SLICE_I) ? 
-+ s->sh->slice_type + 1 : -+ 2 - s->sh->slice_type; -+ const u8 *p = prob_init[init_type]; -+ const int q = clip_int(s->slice_qp, 0, 51); -+ unsigned int i; -+ -+ for (i = 0; i < RPI_PROB_VALS; i++) { -+ int init_value = p[i]; -+ int m = (init_value >> 4) * 5 - 45; -+ int n = ((init_value & 15) << 3) - 16; -+ int pre = 2 * (((m * q) >> 4) + n) - 127; -+ -+ pre ^= pre >> 31; -+ if (pre > 124) -+ pre = 124 + (pre & 1); -+ dst[i] = pre; -+ } -+ for (i = RPI_PROB_VALS; i != RPI_PROB_ARRAY_SIZE; ++i) -+ dst[i] = 0; -+ -+ for (i = 0; i < RPI_PROB_ARRAY_SIZE; i += 4) -+ p1_apb_write(de, 0x1000 + i, -+ dst[i] + (dst[i + 1] << 8) + (dst[i + 2] << 16) + -+ (dst[i + 3] << 24)); -+} -+ -+static void write_scaling_factors(struct rpivid_dec_env *const de) -+{ -+ int i; -+ const u8 *p = (u8 *)de->scaling_factors; -+ -+ for (i = 0; i < NUM_SCALING_FACTORS; i += 4, p += 4) -+ p1_apb_write(de, 0x2000 + i, -+ p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24)); -+} -+ -+static inline __u32 dma_to_axi_addr(dma_addr_t a) -+{ -+ return (__u32)(a >> 6); -+} -+ -+static void write_bitstream(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) -+{ -+ // Note that FFmpeg removes emulation prevention bytes, so this is -+ // matched in the configuration here. -+ // Whether that is the correct behaviour or not is not clear in the -+ // spec. 
-+ const int rpi_use_emu = 1; -+ unsigned int offset = s->sh->data_bit_offset / 8 + 1; -+ const unsigned int len = (s->sh->bit_size + 7) / 8 - offset; -+ dma_addr_t addr; -+ -+ if (s->src_addr != 0) { -+ addr = s->src_addr + offset; -+ } else { -+ memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len, -+ s->src_buf + offset, len); -+ addr = de->bit_copy_gptr->addr + de->bit_copy_len; -+ de->bit_copy_len += (len + 63) & ~63; -+ } -+ offset = addr & 63; -+ -+ p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr)); -+ p1_apb_write(de, RPI_BFNUM, len); -+ p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop -+ p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6)); -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+ -+static void write_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const unsigned int slice_w, -+ const unsigned int slice_h) -+{ -+ u32 u32 = (s->sh->slice_type << 12) + -+ (((s->sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA) != 0) -+ << 14) + -+ (((s->sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA) != 0) -+ << 15) + -+ (slice_w << 17) + (slice_h << 24); -+ -+ u32 |= (s->max_num_merge_cand << 0) + (s->nb_refs[L0] << 4) + -+ (s->nb_refs[L1] << 8); -+ -+ if (s->sh->slice_type == HEVC_SLICE_B) -+ u32 |= ((s->sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO) != 0) -+ << 16; -+ p1_apb_write(de, RPI_SLICE, u32); -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Tiles mode -+ -+static void new_entry_point(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const int do_bte, -+ const int reset_qp_y, const int ctb_addr_ts) -+{ -+ int ctb_col = s->ctb_addr_ts_to_rs[ctb_addr_ts] % -+ de->pic_width_in_ctbs_y; -+ int ctb_row = s->ctb_addr_ts_to_rs[ctb_addr_ts] / -+ de->pic_width_in_ctbs_y; -+ -+ int tile_x = ctb_to_tile(ctb_col, s->col_bd, s->num_tile_columns); -+ int tile_y = 
ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows); -+ -+ int endx = s->col_bd[tile_x + 1] - 1; -+ int endy = s->row_bd[tile_y + 1] - 1; -+ -+ u8 slice_w = ctb_to_slice_w_h(ctb_col, 1 << s->log2_ctb_size, -+ s->sps.pic_width_in_luma_samples, -+ s->col_bd, s->num_tile_columns); -+ u8 slice_h = ctb_to_slice_w_h(ctb_row, 1 << s->log2_ctb_size, -+ s->sps.pic_height_in_luma_samples, -+ s->row_bd, s->num_tile_rows); -+ -+ p1_apb_write(de, RPI_TILESTART, -+ s->col_bd[tile_x] + (s->row_bd[tile_y] << 16)); -+ p1_apb_write(de, RPI_TILEEND, endx + (endy << 16)); -+ -+ if (do_bte) -+ p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16)); -+ -+ write_slice(de, s, slice_w, slice_h); -+ -+ if (reset_qp_y) { -+ unsigned int sps_qp_bd_offset = -+ 6 * s->sps.bit_depth_luma_minus8; -+ -+ p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp); -+ } -+ -+ p1_apb_write(de, RPI_MODE, -+ (0xFFFF << 0) + (0x0 << 16) + -+ ((tile_x == s->num_tile_columns - 1) << 17) + -+ ((tile_y == s->num_tile_rows - 1) << 18)); -+ -+ p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16)); -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+ -+static void new_slice_segment(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) -+{ -+ const struct v4l2_ctrl_hevc_sps *const sps = &s->sps; -+ const struct v4l2_ctrl_hevc_pps *const pps = &s->pps; -+ -+ p1_apb_write(de, -+ RPI_SPS0, -+ ((sps->log2_min_luma_coding_block_size_minus3 + 3) << 0) | -+ (s->log2_ctb_size << 4) | -+ ((sps->log2_min_luma_transform_block_size_minus2 + 2) -+ << 8) | -+ ((sps->log2_min_luma_transform_block_size_minus2 + 2 + -+ sps->log2_diff_max_min_luma_transform_block_size) -+ << 12) | -+ ((sps->bit_depth_luma_minus8 + 8) << 16) | -+ ((sps->bit_depth_chroma_minus8 + 8) << 20) | -+ (sps->max_transform_hierarchy_depth_intra << 24) | -+ (sps->max_transform_hierarchy_depth_inter << 28)); -+ -+ p1_apb_write(de, -+ RPI_SPS1, -+ ((sps->pcm_sample_bit_depth_luma_minus1 + 1) << 
0) | -+ ((sps->pcm_sample_bit_depth_chroma_minus1 + 1) << 4) | -+ ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3) -+ << 8) | -+ ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 + -+ sps->log2_diff_max_min_pcm_luma_coding_block_size) -+ << 12) | -+ (((sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) ? -+ 0 : sps->chroma_format_idc) << 16) | -+ ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)) << 18) | -+ ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) << 19) | -+ ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)) -+ << 20) | -+ ((!!(sps->flags & -+ V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED)) -+ << 21)); -+ -+ p1_apb_write(de, -+ RPI_PPS, -+ ((s->log2_ctb_size - pps->diff_cu_qp_delta_depth) << 0) | -+ ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED)) -+ << 4) | -+ ((!!(pps->flags & -+ V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED)) -+ << 5) | -+ ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED)) -+ << 6) | -+ ((!!(pps->flags & -+ V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED)) -+ << 7) | -+ (((pps->pps_cb_qp_offset + s->sh->slice_cb_qp_offset) & 255) -+ << 8) | -+ (((pps->pps_cr_qp_offset + s->sh->slice_cr_qp_offset) & 255) -+ << 16) | -+ ((!!(pps->flags & -+ V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)) -+ << 24)); -+ -+ if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0) -+ write_scaling_factors(de); -+ -+ if (!s->dependent_slice_segment_flag) { -+ int ctb_col = s->sh->slice_segment_addr % -+ de->pic_width_in_ctbs_y; -+ int ctb_row = s->sh->slice_segment_addr / -+ de->pic_width_in_ctbs_y; -+ -+ de->reg_slicestart = (ctb_col << 0) + (ctb_row << 16); -+ } -+ -+ p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart); -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Slice messages -+ -+static void msg_slice(struct rpivid_dec_env *const de, const u16 msg) -+{ -+ de->slice_msgs[de->num_slice_msgs++] = msg; -+} -+ -+static void program_slicecmds(struct 
rpivid_dec_env *const de, -+ const int sliceid) -+{ -+ int i; -+ -+ p1_apb_write(de, RPI_SLICECMDS, de->num_slice_msgs + (sliceid << 8)); -+ -+ for (i = 0; i < de->num_slice_msgs; i++) -+ p1_apb_write(de, 0x4000 + 4 * i, de->slice_msgs[i] & 0xffff); -+} -+ -+// NoBackwardPredictionFlag 8.3.5 -+// Simply checks POCs -+static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb, -+ const __u8 *const idx, const unsigned int n, -+ const unsigned int cur_poc) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < n; ++i) { -+ // Compare mod 2^16 -+ // We only get u16 pocs & 8.3.1 says -+ // "The bitstream shall not contain data that result in values -+ // of DiffPicOrderCnt( picA, picB ) used in the decoding -+ // process that are not in the range of −2^15 to 2^15 − 1, -+ // inclusive." -+ if (((cur_poc - dpb[idx[i]].pic_order_cnt[0]) & 0x8000) != 0) -+ return 0; -+ } -+ return 1; -+} -+ -+static void pre_slice_decode(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) -+{ -+ const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh; -+ int weighted_pred_flag, idx; -+ u16 cmd_slice; -+ unsigned int collocated_from_l0_flag; -+ -+ de->num_slice_msgs = 0; -+ -+ cmd_slice = 0; -+ if (sh->slice_type == HEVC_SLICE_I) -+ cmd_slice = 1; -+ if (sh->slice_type == HEVC_SLICE_P) -+ cmd_slice = 2; -+ if (sh->slice_type == HEVC_SLICE_B) -+ cmd_slice = 3; -+ -+ cmd_slice |= (s->nb_refs[L0] << 2) | (s->nb_refs[L1] << 6) | -+ (s->max_num_merge_cand << 11); -+ -+ collocated_from_l0_flag = -+ !s->slice_temporal_mvp || -+ sh->slice_type != HEVC_SLICE_B || -+ (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0); -+ cmd_slice |= collocated_from_l0_flag << 14; -+ -+ if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) { -+ // Flag to say all reference pictures are from the past -+ const int no_backward_pred_flag = -+ has_backward(sh->dpb, sh->ref_idx_l0, s->nb_refs[L0], -+ sh->slice_pic_order_cnt) && -+ has_backward(sh->dpb, sh->ref_idx_l1, 
s->nb_refs[L1], -+ sh->slice_pic_order_cnt); -+ cmd_slice |= no_backward_pred_flag << 10; -+ msg_slice(de, cmd_slice); -+ -+ if (s->slice_temporal_mvp) { -+ const __u8 *const rpl = collocated_from_l0_flag ? -+ sh->ref_idx_l0 : sh->ref_idx_l1; -+ de->dpbno_col = rpl[sh->collocated_ref_idx]; -+ //v4l2_info(&de->ctx->dev->v4l2_dev, -+ // "L0=%d col_ref_idx=%d, -+ // dpb_no=%d\n", collocated_from_l0_flag, -+ // sh->collocated_ref_idx, de->dpbno_col); -+ } -+ -+ // Write reference picture descriptions -+ weighted_pred_flag = -+ sh->slice_type == HEVC_SLICE_P ? -+ !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) : -+ !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED); -+ -+ for (idx = 0; idx < s->nb_refs[L0]; ++idx) { -+ unsigned int dpb_no = sh->ref_idx_l0[idx]; -+ //v4l2_info(&de->ctx->dev->v4l2_dev, -+ // "L0[%d]=dpb[%d]\n", idx, dpb_no); -+ -+ msg_slice(de, -+ dpb_no | -+ (sh->dpb[dpb_no].rps == -+ V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ? -+ (1 << 4) : 0) | -+ (weighted_pred_flag ? (3 << 5) : 0)); -+ msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]); -+ -+ if (weighted_pred_flag) { -+ const struct v4l2_hevc_pred_weight_table -+ *const w = &sh->pred_weight_table; -+ const int luma_weight_denom = -+ (1 << w->luma_log2_weight_denom); -+ const unsigned int chroma_log2_weight_denom = -+ (w->luma_log2_weight_denom + -+ w->delta_chroma_log2_weight_denom); -+ const int chroma_weight_denom = -+ (1 << chroma_log2_weight_denom); -+ -+ msg_slice(de, -+ w->luma_log2_weight_denom | -+ (((w->delta_luma_weight_l0[idx] + -+ luma_weight_denom) & 0x1ff) -+ << 3)); -+ msg_slice(de, w->luma_offset_l0[idx] & 0xff); -+ msg_slice(de, -+ chroma_log2_weight_denom | -+ (((w->delta_chroma_weight_l0[idx][0] + -+ chroma_weight_denom) & 0x1ff) -+ << 3)); -+ msg_slice(de, -+ w->chroma_offset_l0[idx][0] & 0xff); -+ msg_slice(de, -+ chroma_log2_weight_denom | -+ (((w->delta_chroma_weight_l0[idx][1] + -+ chroma_weight_denom) & 0x1ff) -+ << 3)); -+ msg_slice(de, -+ w->chroma_offset_l0[idx][1] & 0xff); -+ 
} -+ } -+ -+ for (idx = 0; idx < s->nb_refs[L1]; ++idx) { -+ unsigned int dpb_no = sh->ref_idx_l1[idx]; -+ //v4l2_info(&de->ctx->dev->v4l2_dev, -+ // "L1[%d]=dpb[%d]\n", idx, dpb_no); -+ msg_slice(de, -+ dpb_no | -+ (sh->dpb[dpb_no].rps == -+ V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ? -+ (1 << 4) : 0) | -+ (weighted_pred_flag ? (3 << 5) : 0)); -+ msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]); -+ if (weighted_pred_flag) { -+ const struct v4l2_hevc_pred_weight_table -+ *const w = &sh->pred_weight_table; -+ const int luma_weight_denom = -+ (1 << w->luma_log2_weight_denom); -+ const unsigned int chroma_log2_weight_denom = -+ (w->luma_log2_weight_denom + -+ w->delta_chroma_log2_weight_denom); -+ const int chroma_weight_denom = -+ (1 << chroma_log2_weight_denom); -+ -+ msg_slice(de, -+ w->luma_log2_weight_denom | -+ (((w->delta_luma_weight_l1[idx] + -+ luma_weight_denom) & 0x1ff) << 3)); -+ msg_slice(de, w->luma_offset_l1[idx] & 0xff); -+ msg_slice(de, -+ chroma_log2_weight_denom | -+ (((w->delta_chroma_weight_l1[idx][0] + -+ chroma_weight_denom) & 0x1ff) -+ << 3)); -+ msg_slice(de, -+ w->chroma_offset_l1[idx][0] & 0xff); -+ msg_slice(de, -+ chroma_log2_weight_denom | -+ (((w->delta_chroma_weight_l1[idx][1] + -+ chroma_weight_denom) & 0x1ff) -+ << 3)); -+ msg_slice(de, -+ w->chroma_offset_l1[idx][1] & 0xff); -+ } -+ } -+ } else { -+ msg_slice(de, cmd_slice); -+ } -+ -+ msg_slice(de, -+ (sh->slice_beta_offset_div2 & 15) | -+ ((sh->slice_tc_offset_div2 & 15) << 4) | -+ ((sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED) ? -+ 1 << 8 : 0) | -+ ((sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED) ? -+ 1 << 9 : 0) | -+ ((s->pps.flags & -+ V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED) ? 
-+ 1 << 10 : 0)); -+ -+ msg_slice(de, ((sh->slice_cr_qp_offset & 31) << 5) + -+ (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Write STATUS register with expected end CTU address of previous slice -+ -+static void end_previous_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const int ctb_addr_ts) -+{ -+ int last_x = -+ s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y; -+ int last_y = -+ s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y; -+ -+ p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18)); -+} -+ -+static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row) -+{ -+ p1_apb_write(de, RPI_STATUS, (ctb_row << 18) + 0x25); -+ p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); -+ p1_apb_write(de, RPI_MODE, -+ ctb_row == de->pic_height_in_ctbs_y - 1 ? -+ 0x70000 : 0x30000); -+ p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2); -+} -+ -+static void wpp_end_previous_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ int ctb_addr_ts) -+{ -+ int new_x = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y; -+ int new_y = s->sh->slice_segment_addr / de->pic_width_in_ctbs_y; -+ int last_x = -+ s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y; -+ int last_y = -+ s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y; -+ -+ if (de->wpp_entry_x < 2 && (de->wpp_entry_y < new_y || new_x > 2) && -+ de->pic_width_in_ctbs_y > 2) -+ wpp_pause(de, last_y); -+ p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18)); -+ if (new_x == 2 || (de->pic_width_in_ctbs_y == 2 && -+ de->wpp_entry_y < new_y)) -+ p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Wavefront mode -+ -+static void wpp_entry_point(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state 
*const s, -+ const int do_bte, -+ const int reset_qp_y, const int ctb_addr_ts) -+{ -+ int ctb_size = 1 << s->log2_ctb_size; -+ int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts]; -+ -+ int ctb_col = de->wpp_entry_x = ctb_addr_rs % de->pic_width_in_ctbs_y; -+ int ctb_row = de->wpp_entry_y = ctb_addr_rs / de->pic_width_in_ctbs_y; -+ -+ int endx = de->pic_width_in_ctbs_y - 1; -+ int endy = ctb_row; -+ -+ u8 slice_w = ctb_to_slice_w_h(ctb_col, ctb_size, -+ s->sps.pic_width_in_luma_samples, -+ s->col_bd, s->num_tile_columns); -+ u8 slice_h = ctb_to_slice_w_h(ctb_row, ctb_size, -+ s->sps.pic_height_in_luma_samples, -+ s->row_bd, s->num_tile_rows); -+ -+ p1_apb_write(de, RPI_TILESTART, 0); -+ p1_apb_write(de, RPI_TILEEND, endx + (endy << 16)); -+ -+ if (do_bte) -+ p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16)); -+ -+ write_slice(de, s, slice_w, -+ ctb_row == de->pic_height_in_ctbs_y - 1 ? -+ slice_h : ctb_size); -+ -+ if (reset_qp_y) { -+ unsigned int sps_qp_bd_offset = -+ 6 * s->sps.bit_depth_luma_minus8; -+ -+ p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp); -+ } -+ -+ p1_apb_write(de, RPI_MODE, -+ ctb_row == de->pic_height_in_ctbs_y - 1 ? 
-+ 0x60001 : 0x20001); -+ p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16)); -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Wavefront mode -+ -+static void wpp_decode_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const struct v4l2_ctrl_hevc_slice_params *sh, -+ int ctb_addr_ts) -+{ -+ int i, reset_qp_y = 1; -+ int indep = !s->dependent_slice_segment_flag; -+ int ctb_col = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y; -+ -+ if (ctb_addr_ts) -+ wpp_end_previous_slice(de, s, ctb_addr_ts); -+ pre_slice_decode(de, s); -+ write_bitstream(de, s); -+ if (ctb_addr_ts == 0 || indep || de->pic_width_in_ctbs_y == 1) -+ write_prob(de, s); -+ else if (ctb_col == 0) -+ p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD); -+ else -+ reset_qp_y = 0; -+ program_slicecmds(de, s->slice_idx); -+ new_slice_segment(de, s); -+ wpp_entry_point(de, s, indep, reset_qp_y, ctb_addr_ts); -+ -+ for (i = 0; i < s->sh->num_entry_point_offsets; i++) { -+ int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts]; -+ int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y; -+ int last_x = de->pic_width_in_ctbs_y - 1; -+ -+ if (de->pic_width_in_ctbs_y > 2) -+ wpp_pause(de, ctb_row); -+ p1_apb_write(de, RPI_STATUS, -+ (ctb_row << 18) + (last_x << 5) + 2); -+ if (de->pic_width_in_ctbs_y == 2) -+ p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); -+ if (de->pic_width_in_ctbs_y == 1) -+ write_prob(de, s); -+ else -+ p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD); -+ ctb_addr_ts += s->column_width[0]; -+ wpp_entry_point(de, s, 0, 1, ctb_addr_ts); -+ } -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Tiles mode -+ -+static void decode_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const struct v4l2_ctrl_hevc_slice_params *const sh, -+ int ctb_addr_ts) -+{ -+ int i, reset_qp_y; -+ -+ if (ctb_addr_ts) -+ end_previous_slice(de, s, ctb_addr_ts); -+ -+ 
pre_slice_decode(de, s); -+ write_bitstream(de, s); -+ -+#if DEBUG_TRACE_P1_CMD -+ if (p1_z < 256) { -+ v4l2_info(&de->ctx->dev->v4l2_dev, -+ "TS=%d, tile=%d/%d, dss=%d, flags=%#llx\n", -+ ctb_addr_ts, s->tile_id[ctb_addr_ts], -+ s->tile_id[ctb_addr_ts - 1], -+ s->dependent_slice_segment_flag, sh->flags); -+ } -+#endif -+ -+ reset_qp_y = ctb_addr_ts == 0 || -+ s->tile_id[ctb_addr_ts] != s->tile_id[ctb_addr_ts - 1] || -+ !s->dependent_slice_segment_flag; -+ if (reset_qp_y) -+ write_prob(de, s); -+ -+ program_slicecmds(de, s->slice_idx); -+ new_slice_segment(de, s); -+ new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y, -+ ctb_addr_ts); -+ -+ for (i = 0; i < s->sh->num_entry_point_offsets; i++) { -+ int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts]; -+ int ctb_col = ctb_addr_rs % de->pic_width_in_ctbs_y; -+ int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y; -+ int tile_x = ctb_to_tile(ctb_col, s->col_bd, -+ s->num_tile_columns - 1); -+ int tile_y = -+ ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows - 1); -+ int last_x = s->col_bd[tile_x + 1] - 1; -+ int last_y = s->row_bd[tile_y + 1] - 1; -+ -+ p1_apb_write(de, RPI_STATUS, -+ 2 + (last_x << 5) + (last_y << 18)); -+ write_prob(de, s); -+ ctb_addr_ts += s->column_width[tile_x] * s->row_height[tile_y]; -+ new_entry_point(de, s, 0, 1, ctb_addr_ts); -+ } -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Scaling factors -+ -+static void expand_scaling_list(const unsigned int size_id, -+ const unsigned int matrix_id, u8 *const dst0, -+ const u8 *const src0, uint8_t dc) -+{ -+ u8 *d; -+ unsigned int x, y; -+ -+ // FIXME: matrix_id is unused ? 
-+ switch (size_id) { -+ case 0: -+ memcpy(dst0, src0, 16); -+ break; -+ case 1: -+ memcpy(dst0, src0, 64); -+ break; -+ case 2: -+ d = dst0; -+ -+ for (y = 0; y != 16; y++) { -+ const u8 *s = src0 + (y >> 1) * 8; -+ -+ for (x = 0; x != 8; ++x) { -+ *d++ = *s; -+ *d++ = *s++; -+ } -+ } -+ dst0[0] = dc; -+ break; -+ default: -+ d = dst0; -+ -+ for (y = 0; y != 32; y++) { -+ const u8 *s = src0 + (y >> 2) * 8; -+ -+ for (x = 0; x != 8; ++x) { -+ *d++ = *s; -+ *d++ = *s; -+ *d++ = *s; -+ *d++ = *s++; -+ } -+ } -+ dst0[0] = dc; -+ break; -+ } -+} -+ -+static void populate_scaling_factors(const struct rpivid_run *const run, -+ struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) -+{ -+ const struct v4l2_ctrl_hevc_scaling_matrix *const sl = -+ run->h265.scaling_matrix; -+ // Array of constants for scaling factors -+ static const u32 scaling_factor_offsets[4][6] = { -+ // MID0 MID1 MID2 MID3 MID4 MID5 -+ // SID0 (4x4) -+ { 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050 }, -+ // SID1 (8x8) -+ { 0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0 }, -+ // SID2 (16x16) -+ { 0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0 }, -+ // SID3 (32x32) -+ { 0x07E0, 0x0BE0, 0x0000, 0x0000, 0x0000, 0x0000 } -+ }; -+ -+ unsigned int mid; -+ -+ for (mid = 0; mid < 6; mid++) -+ expand_scaling_list(0, mid, -+ de->scaling_factors + -+ scaling_factor_offsets[0][mid], -+ sl->scaling_list_4x4[mid], 0); -+ for (mid = 0; mid < 6; mid++) -+ expand_scaling_list(1, mid, -+ de->scaling_factors + -+ scaling_factor_offsets[1][mid], -+ sl->scaling_list_8x8[mid], 0); -+ for (mid = 0; mid < 6; mid++) -+ expand_scaling_list(2, mid, -+ de->scaling_factors + -+ scaling_factor_offsets[2][mid], -+ sl->scaling_list_16x16[mid], -+ sl->scaling_list_dc_coef_16x16[mid]); -+ for (mid = 0; mid < 2; mid += 1) -+ expand_scaling_list(3, mid, -+ de->scaling_factors + -+ scaling_factor_offsets[3][mid], -+ sl->scaling_list_32x32[mid], -+ sl->scaling_list_dc_coef_32x32[mid]); -+} -+ -+static void 
free_ps_info(struct rpivid_dec_state *const s) -+{ -+ kfree(s->ctb_addr_rs_to_ts); -+ s->ctb_addr_rs_to_ts = NULL; -+ kfree(s->ctb_addr_ts_to_rs); -+ s->ctb_addr_ts_to_rs = NULL; -+ kfree(s->tile_id); -+ s->tile_id = NULL; -+ -+ kfree(s->col_bd); -+ s->col_bd = NULL; -+ kfree(s->row_bd); -+ s->row_bd = NULL; -+} -+ -+static int updated_ps(struct rpivid_dec_state *const s) -+{ -+ unsigned int ctb_addr_rs; -+ int j, x, y, tile_id; -+ unsigned int i; -+ -+ free_ps_info(s); -+ -+ // Inferred parameters -+ s->log2_ctb_size = s->sps.log2_min_luma_coding_block_size_minus3 + 3 + -+ s->sps.log2_diff_max_min_luma_coding_block_size; -+ -+ s->ctb_width = (s->sps.pic_width_in_luma_samples + -+ (1 << s->log2_ctb_size) - 1) >> -+ s->log2_ctb_size; -+ s->ctb_height = (s->sps.pic_height_in_luma_samples + -+ (1 << s->log2_ctb_size) - 1) >> -+ s->log2_ctb_size; -+ s->ctb_size = s->ctb_width * s->ctb_height; -+ -+ // Inferred parameters -+ -+ if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) { -+ s->num_tile_columns = 1; -+ s->num_tile_rows = 1; -+ s->column_width[0] = s->ctb_width; -+ s->row_height[0] = s->ctb_height; -+ } else { -+ s->num_tile_columns = s->pps.num_tile_columns_minus1 + 1; -+ s->num_tile_rows = s->pps.num_tile_rows_minus1 + 1; -+ for (i = 0; i < s->num_tile_columns; ++i) -+ s->column_width[i] = s->pps.column_width_minus1[i] + 1; -+ for (i = 0; i < s->num_tile_rows; ++i) -+ s->row_height[i] = s->pps.row_height_minus1[i] + 1; -+ } -+ -+ s->col_bd = kmalloc((s->num_tile_columns + 1) * sizeof(*s->col_bd), -+ GFP_KERNEL); -+ s->row_bd = kmalloc((s->num_tile_rows + 1) * sizeof(*s->row_bd), -+ GFP_KERNEL); -+ -+ s->col_bd[0] = 0; -+ for (i = 0; i < s->num_tile_columns; i++) -+ s->col_bd[i + 1] = s->col_bd[i] + s->column_width[i]; -+ -+ s->row_bd[0] = 0; -+ for (i = 0; i < s->num_tile_rows; i++) -+ s->row_bd[i + 1] = s->row_bd[i] + s->row_height[i]; -+ -+ s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size, -+ sizeof(*s->ctb_addr_rs_to_ts), -+ GFP_KERNEL); -+ 
s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size, -+ sizeof(*s->ctb_addr_ts_to_rs), -+ GFP_KERNEL); -+ s->tile_id = kmalloc_array(s->ctb_size, sizeof(*s->tile_id), -+ GFP_KERNEL); -+ -+ for (ctb_addr_rs = 0; ctb_addr_rs < s->ctb_size; ctb_addr_rs++) { -+ int tb_x = ctb_addr_rs % s->ctb_width; -+ int tb_y = ctb_addr_rs / s->ctb_width; -+ int tile_x = 0; -+ int tile_y = 0; -+ int val = 0; -+ -+ for (i = 0; i < s->num_tile_columns; i++) { -+ if (tb_x < s->col_bd[i + 1]) { -+ tile_x = i; -+ break; -+ } -+ } -+ -+ for (i = 0; i < s->num_tile_rows; i++) { -+ if (tb_y < s->row_bd[i + 1]) { -+ tile_y = i; -+ break; -+ } -+ } -+ -+ for (i = 0; i < tile_x; i++) -+ val += s->row_height[tile_y] * s->column_width[i]; -+ for (i = 0; i < tile_y; i++) -+ val += s->ctb_width * s->row_height[i]; -+ -+ val += (tb_y - s->row_bd[tile_y]) * s->column_width[tile_x] + -+ tb_x - s->col_bd[tile_x]; -+ -+ s->ctb_addr_rs_to_ts[ctb_addr_rs] = val; -+ s->ctb_addr_ts_to_rs[val] = ctb_addr_rs; -+ } -+ -+ for (j = 0, tile_id = 0; j < s->num_tile_rows; j++) -+ for (i = 0; i < s->num_tile_columns; i++, tile_id++) -+ for (y = s->row_bd[j]; y < s->row_bd[j + 1]; y++) -+ for (x = s->col_bd[i]; -+ x < s->col_bd[i + 1]; -+ x++) -+ s->tile_id[s->ctb_addr_rs_to_ts -+ [y * s->ctb_width + -+ x]] = tile_id; -+ -+ return 0; -+} -+ -+static int frame_end(struct rpivid_dev *const dev, -+ struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) -+{ -+ const unsigned int last_x = s->col_bd[s->num_tile_columns] - 1; -+ const unsigned int last_y = s->row_bd[s->num_tile_rows] - 1; -+ size_t cmd_size; -+ -+ if (s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) { -+ if (de->wpp_entry_x < 2 && de->pic_width_in_ctbs_y > 2) -+ wpp_pause(de, last_y); -+ } -+ p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18)); -+ -+ // Copy commands out to dma buf -+ cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]); -+ -+ if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) { -+ 
size_t cmd_alloc = round_up_size(cmd_size); -+ -+ if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) { -+ v4l2_err(&dev->v4l2_dev, -+ "Alloc cmd buffer (%d): FAILED\n", cmd_alloc); -+ return -ENOMEM; -+ } -+ v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%d): OK\n", -+ cmd_alloc); -+ } -+ -+ memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size); -+ return 0; -+} -+ -+static void setup_colmv(struct rpivid_ctx *const ctx, struct rpivid_run *run, -+ struct rpivid_dec_state *const s) -+{ -+ ctx->colmv_stride = ALIGN(s->sps.pic_width_in_luma_samples, 64); -+ ctx->colmv_picsize = ctx->colmv_stride * -+ (ALIGN(s->sps.pic_height_in_luma_samples, 64) >> 4); -+} -+ -+// Can be called from irq context -+static struct rpivid_dec_env *dec_env_new(struct rpivid_ctx *const ctx) -+{ -+ struct rpivid_dec_env *de; -+ unsigned long lock_flags; -+ -+ spin_lock_irqsave(&ctx->dec_lock, lock_flags); -+ -+ de = ctx->dec_free; -+ if (de) { -+ ctx->dec_free = de->next; -+ de->next = NULL; -+ de->state = RPIVID_DECODE_SLICE_START; -+ } -+ -+ spin_unlock_irqrestore(&ctx->dec_lock, lock_flags); -+ return de; -+} -+ -+// Can be called from irq context -+static void dec_env_delete(struct rpivid_dec_env *const de) -+{ -+ struct rpivid_ctx * const ctx = de->ctx; -+ unsigned long lock_flags; -+ -+ aux_q_release(ctx, &de->frame_aux); -+ aux_q_release(ctx, &de->col_aux); -+ -+ spin_lock_irqsave(&ctx->dec_lock, lock_flags); -+ -+ de->state = RPIVID_DECODE_END; -+ de->next = ctx->dec_free; -+ ctx->dec_free = de; -+ -+ spin_unlock_irqrestore(&ctx->dec_lock, lock_flags); -+} -+ -+static void dec_env_uninit(struct rpivid_ctx *const ctx) -+{ -+ unsigned int i; -+ -+ if (ctx->dec_pool) { -+ for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) { -+ struct rpivid_dec_env *const de = ctx->dec_pool + i; -+ -+ kfree(de->cmd_fifo); -+ } -+ -+ kfree(ctx->dec_pool); -+ } -+ -+ ctx->dec_pool = NULL; -+ ctx->dec_free = NULL; -+} -+ -+static int dec_env_init(struct rpivid_ctx *const ctx) -+{ -+ unsigned int i; -+ -+ 
ctx->dec_pool = kzalloc(sizeof(*ctx->dec_pool) * RPIVID_DEC_ENV_COUNT, -+ GFP_KERNEL); -+ if (!ctx->dec_pool) -+ return -1; -+ -+ spin_lock_init(&ctx->dec_lock); -+ -+ // Build free chain -+ ctx->dec_free = ctx->dec_pool; -+ for (i = 0; i != RPIVID_DEC_ENV_COUNT - 1; ++i) -+ ctx->dec_pool[i].next = ctx->dec_pool + i + 1; -+ -+ // Fill in other bits -+ for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) { -+ struct rpivid_dec_env *const de = ctx->dec_pool + i; -+ -+ de->ctx = ctx; -+ de->decode_order = i; -+ de->cmd_max = 1024; -+ de->cmd_fifo = kmalloc_array(de->cmd_max, -+ sizeof(struct rpi_cmd), -+ GFP_KERNEL); -+ if (!de->cmd_fifo) -+ goto fail; -+ } -+ -+ return 0; -+ -+fail: -+ dec_env_uninit(ctx); -+ return -1; -+} -+ -+// Assume that we get exactly the same DPB for every slice -+// it makes no real sense otherwise -+#if V4L2_HEVC_DPB_ENTRIES_NUM_MAX > 16 -+#error HEVC_DPB_ENTRIES > h/w slots -+#endif -+ -+static u32 mk_config2(const struct rpivid_dec_state *const s) -+{ -+ const struct v4l2_ctrl_hevc_sps *const sps = &s->sps; -+ const struct v4l2_ctrl_hevc_pps *const pps = &s->pps; -+ u32 c; -+ // BitDepthY -+ c = (sps->bit_depth_luma_minus8 + 8) << 0; -+ // BitDepthC -+ c |= (sps->bit_depth_chroma_minus8 + 8) << 4; -+ // BitDepthY -+ if (sps->bit_depth_luma_minus8) -+ c |= BIT(8); -+ // BitDepthC -+ if (sps->bit_depth_chroma_minus8) -+ c |= BIT(9); -+ c |= s->log2_ctb_size << 10; -+ if (pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED) -+ c |= BIT(13); -+ if (sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED) -+ c |= BIT(14); -+ if (sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) -+ c |= BIT(15); /* Write motion vectors to external memory */ -+ c |= (pps->log2_parallel_merge_level_minus2 + 2) << 16; -+ if (s->slice_temporal_mvp) -+ c |= BIT(19); -+ if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED) -+ c |= BIT(20); -+ c |= (pps->pps_cb_qp_offset & 31) << 21; -+ c |= (pps->pps_cr_qp_offset & 31) << 26; -+ return c; -+} -+ 
-+static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run) -+{ -+ struct rpivid_dev *const dev = ctx->dev; -+ const struct v4l2_ctrl_hevc_slice_params *const sh = -+ run->h265.slice_params; -+ const struct v4l2_hevc_pred_weight_table *pred_weight_table; -+ struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; -+ struct rpivid_dec_state *const s = ctx->state; -+ struct vb2_queue *vq; -+ struct rpivid_dec_env *de; -+ int ctb_addr_ts; -+ unsigned int i; -+ int use_aux; -+ bool slice_temporal_mvp; -+ -+ pred_weight_table = &sh->pred_weight_table; -+ -+ s->frame_end = -+ ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0); -+ -+ de = ctx->dec0; -+ slice_temporal_mvp = (sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED); -+ -+ if (de && de->state != RPIVID_DECODE_END) { -+ ++s->slice_idx; -+ -+ switch (de->state) { -+ case RPIVID_DECODE_SLICE_CONTINUE: -+ // Expected state -+ break; -+ default: -+ v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", -+ __func__, de->state); -+ /* FALLTHRU */ -+ case RPIVID_DECODE_ERROR_CONTINUE: -+ // Uncleared error - fail now -+ goto fail; -+ } -+ -+ if (s->slice_temporal_mvp != slice_temporal_mvp) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Slice Temporal MVP non-constant\n"); -+ goto fail; -+ } -+ } else { -+ /* Frame start */ -+ unsigned int ctb_size_y; -+ bool sps_changed = false; -+ -+ if (memcmp(&s->sps, run->h265.sps, sizeof(s->sps)) != 0) { -+ /* SPS changed */ -+ v4l2_info(&dev->v4l2_dev, "SPS changed\n"); -+ memcpy(&s->sps, run->h265.sps, sizeof(s->sps)); -+ sps_changed = true; -+ } -+ if (sps_changed || -+ memcmp(&s->pps, run->h265.pps, sizeof(s->pps)) != 0) { -+ /* SPS changed */ -+ v4l2_info(&dev->v4l2_dev, "PPS changed\n"); -+ memcpy(&s->pps, run->h265.pps, sizeof(s->pps)); -+ -+ /* Recalc stuff as required */ -+ updated_ps(s); -+ } -+ -+ de = dec_env_new(ctx); -+ if (!de) { -+ v4l2_err(&dev->v4l2_dev, -+ "Failed to find free decode env\n"); -+ goto fail; -+ } -+ 
ctx->dec0 = de; -+ -+ ctb_size_y = -+ 1U << (s->sps.log2_min_luma_coding_block_size_minus3 + -+ 3 + -+ s->sps.log2_diff_max_min_luma_coding_block_size); -+ -+ de->pic_width_in_ctbs_y = -+ (s->sps.pic_width_in_luma_samples + ctb_size_y - 1) / -+ ctb_size_y; // 7-15 -+ de->pic_height_in_ctbs_y = -+ (s->sps.pic_height_in_luma_samples + ctb_size_y - 1) / -+ ctb_size_y; // 7-17 -+ de->cmd_len = 0; -+ de->dpbno_col = ~0U; -+ -+ de->bit_copy_gptr = ctx->bitbufs + 0; -+ de->bit_copy_len = 0; -+ de->cmd_copy_gptr = ctx->cmdbufs + 0; -+ -+ de->frame_c_offset = ctx->dst_fmt.height * 128; -+ de->frame_stride = ctx->dst_fmt.bytesperline * 128; -+ de->frame_addr = -+ vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 0); -+ de->frame_aux = NULL; -+ -+ if (s->sps.bit_depth_luma_minus8 != -+ s->sps.bit_depth_chroma_minus8) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Chroma depth (%d) != Luma depth (%d)\n", -+ s->sps.bit_depth_chroma_minus8 + 8, -+ s->sps.bit_depth_luma_minus8 + 8); -+ goto fail; -+ } -+ if (s->sps.bit_depth_luma_minus8 == 0) { -+ if (ctx->dst_fmt.pixelformat != -+ V4L2_PIX_FMT_NV12_COL128) { -+ v4l2_err(&dev->v4l2_dev, -+ "Pixel format %#x != NV12_COL128 for 8-bit output", -+ ctx->dst_fmt.pixelformat); -+ goto fail; -+ } -+ } else if (s->sps.bit_depth_luma_minus8 == 2) { -+ if (ctx->dst_fmt.pixelformat != -+ V4L2_PIX_FMT_NV12_10_COL128) { -+ v4l2_err(&dev->v4l2_dev, -+ "Pixel format %#x != NV12_10_COL128 for 10-bit output", -+ ctx->dst_fmt.pixelformat); -+ goto fail; -+ } -+ } else { -+ v4l2_warn(&dev->v4l2_dev, -+ "Luma depth (%d) unsupported\n", -+ s->sps.bit_depth_luma_minus8 + 8); -+ goto fail; -+ } -+ if (run->dst->vb2_buf.num_planes != 1) { -+ v4l2_warn(&dev->v4l2_dev, "Capture planes (%d) != 1\n", -+ run->dst->vb2_buf.num_planes); -+ goto fail; -+ } -+ if (run->dst->planes[0].length < -+ ctx->dst_fmt.sizeimage) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Capture plane[0] length (%d) < sizeimage (%d)\n", -+ run->dst->planes[0].length, -+ ctx->dst_fmt.sizeimage); -+ goto fail; 
-+ } -+ -+ if (s->sps.pic_width_in_luma_samples > 4096 || -+ s->sps.pic_height_in_luma_samples > 4096) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Pic dimension (%dx%d) exeeds 4096\n", -+ s->sps.pic_width_in_luma_samples, -+ s->sps.pic_height_in_luma_samples); -+ goto fail; -+ } -+ -+ // Fill in ref planes with our address s.t. if we mess -+ // up refs somehow then we still have a valid address -+ // entry -+ for (i = 0; i != 16; ++i) -+ de->ref_addrs[i] = de->frame_addr; -+ -+ /* -+ * Stash initial temporal_mvp flag -+ * This must be the same for all pic slices (7.4.7.1) -+ */ -+ s->slice_temporal_mvp = slice_temporal_mvp; -+ -+ // Phase 2 reg pre-calc -+ de->rpi_config2 = mk_config2(s); -+ de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) | -+ s->sps.pic_width_in_luma_samples; -+ de->rpi_currpoc = sh->slice_pic_order_cnt; -+ -+ if (s->sps.flags & -+ V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) { -+ setup_colmv(ctx, run, s); -+ } -+ -+ s->slice_idx = 0; -+ -+ if (sh->slice_segment_addr != 0) { -+ v4l2_warn(&dev->v4l2_dev, -+ "New frame but segment_addr=%d\n", -+ sh->slice_segment_addr); -+ goto fail; -+ } -+ -+ /* Allocate a bitbuf if we need one - don't need one if single -+ * slice as we can use the src buf directly -+ */ -+ if (!s->frame_end && !de->bit_copy_gptr->ptr) { -+ const size_t wxh = s->sps.pic_width_in_luma_samples * -+ s->sps.pic_height_in_luma_samples; -+ size_t bits_alloc; -+ -+ /* Annex A gives a min compression of 2 @ lvl 3.1 -+ * (wxh <= 983040) and min 4 thereafter but avoid -+ * the odity of 983041 having a lower limit than -+ * 983040. -+ * Multiply by 3/2 for 4:2:0 -+ */ -+ bits_alloc = wxh < 983040 ? wxh * 3 / 4 : -+ wxh < 983040 * 2 ? 
983040 * 3 / 4 : -+ wxh * 3 / 8; -+ bits_alloc = round_up_size(bits_alloc); -+ -+ if (gptr_alloc(dev, de->bit_copy_gptr, -+ bits_alloc, -+ DMA_ATTR_FORCE_CONTIGUOUS) != 0) { -+ v4l2_err(&dev->v4l2_dev, -+ "Unable to alloc buf (%zu) for bit copy\n", -+ bits_alloc); /* size_t, hence %zu */ -+ goto fail; -+ } -+ v4l2_info(&dev->v4l2_dev, -+ "Alloc buf (%zu) for bit copy OK\n", -+ bits_alloc); -+ } -+ } -+ -+ // Pre calc a few things -+ s->src_addr = -+ !s->frame_end ? -+ 0 : -+ vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0); -+ s->src_buf = s->src_addr != 0 ? NULL : -+ vb2_plane_vaddr(&run->src->vb2_buf, 0); -+ if (!s->src_addr && !s->src_buf) { -+ v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n"); -+ goto fail; -+ } -+ -+ s->sh = sh; -+ s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta; -+ s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? -+ 0 : -+ (5 - sh->five_minus_max_num_merge_cand); -+ // * SH DSS flag invented by me - but clearly needed -+ s->dependent_slice_segment_flag = -+ ((sh->flags & -+ V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0); -+ -+ s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ? -+ 0 : -+ sh->num_ref_idx_l0_active_minus1 + 1; -+ s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ? 
-+ 0 : -+ sh->num_ref_idx_l1_active_minus1 + 1; -+ -+ if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) -+ populate_scaling_factors(run, de, s); -+ -+ ctb_addr_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr]; -+ -+ if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) -+ wpp_decode_slice(de, s, sh, ctb_addr_ts); -+ else -+ decode_slice(de, s, sh, ctb_addr_ts); -+ -+ if (!s->frame_end) -+ return; -+ -+ // Frame end -+ memset(dpb_q_aux, 0, -+ sizeof(*dpb_q_aux) * V4L2_HEVC_DPB_ENTRIES_NUM_MAX); -+ /* -+ * Need Aux ents for all (ref) DPB ents if temporal MV could -+ * be enabled for any pic -+ * ** At the moment we have aux ents for all pics whether or not -+ * they are ref -+ */ -+ use_aux = ((s->sps.flags & -+ V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0); -+ -+ // Locate ref frames -+ // At least in the current implementation this is constant across all -+ // slices. If this changes we will need idx mapping code. -+ // Uses sh so here rather than trigger -+ -+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); -+ -+ if (!vq) { -+ v4l2_err(&dev->v4l2_dev, "VQ gone!\n"); -+ goto fail; -+ } -+ -+ // v4l2_info(&dev->v4l2_dev, "rpivid_h265_end of frame\n"); -+ if (frame_end(dev, de, s)) -+ goto fail; -+ -+ for (i = 0; i < sh->num_active_dpb_entries; ++i) { -+ int buffer_index = -+ vb2_find_timestamp(vq, sh->dpb[i].timestamp, 0); -+ struct vb2_buffer *buf = buffer_index < 0 ? 
-+ NULL : -+ vb2_get_buffer(vq, buffer_index); -+ -+ if (!buf) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Missing DPB ent %d, timestamp=%lld, index=%d\n", -+ i, (long long)sh->dpb[i].timestamp, -+ buffer_index); -+ continue; -+ } -+ -+ if (use_aux) { -+ dpb_q_aux[i] = aux_q_ref(ctx, -+ ctx->aux_ents[buffer_index]); -+ if (!dpb_q_aux[i]) -+ v4l2_warn(&dev->v4l2_dev, -+ "Missing DPB AUX ent %d index=%d\n", -+ i, buffer_index); -+ } -+ -+ de->ref_addrs[i] = -+ vb2_dma_contig_plane_dma_addr(buf, 0); -+ } -+ -+ // Move DPB from temp -+ for (i = 0; i != V4L2_HEVC_DPB_ENTRIES_NUM_MAX; ++i) { -+ aux_q_release(ctx, &s->ref_aux[i]); -+ s->ref_aux[i] = dpb_q_aux[i]; -+ } -+ // Unref the old frame aux too - it is either in the DPB or not -+ // now -+ aux_q_release(ctx, &s->frame_aux); -+ -+ if (use_aux) { -+ // New frame so new aux ent -+ // ??? Do we need this if non-ref ??? can we tell -+ s->frame_aux = aux_q_new(ctx, run->dst->vb2_buf.index); -+ -+ if (!s->frame_aux) { -+ v4l2_err(&dev->v4l2_dev, -+ "Failed to obtain aux storage for frame\n"); -+ goto fail; -+ } -+ -+ de->frame_aux = aux_q_ref(ctx, s->frame_aux); -+ } -+ -+ if (de->dpbno_col != ~0U) { -+ if (de->dpbno_col >= sh->num_active_dpb_entries) { -+ v4l2_err(&dev->v4l2_dev, -+ "Col ref index %d >= %d\n", -+ de->dpbno_col, -+ sh->num_active_dpb_entries); -+ } else { -+ // Standard requires that the col pic is -+ // constant for the duration of the pic -+ // (text of collocated_ref_idx in H265-2 2018 -+ // 7.4.7.1) -+ -+ // Spot the collocated ref in passing -+ de->col_aux = aux_q_ref(ctx, -+ dpb_q_aux[de->dpbno_col]); -+ -+ if (!de->col_aux) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Missing DPB ent for col\n"); -+ // Probably need to abort if this fails -+ // as P2 may explode on bad data -+ goto fail; -+ } -+ } -+ } -+ -+ de->state = RPIVID_DECODE_PHASE1; -+ return; -+ -+fail: -+ if (de) -+ // Actual error reporting happens in Trigger -+ de->state = s->frame_end ? 
RPIVID_DECODE_ERROR_DONE : -+ RPIVID_DECODE_ERROR_CONTINUE; -+} -+ -+////////////////////////////////////////////////////////////////////////////// -+// Handle PU and COEFF stream overflow -+ -+// Returns: -+// -1 Phase 1 decode error -+// 0 OK -+// >0 Out of space (bitmask) -+ -+#define STATUS_COEFF_EXHAUSTED 8 -+#define STATUS_PU_EXHAUSTED 16 -+ -+static int check_status(const struct rpivid_dev *const dev) -+{ -+ const u32 cfstatus = apb_read(dev, RPI_CFSTATUS); -+ const u32 cfnum = apb_read(dev, RPI_CFNUM); -+ u32 status = apb_read(dev, RPI_STATUS); -+ -+ // Handle PU and COEFF stream overflow -+ -+ // this is the definition of successful completion of phase 1 -+ // it assures that status register is zero and all blocks in each tile -+ // have completed -+ if (cfstatus == cfnum) -+ return 0; //No error -+ -+ status &= (STATUS_PU_EXHAUSTED | STATUS_COEFF_EXHAUSTED); -+ if (status) -+ return status; -+ -+ return -1; -+} -+ -+static void cb_phase2(struct rpivid_dev *const dev, void *v) -+{ -+ struct rpivid_dec_env *const de = v; -+ struct rpivid_ctx *const ctx = de->ctx; -+ -+ xtrace_in(dev, de); -+ -+ v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf, -+ VB2_BUF_STATE_DONE); -+ de->frame_buf = NULL; -+ -+ /* Delete de before finish as finish might immediately trigger a reuse -+ * of de -+ */ -+ dec_env_delete(de); -+ -+ if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) { -+ xtrace_fin(dev, de); -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_DONE); -+ } -+ -+ xtrace_ok(dev, de); -+} -+ -+static void phase2_claimed(struct rpivid_dev *const dev, void *v) -+{ -+ struct rpivid_dec_env *const de = v; -+ unsigned int i; -+ -+ xtrace_in(dev, de); -+ -+ apb_write_vc_addr(dev, RPI_PURBASE, de->pu_base_vc); -+ apb_write_vc_len(dev, RPI_PURSTRIDE, de->pu_stride); -+ apb_write_vc_addr(dev, RPI_COEFFRBASE, de->coeff_base_vc); -+ apb_write_vc_len(dev, RPI_COEFFRSTRIDE, de->coeff_stride); -+ -+ 
apb_write_vc_addr(dev, RPI_OUTYBASE, de->frame_addr); -+ apb_write_vc_addr(dev, RPI_OUTCBASE, -+ de->frame_addr + de->frame_c_offset); -+ apb_write_vc_len(dev, RPI_OUTYSTRIDE, de->frame_stride); -+ apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->frame_stride); -+ -+ // v4l2_info(&dev->v4l2_dev, "Frame: Y=%llx, C=%llx, Stride=%x\n", -+ // de->frame_addr, de->frame_addr + de->frame_c_offset, -+ // de->frame_stride); -+ -+ for (i = 0; i < 16; i++) { -+ // Strides are in fact unused but fill in anyway -+ apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i]); -+ apb_write_vc_len(dev, 0x9004 + 16 * i, de->frame_stride); -+ apb_write_vc_addr(dev, 0x9008 + 16 * i, -+ de->ref_addrs[i] + de->frame_c_offset); -+ apb_write_vc_len(dev, 0x900C + 16 * i, de->frame_stride); -+ } -+ -+ apb_write(dev, RPI_CONFIG2, de->rpi_config2); -+ apb_write(dev, RPI_FRAMESIZE, de->rpi_framesize); -+ apb_write(dev, RPI_CURRPOC, de->rpi_currpoc); -+ // v4l2_info(&dev->v4l2_dev, "Config2=%#x, FrameSize=%#x, POC=%#x\n", -+ // de->rpi_config2, de->rpi_framesize, de->rpi_currpoc); -+ -+ // collocated reads/writes -+ apb_write_vc_len(dev, RPI_COLSTRIDE, -+ de->ctx->colmv_stride); // Read vals -+ apb_write_vc_len(dev, RPI_MVSTRIDE, -+ de->ctx->colmv_stride); // Write vals -+ apb_write_vc_addr(dev, RPI_MVBASE, -+ !de->frame_aux ? 0 : de->frame_aux->col.addr); -+ apb_write_vc_addr(dev, RPI_COLBASE, -+ !de->col_aux ? 
0 : de->col_aux->col.addr); -+ -+ //v4l2_info(&dev->v4l2_dev, -+ // "Mv=%llx, Col=%llx, Stride=%x, Buf=%llx->%llx\n", -+ // de->rpi_mvbase, de->rpi_colbase, de->ctx->colmv_stride, -+ // de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr + -+ // de->ctx->colmvbuf.size); -+ -+ rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de); -+ -+ apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y); -+ -+ xtrace_ok(dev, de); -+} -+ -+static void phase1_claimed(struct rpivid_dev *const dev, void *v); -+ -+static void phase1_thread(struct rpivid_dev *const dev, void *v) -+{ -+ struct rpivid_dec_env *const de = v; -+ struct rpivid_ctx *const ctx = de->ctx; -+ -+ struct rpivid_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx; -+ struct rpivid_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx; -+ -+ xtrace_in(dev, de); -+ -+ if (de->p1_status & STATUS_PU_EXHAUSTED) { -+ if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) { -+ v4l2_err(&dev->v4l2_dev, -+ "%s: PU realloc (%#x) failed\n", -+ __func__, pu_gptr->size); -+ goto fail; -+ } -+ v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%#x) OK\n", -+ __func__, pu_gptr->size); -+ } -+ -+ if (de->p1_status & STATUS_COEFF_EXHAUSTED) { -+ if (gptr_realloc_new(dev, coeff_gptr, -+ next_size(coeff_gptr->size))) { -+ v4l2_err(&dev->v4l2_dev, -+ "%s: Coeff realloc (%#x) failed\n", -+ __func__, coeff_gptr->size); -+ goto fail; -+ } -+ v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%#x) OK\n", -+ __func__, coeff_gptr->size); -+ } -+ -+ phase1_claimed(dev, de); -+ xtrace_ok(dev, de); -+ return; -+ -+fail: -+ dec_env_delete(de); -+ xtrace_fin(dev, de); -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_ERROR); -+ xtrace_fail(dev, de); -+} -+ -+/* Always called in irq context (this is good) */ -+static void cb_phase1(struct rpivid_dev *const dev, void *v) -+{ -+ struct rpivid_dec_env *const de = v; -+ struct rpivid_ctx *const ctx = de->ctx; -+ -+ xtrace_in(dev, de); -+ -+ de->p1_status = 
check_status(dev); -+ if (de->p1_status != 0) { -+ v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n", -+ __func__, de->p1_status); -+ -+ if (de->p1_status < 0) -+ goto fail; -+ -+ /* Need to realloc - push onto a thread rather than IRQ */ -+ rpivid_hw_irq_active1_thread(dev, &de->irq_ent, -+ phase1_thread, de); -+ return; -+ } -+ -+ /* After the frame-buf is detached it must be returned but from -+ * this point onward (phase2_claimed, cb_phase2) there are no error -+ * paths so the return at the end of cb_phase2 is all that is needed -+ */ -+ de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx); -+ if (!de->frame_buf) { -+ v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__); -+ goto fail; -+ } -+ -+ ctx->p2idx = -+ (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1; -+ -+ // Enable the next setup if our Q isn't too big -+ if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) { -+ xtrace_fin(dev, de); -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_DONE); -+ } -+ -+ rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de); -+ -+ xtrace_ok(dev, de); -+ return; -+ -+fail: -+ dec_env_delete(de); -+ xtrace_fin(dev, de); -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_ERROR); -+ xtrace_fail(dev, de); -+} -+ -+static void phase1_claimed(struct rpivid_dev *const dev, void *v) -+{ -+ struct rpivid_dec_env *const de = v; -+ struct rpivid_ctx *const ctx = de->ctx; -+ -+ const struct rpivid_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx; -+ const struct rpivid_gptr * const coeff_gptr = ctx->coeff_bufs + -+ ctx->p2idx; -+ -+ xtrace_in(dev, de); -+ -+ de->pu_base_vc = pu_gptr->addr; -+ de->pu_stride = -+ ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64); -+ -+ de->coeff_base_vc = coeff_gptr->addr; -+ de->coeff_stride = -+ ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64); -+ -+ apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc); -+ 
apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride); -+ apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc); -+ apb_write_vc_len(dev, RPI_COEFFWSTRIDE, de->coeff_stride); -+ -+ // Trigger command FIFO -+ apb_write(dev, RPI_CFNUM, de->cmd_len); -+ -+ // Claim irq -+ rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de); -+ -+ // And start the h/w -+ apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr); -+ -+ xtrace_ok(dev, de); -+} -+ -+static void dec_state_delete(struct rpivid_ctx *const ctx) -+{ -+ unsigned int i; -+ struct rpivid_dec_state *const s = ctx->state; -+ -+ if (!s) -+ return; -+ ctx->state = NULL; -+ -+ free_ps_info(s); -+ -+ for (i = 0; i != HEVC_MAX_REFS; ++i) -+ aux_q_release(ctx, &s->ref_aux[i]); -+ aux_q_release(ctx, &s->frame_aux); -+ -+ kfree(s); -+} -+ -+static void rpivid_h265_stop(struct rpivid_ctx *ctx) -+{ -+ struct rpivid_dev *const dev = ctx->dev; -+ unsigned int i; -+ -+ v4l2_info(&dev->v4l2_dev, "%s\n", __func__); -+ -+ dec_env_uninit(ctx); -+ dec_state_delete(ctx); -+ -+ // dec_env & state must be killed before this to release the buffer to -+ // the free pool -+ aux_q_uninit(ctx); -+ -+ for (i = 0; i != ARRAY_SIZE(ctx->bitbufs); ++i) -+ gptr_free(dev, ctx->bitbufs + i); -+ for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) -+ gptr_free(dev, ctx->cmdbufs + i); -+ for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) -+ gptr_free(dev, ctx->pu_bufs + i); -+ for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i) -+ gptr_free(dev, ctx->coeff_bufs + i); -+} -+ -+static int rpivid_h265_start(struct rpivid_ctx *ctx) -+{ -+ struct rpivid_dev *const dev = ctx->dev; -+ unsigned int i; -+ -+ unsigned int w = ctx->dst_fmt.width; -+ unsigned int h = ctx->dst_fmt.height; -+ unsigned int wxh; -+ size_t pu_alloc; -+ size_t coeff_alloc; -+ -+ // Generate a sanitised WxH for memory alloc -+ // Assume HD if unset -+ if (w == 0) -+ w = 1920; -+ if (w > 4096) -+ w = 4096; -+ if (h == 0) -+ h = 1088; /* default height when unset, so wxh is never 0 */ -+ if (h > 4096) -+ h = 4096; -+ wxh = w * 
h; -+ -+ v4l2_info(&dev->v4l2_dev, "%s: (%dx%d)\n", __func__, -+ ctx->dst_fmt.width, ctx->dst_fmt.height); -+ -+ ctx->dec0 = NULL; -+ ctx->state = kzalloc(sizeof(*ctx->state), GFP_KERNEL); -+ if (!ctx->state) { -+ v4l2_err(&dev->v4l2_dev, "Failed to allocate decode state\n"); -+ goto fail; -+ } -+ -+ if (dec_env_init(ctx) != 0) { -+ v4l2_err(&dev->v4l2_dev, "Failed to allocate decode envs\n"); -+ goto fail; -+ } -+ -+ // 16k is plenty for most purposes but we will realloc if needed -+ for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) { -+ if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000, -+ DMA_ATTR_FORCE_CONTIGUOUS)) -+ goto fail; -+ } -+ -+ // Finger in the air PU & Coeff alloc -+ // Will be realloced if too small -+ coeff_alloc = round_up_size(wxh); -+ pu_alloc = round_up_size(wxh / 4); -+ for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) { -+ // Don't actually need a kernel mapping here -+ if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc, -+ DMA_ATTR_FORCE_CONTIGUOUS | -+ DMA_ATTR_NO_KERNEL_MAPPING)) -+ goto fail; -+ if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc, -+ DMA_ATTR_FORCE_CONTIGUOUS | -+ DMA_ATTR_NO_KERNEL_MAPPING)) -+ goto fail; -+ } -+ aux_q_init(ctx); -+ -+ return 0; -+ -+fail: -+ rpivid_h265_stop(ctx); -+ return -ENOMEM; -+} -+ -+static void rpivid_h265_trigger(struct rpivid_ctx *ctx) -+{ -+ struct rpivid_dev *const dev = ctx->dev; -+ struct rpivid_dec_env *const de = ctx->dec0; -+ -+ xtrace_in(dev, de); -+ -+ switch (!de ? 
RPIVID_DECODE_ERROR_CONTINUE : de->state) { -+ case RPIVID_DECODE_SLICE_START: -+ de->state = RPIVID_DECODE_SLICE_CONTINUE; -+ /* FALLTHRU */ -+ case RPIVID_DECODE_SLICE_CONTINUE: -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_DONE); -+ break; -+ default: -+ v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__, -+ de->state); -+ /* FALLTHRU */ -+ case RPIVID_DECODE_ERROR_DONE: -+ ctx->dec0 = NULL; -+ dec_env_delete(de); -+ /* FALLTHRU */ -+ case RPIVID_DECODE_ERROR_CONTINUE: -+ xtrace_fin(dev, de); -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_ERROR); -+ break; -+ case RPIVID_DECODE_PHASE1: -+ ctx->dec0 = NULL; -+ rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed, -+ de); -+ break; -+ } -+ -+ xtrace_ok(dev, de); -+} -+ -+struct rpivid_dec_ops rpivid_dec_ops_h265 = { -+ .setup = rpivid_h265_setup, -+ .start = rpivid_h265_start, -+ .stop = rpivid_h265_stop, -+ .trigger = rpivid_h265_trigger, -+}; ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_hw.c -@@ -0,0 +1,321 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+#include <linux/clk.h> -+#include <linux/component.h> -+#include <linux/dma-mapping.h> -+#include <linux/interrupt.h> -+#include <linux/io.h> -+#include <linux/of_reserved_mem.h> -+#include <linux/of_device.h> -+#include <linux/of_platform.h> -+#include <linux/platform_device.h> -+#include <linux/regmap.h> -+#include <linux/reset.h> -+ -+#include <media/videobuf2-core.h> -+#include <media/v4l2-mem2mem.h> -+ -+#include "rpivid.h" -+#include "rpivid_hw.h" -+ -+static void pre_irq(struct rpivid_dev *dev, struct rpivid_hw_irq_ent 
*ient, -+ rpivid_irq_callback cb, void *v, -+ struct rpivid_hw_irq_ctrl *ictl) -+{ -+ unsigned long flags; -+ -+ if (ictl->irq) { -+ v4l2_err(&dev->v4l2_dev, "Attempt to claim IRQ when already claimed\n"); -+ return; -+ } -+ -+ ient->cb = cb; -+ ient->v = v; -+ -+ // Not sure this lock is actually required -+ spin_lock_irqsave(&ictl->lock, flags); -+ ictl->irq = ient; -+ spin_unlock_irqrestore(&ictl->lock, flags); -+} -+ -+static void sched_claim(struct rpivid_dev * const dev, -+ struct rpivid_hw_irq_ctrl * const ictl) -+{ -+ for (;;) { -+ struct rpivid_hw_irq_ent *ient = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&ictl->lock, flags); -+ -+ if (--ictl->no_sched <= 0) { -+ ient = ictl->claim; -+ if (!ictl->irq && ient) { -+ ictl->claim = ient->next; -+ ictl->no_sched = 1; -+ } -+ } -+ -+ spin_unlock_irqrestore(&ictl->lock, flags); -+ -+ if (!ient) -+ break; -+ -+ ient->cb(dev, ient->v); -+ } -+} -+ -+/* Should only ever be called from its own IRQ cb so no lock required */ -+static void pre_thread(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback cb, void *v, -+ struct rpivid_hw_irq_ctrl *ictl) -+{ -+ ient->cb = cb; -+ ient->v = v; -+ ictl->irq = ient; -+ ictl->thread_reqed = true; -+ ictl->no_sched++; -+} -+ -+// Called in irq context -+static void do_irq(struct rpivid_dev * const dev, -+ struct rpivid_hw_irq_ctrl * const ictl) -+{ -+ struct rpivid_hw_irq_ent *ient; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&ictl->lock, flags); -+ ient = ictl->irq; -+ if (ient) { -+ ictl->no_sched++; -+ ictl->irq = NULL; -+ } -+ spin_unlock_irqrestore(&ictl->lock, flags); -+ -+ if (ient) { -+ ient->cb(dev, ient->v); -+ -+ sched_claim(dev, ictl); -+ } -+} -+ -+static void do_claim(struct rpivid_dev * const dev, -+ struct rpivid_hw_irq_ent *ient, -+ const rpivid_irq_callback cb, void * const v, -+ struct rpivid_hw_irq_ctrl * const ictl) -+{ -+ unsigned long flags; -+ -+ ient->next = NULL; -+ ient->cb = cb; -+ ient->v = v; -+ -+ 
spin_lock_irqsave(&ictl->lock, flags); -+ -+ if (ictl->claim) { -+ // If we have a Q then add to end -+ ictl->tail->next = ient; -+ ictl->tail = ient; -+ ient = NULL; -+ } else if (ictl->no_sched || ictl->irq) { -+ // Empty Q but other activity in progress so Q -+ ictl->claim = ient; -+ ictl->tail = ient; -+ ient = NULL; -+ } else { -+ // Nothing else going on - schedule immediately and -+ // prevent anything else scheduling claims -+ ictl->no_sched = 1; -+ } -+ -+ spin_unlock_irqrestore(&ictl->lock, flags); -+ -+ if (ient) { -+ ient->cb(dev, ient->v); -+ -+ sched_claim(dev, ictl); -+ } -+} -+ -+static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl) -+{ -+ spin_lock_init(&ictl->lock); -+ ictl->claim = NULL; -+ ictl->tail = NULL; -+ ictl->irq = NULL; -+ ictl->no_sched = 0; -+} -+ -+static void ictl_uninit(struct rpivid_hw_irq_ctrl * const ictl) -+{ -+ // Nothing to do -+} -+ -+#if !OPT_DEBUG_POLL_IRQ -+static irqreturn_t rpivid_irq_irq(int irq, void *data) -+{ -+ struct rpivid_dev * const dev = data; -+ __u32 ictrl; -+ -+ ictrl = irq_read(dev, ARG_IC_ICTRL); -+ if (!(ictrl & ARG_IC_ICTRL_ALL_IRQ_MASK)) { -+ v4l2_warn(&dev->v4l2_dev, "IRQ but no IRQ bits set\n"); -+ return IRQ_NONE; -+ } -+ -+ // Cancel any/all irqs -+ irq_write(dev, ARG_IC_ICTRL, ictrl & ~ARG_IC_ICTRL_SET_ZERO_MASK); -+ -+ // Service Active2 before Active1 so Phase 1 can transition to Phase 2 -+ // without delay -+ if (ictrl & ARG_IC_ICTRL_ACTIVE2_INT_SET) -+ do_irq(dev, &dev->ic_active2); -+ if (ictrl & ARG_IC_ICTRL_ACTIVE1_INT_SET) -+ do_irq(dev, &dev->ic_active1); -+ -+ return dev->ic_active1.thread_reqed || dev->ic_active2.thread_reqed ? 
-+ IRQ_WAKE_THREAD : IRQ_HANDLED; -+} -+ -+static void do_thread(struct rpivid_dev * const dev, -+ struct rpivid_hw_irq_ctrl *const ictl) -+{ -+ unsigned long flags; -+ struct rpivid_hw_irq_ent *ient = NULL; -+ -+ spin_lock_irqsave(&ictl->lock, flags); -+ -+ if (ictl->thread_reqed) { -+ ient = ictl->irq; -+ ictl->thread_reqed = false; -+ ictl->irq = NULL; -+ } -+ -+ spin_unlock_irqrestore(&ictl->lock, flags); -+ -+ if (ient) { -+ ient->cb(dev, ient->v); -+ -+ sched_claim(dev, ictl); -+ } -+} -+ -+static irqreturn_t rpivid_irq_thread(int irq, void *data) -+{ -+ struct rpivid_dev * const dev = data; -+ -+ do_thread(dev, &dev->ic_active1); -+ do_thread(dev, &dev->ic_active2); -+ -+ return IRQ_HANDLED; -+} -+#endif -+ -+/* May only be called from Active1 CB -+ * IRQs should not be expected until execution continues in the cb -+ */ -+void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback thread_cb, void *ctx) -+{ -+ pre_thread(dev, ient, thread_cb, ctx, &dev->ic_active1); -+} -+ -+void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback ready_cb, void *ctx) -+{ -+ do_claim(dev, ient, ready_cb, ctx, &dev->ic_active1); -+} -+ -+void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback irq_cb, void *ctx) -+{ -+ pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active1); -+} -+ -+void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback ready_cb, void *ctx) -+{ -+ do_claim(dev, ient, ready_cb, ctx, &dev->ic_active2); -+} -+ -+void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback irq_cb, void *ctx) -+{ -+ pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2); -+} -+ -+int rpivid_hw_probe(struct rpivid_dev *dev) -+{ -+ struct resource *res; -+ __u32 irq_stat; -+ int irq_dec; -+ int ret = 0; -+ -+ 
ictl_init(&dev->ic_active1); -+ ictl_init(&dev->ic_active2); -+ -+ res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc"); -+ if (!res) -+ return -ENODEV; -+ -+ dev->base_irq = devm_ioremap(dev->dev, res->start, resource_size(res)); -+ if (!dev->base_irq) /* devm_ioremap() gives NULL on failure */ -+ return -ENOMEM; -+ -+ res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "hevc"); -+ if (!res) -+ return -ENODEV; -+ -+ dev->base_h265 = devm_ioremap(dev->dev, res->start, resource_size(res)); -+ if (!dev->base_h265) /* devm_ioremap() gives NULL on failure */ -+ return -ENOMEM; -+ -+ dev->clock = devm_clk_get(&dev->pdev->dev, "hevc"); -+ if (IS_ERR(dev->clock)) -+ return PTR_ERR(dev->clock); -+ -+ // Disable IRQs & reset anything pending -+ irq_write(dev, 0, -+ ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET); -+ irq_stat = irq_read(dev, 0); -+ irq_write(dev, 0, irq_stat); -+ -+#if !OPT_DEBUG_POLL_IRQ -+ irq_dec = platform_get_irq(dev->pdev, 0); -+ if (irq_dec <= 0) -+ return irq_dec ? irq_dec : -ENXIO; /* never report success without an IRQ */ -+ ret = devm_request_threaded_irq(dev->dev, irq_dec, -+ rpivid_irq_irq, -+ rpivid_irq_thread, -+ 0, dev_name(dev->dev), dev); -+ if (ret) { -+ dev_err(dev->dev, "Failed to request IRQ - %d\n", ret); -+ -+ return ret; -+ } -+#endif -+ return ret; -+} -+ -+void rpivid_hw_remove(struct rpivid_dev *dev) -+{ -+ // IRQ auto freed on unload so no need to do it here -+ ictl_uninit(&dev->ic_active1); -+ ictl_uninit(&dev->ic_active2); -+} -+ ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_hw.h -@@ -0,0 +1,300 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#ifndef _RPIVID_HW_H_ -+#define _RPIVID_HW_H_ -+ -+struct rpivid_hw_irq_ent { -+ struct rpivid_hw_irq_ent 
*next; -+ rpivid_irq_callback cb; -+ void *v; -+}; -+ -+/* Phase 1 Register offsets */ -+ -+#define RPI_SPS0 0 -+#define RPI_SPS1 4 -+#define RPI_PPS 8 -+#define RPI_SLICE 12 -+#define RPI_TILESTART 16 -+#define RPI_TILEEND 20 -+#define RPI_SLICESTART 24 -+#define RPI_MODE 28 -+#define RPI_LEFT0 32 -+#define RPI_LEFT1 36 -+#define RPI_LEFT2 40 -+#define RPI_LEFT3 44 -+#define RPI_QP 48 -+#define RPI_CONTROL 52 -+#define RPI_STATUS 56 -+#define RPI_VERSION 60 -+#define RPI_BFBASE 64 -+#define RPI_BFNUM 68 -+#define RPI_BFCONTROL 72 -+#define RPI_BFSTATUS 76 -+#define RPI_PUWBASE 80 -+#define RPI_PUWSTRIDE 84 -+#define RPI_COEFFWBASE 88 -+#define RPI_COEFFWSTRIDE 92 -+#define RPI_SLICECMDS 96 -+#define RPI_BEGINTILEEND 100 -+#define RPI_TRANSFER 104 -+#define RPI_CFBASE 108 -+#define RPI_CFNUM 112 -+#define RPI_CFSTATUS 116 -+ -+/* Phase 2 Register offsets */ -+ -+#define RPI_PURBASE 0x8000 -+#define RPI_PURSTRIDE 0x8004 -+#define RPI_COEFFRBASE 0x8008 -+#define RPI_COEFFRSTRIDE 0x800C -+#define RPI_NUMROWS 0x8010 -+#define RPI_CONFIG2 0x8014 -+#define RPI_OUTYBASE 0x8018 -+#define RPI_OUTYSTRIDE 0x801C -+#define RPI_OUTCBASE 0x8020 -+#define RPI_OUTCSTRIDE 0x8024 -+#define RPI_STATUS2 0x8028 -+#define RPI_FRAMESIZE 0x802C -+#define RPI_MVBASE 0x8030 -+#define RPI_MVSTRIDE 0x8034 -+#define RPI_COLBASE 0x8038 -+#define RPI_COLSTRIDE 0x803C -+#define RPI_CURRPOC 0x8040 -+ -+/* -+ * Write a general register value -+ * Order is unimportant -+ */ -+static inline void apb_write(const struct rpivid_dev * const dev, -+ const unsigned int offset, const u32 val) -+{ -+ writel_relaxed(val, dev->base_h265 + offset); -+} -+ -+/* Write the final register value that actually starts the phase */ -+static inline void apb_write_final(const struct rpivid_dev * const dev, -+ const unsigned int offset, const u32 val) -+{ -+ writel(val, dev->base_h265 + offset); -+} -+ -+static inline u32 apb_read(const struct rpivid_dev * const dev, -+ const unsigned int offset) -+{ -+ return 
readl(dev->base_h265 + offset); -+} -+ -+static inline void irq_write(const struct rpivid_dev * const dev, -+ const unsigned int offset, const u32 val) -+{ -+ writel(val, dev->base_irq + offset); -+} -+ -+static inline u32 irq_read(const struct rpivid_dev * const dev, -+ const unsigned int offset) -+{ -+ return readl(dev->base_irq + offset); -+} -+ -+static inline void apb_write_vc_addr(const struct rpivid_dev * const dev, -+ const unsigned int offset, -+ const dma_addr_t a) -+{ -+ apb_write(dev, offset, (u32)(a >> 6)); -+} -+ -+static inline void apb_write_vc_addr_final(const struct rpivid_dev * const dev, -+ const unsigned int offset, -+ const dma_addr_t a) -+{ -+ apb_write_final(dev, offset, (u32)(a >> 6)); -+} -+ -+static inline void apb_write_vc_len(const struct rpivid_dev * const dev, -+ const unsigned int offset, -+ const unsigned int x) -+{ -+ apb_write(dev, offset, (x + 63) >> 6); -+} -+ -+/* *ARG_IC_ICTRL - Interrupt control for ARGON Core* -+ * Offset (byte space) = 40'h2b10000 -+ * Physical Address (byte space) = 40'h7eb10000 -+ * Verilog Macro Address = `ARG_IC_REG_START + `ARGON_INTCTRL_ICTRL -+ * Reset Value = 32'b100x100x_100xxxxx_xxxxxxx0_x100x100 -+ * Access = RW (32-bit only) -+ * Interrupt control logic for ARGON Core. -+ */ -+#define ARG_IC_ICTRL 0 -+ -+/* acc=LWC ACTIVE1_INT FIELD ACCESS: LWC -+ * -+ * Interrupt 1 -+ * This is set and held when an hevc_active1 interrupt edge is detected -+ * The polarity of the edge is set by the ACTIVE1_EDGE field -+ * Write a 1 to this bit to clear down the latched interrupt -+ * The latched interrupt is only enabled out onto the interrupt line if -+ * ACTIVE1_EN is set -+ * Reset value is *0* decimal. -+ */ -+#define ARG_IC_ICTRL_ACTIVE1_INT_SET BIT(0) -+ -+/* ACTIVE1_EDGE Sets the polarity of the interrupt edge detection logic -+ * This logic detects edges of the hevc_active1 line from the argon core -+ * 0 = negedge, 1 = posedge -+ * Reset value is *0* decimal. 
-+ */ -+#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET BIT(1) -+ -+/* ACTIVE1_EN Enables ACTIVE1_INT out onto the argon interrupt line. -+ * If this isn't set, the interrupt logic will work but no interrupt will be -+ * set to the interrupt controller -+ * Reset value is *1* decimal. -+ * -+ * [JC] The above appears to be a lie - if unset then b0 is never set -+ */ -+#define ARG_IC_ICTRL_ACTIVE1_EN_SET BIT(2) -+ -+/* acc=RO ACTIVE1_STATUS FIELD ACCESS: RO -+ * -+ * The current status of the hevc_active1 signal -+ */ -+#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET BIT(3) -+ -+/* acc=LWC ACTIVE2_INT FIELD ACCESS: LWC -+ * -+ * Interrupt 2 -+ * This is set and held when an hevc_active2 interrupt edge is detected -+ * The polarity of the edge is set by the ACTIVE2_EDGE field -+ * Write a 1 to this bit to clear down the latched interrupt -+ * The latched interrupt is only enabled out onto the interrupt line if -+ * ACTIVE2_EN is set -+ * Reset value is *0* decimal. -+ */ -+#define ARG_IC_ICTRL_ACTIVE2_INT_SET BIT(4) -+ -+/* ACTIVE2_EDGE Sets the polarity of the interrupt edge detection logic -+ * This logic detects edges of the hevc_active2 line from the argon core -+ * 0 = negedge, 1 = posedge -+ * Reset value is *0* decimal. -+ */ -+#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET BIT(5) -+ -+/* ACTIVE2_EN Enables ACTIVE2_INT out onto the argon interrupt line. -+ * If this isn't set, the interrupt logic will work but no interrupt will be -+ * set to the interrupt controller -+ * Reset value is *1* decimal. -+ */ -+#define ARG_IC_ICTRL_ACTIVE2_EN_SET BIT(6) -+ -+/* acc=RO ACTIVE2_STATUS FIELD ACCESS: RO -+ * -+ * The current status of the hevc_active2 signal -+ */ -+#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET BIT(7) -+ -+/* TEST_INT Forces the argon int high for test purposes. -+ * Reset value is *0* decimal. 
-+ */ -+#define ARG_IC_ICTRL_TEST_INT BIT(8) -+#define ARG_IC_ICTRL_SPARE BIT(9) -+ -+/* acc=RO VP9_INTERRUPT_STATUS FIELD ACCESS: RO -+ * -+ * The current status of the vp9_interrupt signal -+ */ -+#define ARG_IC_ICTRL_VP9_INTERRUPT_STATUS BIT(10) -+ -+/* AIO_INT_ENABLE 1 = Or the AIO int in with the Argon int so the VPU can see -+ * it -+ * 0 = the AIO int is masked. (It should still be connected to the GIC though). -+ */ -+#define ARG_IC_ICTRL_AIO_INT_ENABLE BIT(20) -+#define ARG_IC_ICTRL_H264_ACTIVE_INT BIT(21) -+#define ARG_IC_ICTRL_H264_ACTIVE_EDGE BIT(22) -+#define ARG_IC_ICTRL_H264_ACTIVE_EN BIT(23) -+#define ARG_IC_ICTRL_H264_ACTIVE_STATUS BIT(24) -+#define ARG_IC_ICTRL_H264_INTERRUPT_INT BIT(25) -+#define ARG_IC_ICTRL_H264_INTERRUPT_EDGE BIT(26) -+#define ARG_IC_ICTRL_H264_INTERRUPT_EN BIT(27) -+ -+/* acc=RO H264_INTERRUPT_STATUS FIELD ACCESS: RO -+ * -+ * The current status of the h264_interrupt signal -+ */ -+#define ARG_IC_ICTRL_H264_INTERRUPT_STATUS BIT(28) -+ -+/* acc=LWC VP9_INTERRUPT_INT FIELD ACCESS: LWC -+ * -+ * Interrupt 1 -+ * This is set and held when an vp9_interrupt interrupt edge is detected -+ * The polarity of the edge is set by the VP9_INTERRUPT_EDGE field -+ * Write a 1 to this bit to clear down the latched interrupt -+ * The latched interrupt is only enabled out onto the interrupt line if -+ * VP9_INTERRUPT_EN is set -+ * Reset value is *0* decimal. -+ */ -+#define ARG_IC_ICTRL_VP9_INTERRUPT_INT BIT(29) -+ -+/* VP9_INTERRUPT_EDGE Sets the polarity of the interrupt edge detection logic -+ * This logic detects edges of the vp9_interrupt line from the argon h264 core -+ * 0 = negedge, 1 = posedge -+ * Reset value is *0* decimal. -+ */ -+#define ARG_IC_ICTRL_VP9_INTERRUPT_EDGE BIT(30) -+ -+/* VP9_INTERRUPT_EN Enables VP9_INTERRUPT_INT out onto the argon interrupt line. -+ * If this isn't set, the interrupt logic will work but no interrupt will be -+ * set to the interrupt controller -+ * Reset value is *1* decimal. 
-+ */ -+#define ARG_IC_ICTRL_VP9_INTERRUPT_EN BIT(31) -+ -+/* Bits 19:12, 11 reserved - read ?, write 0 */ -+#define ARG_IC_ICTRL_SET_ZERO_MASK ((0xff << 12) | BIT(11)) -+ -+/* All IRQ bits */ -+#define ARG_IC_ICTRL_ALL_IRQ_MASK (\ -+ ARG_IC_ICTRL_VP9_INTERRUPT_INT |\ -+ ARG_IC_ICTRL_H264_INTERRUPT_INT |\ -+ ARG_IC_ICTRL_ACTIVE1_INT_SET |\ -+ ARG_IC_ICTRL_ACTIVE2_INT_SET) -+ -+/* Auto release once all CBs called */ -+void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback ready_cb, void *ctx); -+/* May only be called in claim cb */ -+void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback irq_cb, void *ctx); -+/* May only be called in irq cb */ -+void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback thread_cb, void *ctx); -+ -+/* Auto release once all CBs called */ -+void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback ready_cb, void *ctx); -+/* May only be called in claim cb */ -+void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev, -+ struct rpivid_hw_irq_ent *ient, -+ rpivid_irq_callback irq_cb, void *ctx); -+ -+int rpivid_hw_probe(struct rpivid_dev *dev); -+void rpivid_hw_remove(struct rpivid_dev *dev); -+ -+#endif ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_video.c -@@ -0,0 +1,593 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#include <media/videobuf2-dma-contig.h> -+#include <media/v4l2-device.h> -+#include <media/v4l2-ioctl.h> -+#include <media/v4l2-event.h> -+#include 
<media/v4l2-mem2mem.h> -+ -+#include "rpivid.h" -+#include "rpivid_video.h" -+#include "rpivid_dec.h" -+ -+#define RPIVID_DECODE_SRC BIT(0) -+#define RPIVID_DECODE_DST BIT(1) -+ -+#define RPIVID_MIN_WIDTH 16U -+#define RPIVID_MIN_HEIGHT 16U -+#define RPIVID_MAX_WIDTH 4096U -+#define RPIVID_MAX_HEIGHT 4096U -+ -+static inline struct rpivid_ctx *rpivid_file2ctx(struct file *file) -+{ -+ return container_of(file->private_data, struct rpivid_ctx, fh); -+} -+ -+/* constrain x to y,y*2 */ -+static inline unsigned int constrain2x(unsigned int x, unsigned int y) -+{ -+ return (x < y) ? -+ y : -+ (x > y * 2) ? y : x; -+} -+ -+int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt) -+{ -+ if (pix_fmt->pixelformat != V4L2_PIX_FMT_HEVC_SLICE) -+ return -EINVAL; -+ -+ /* Zero bytes per line for encoded source. */ -+ pix_fmt->bytesperline = 0; -+ /* Choose some minimum size since this can't be 0 */ -+ pix_fmt->sizeimage = max_t(u32, SZ_1K, pix_fmt->sizeimage); -+ pix_fmt->field = V4L2_FIELD_NONE; -+ return 0; -+} -+ -+int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt) -+{ -+ unsigned int width = pix_fmt->width; -+ unsigned int height = pix_fmt->height; -+ unsigned int sizeimage = pix_fmt->sizeimage; -+ unsigned int bytesperline = pix_fmt->bytesperline; -+ -+ switch (pix_fmt->pixelformat) { -+ /* For column formats set bytesperline to column height (stride2) */ -+ case V4L2_PIX_FMT_NV12_COL128: -+ /* Width rounds up to columns */ -+ width = ALIGN(min(width, RPIVID_MAX_WIDTH), 128); -+ -+ /* 16 aligned height - not sure we even need that */ -+ height = ALIGN(height, 16); -+ /* column height -+ * Accept suggested shape if at least min & < 2 * min -+ */ -+ bytesperline = constrain2x(bytesperline, height * 3 / 2); -+ -+ /* image size -+ * Again allow plausible variation in case added padding is -+ * required -+ */ -+ sizeimage = constrain2x(sizeimage, bytesperline * width); -+ break; -+ -+ case V4L2_PIX_FMT_NV12_10_COL128: -+ /* width in pixels (3 pels = 4 bytes) 
rounded to 128 byte -+ * columns -+ */ -+ width = ALIGN(((min(width, RPIVID_MAX_WIDTH) + 2) / 3), 32) * 3; -+ -+ /* 16-aligned height. */ -+ height = ALIGN(height, 16); -+ -+ /* column height -+ * Accept suggested shape if at least min & < 2 * min -+ */ -+ bytesperline = constrain2x(bytesperline, height * 3 / 2); -+ -+ /* image size -+ * Again allow plausible variation in case added padding is -+ * required -+ */ -+ sizeimage = constrain2x(sizeimage, -+ bytesperline * width * 4 / 3); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ pix_fmt->width = width; -+ pix_fmt->height = height; -+ -+ pix_fmt->field = V4L2_FIELD_NONE; -+ pix_fmt->bytesperline = bytesperline; -+ pix_fmt->sizeimage = sizeimage; -+ return 0; -+} -+ -+static int rpivid_querycap(struct file *file, void *priv, -+ struct v4l2_capability *cap) -+{ -+ strscpy(cap->driver, RPIVID_NAME, sizeof(cap->driver)); -+ strscpy(cap->card, RPIVID_NAME, sizeof(cap->card)); -+ snprintf(cap->bus_info, sizeof(cap->bus_info), -+ "platform:%s", RPIVID_NAME); -+ -+ return 0; -+} -+ -+static int rpivid_enum_fmt_vid_out(struct file *file, void *priv, -+ struct v4l2_fmtdesc *f) -+{ -+ // Input formats -+ -+ // H.265 Slice only currently -+ if (f->index == 0) { -+ f->pixelformat = V4L2_PIX_FMT_HEVC_SLICE; -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ -+static int rpivid_hevc_validate_sps(const struct v4l2_ctrl_hevc_sps * const sps) -+{ -+ const unsigned int ctb_log2_size_y = -+ sps->log2_min_luma_coding_block_size_minus3 + 3 + -+ sps->log2_diff_max_min_luma_coding_block_size; -+ const unsigned int min_tb_log2_size_y = -+ sps->log2_min_luma_transform_block_size_minus2 + 2; -+ const unsigned int max_tb_log2_size_y = min_tb_log2_size_y + -+ sps->log2_diff_max_min_luma_transform_block_size; -+ -+ /* Local limitations */ -+ if (sps->pic_width_in_luma_samples < 32 || -+ sps->pic_width_in_luma_samples > 4096) -+ return 0; -+ if (sps->pic_height_in_luma_samples < 32 || -+ sps->pic_height_in_luma_samples > 4096) -+ return 0; 
-+ if (!(sps->bit_depth_luma_minus8 == 0 || -+ sps->bit_depth_luma_minus8 == 2)) -+ return 0; -+ if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) -+ return 0; -+ if (sps->chroma_format_idc != 1) -+ return 0; -+ -+ /* Limits from H.265 7.4.3.2.1 */ -+ if (sps->log2_max_pic_order_cnt_lsb_minus4 > 12) -+ return 0; -+ if (sps->sps_max_dec_pic_buffering_minus1 > 15) -+ return 0; -+ if (sps->sps_max_num_reorder_pics > -+ sps->sps_max_dec_pic_buffering_minus1) -+ return 0; -+ if (ctb_log2_size_y > 6) -+ return 0; -+ if (max_tb_log2_size_y > 5) -+ return 0; -+ if (max_tb_log2_size_y > ctb_log2_size_y) -+ return 0; -+ if (sps->max_transform_hierarchy_depth_inter > -+ (ctb_log2_size_y - min_tb_log2_size_y)) -+ return 0; -+ if (sps->max_transform_hierarchy_depth_intra > -+ (ctb_log2_size_y - min_tb_log2_size_y)) -+ return 0; -+ /* Check pcm stuff */ -+ if (sps->num_short_term_ref_pic_sets > 64) -+ return 0; -+ if (sps->num_long_term_ref_pics_sps > 32) -+ return 0; -+ return 1; -+} -+ -+static inline int is_sps_set(const struct v4l2_ctrl_hevc_sps * const sps) -+{ -+ return sps && sps->pic_width_in_luma_samples != 0; -+} -+ -+static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps, -+ const int index) -+{ -+ u32 pf = 0; -+ -+ // Use width 0 as a signifier of unsetness -+ if (!is_sps_set(sps)) { -+ /* Treat this as an error? 
For now return both */ -+ if (index == 0) -+ pf = V4L2_PIX_FMT_NV12_COL128; -+ else if (index == 1) -+ pf = V4L2_PIX_FMT_NV12_10_COL128; -+ } else if (index == 0 && rpivid_hevc_validate_sps(sps)) { -+ if (sps->bit_depth_luma_minus8 == 0) -+ pf = V4L2_PIX_FMT_NV12_COL128; -+ else if (sps->bit_depth_luma_minus8 == 2) -+ pf = V4L2_PIX_FMT_NV12_10_COL128; -+ } -+ -+ return pf; -+} -+ -+static struct v4l2_pix_format -+rpivid_hevc_default_dst_fmt(struct rpivid_ctx * const ctx) -+{ -+ const struct v4l2_ctrl_hevc_sps * const sps = -+ rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS); -+ struct v4l2_pix_format pix_fmt = { -+ .width = sps->pic_width_in_luma_samples, -+ .height = sps->pic_height_in_luma_samples, -+ .pixelformat = pixelformat_from_sps(sps, 0) -+ }; -+ -+ rpivid_prepare_dst_format(&pix_fmt); -+ return pix_fmt; -+} -+ -+static u32 rpivid_hevc_get_dst_pixelformat(struct rpivid_ctx * const ctx, -+ const int index) -+{ -+ const struct v4l2_ctrl_hevc_sps * const sps = -+ rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS); -+ -+ return pixelformat_from_sps(sps, index); -+} -+ -+static int rpivid_enum_fmt_vid_cap(struct file *file, void *priv, -+ struct v4l2_fmtdesc *f) -+{ -+ struct rpivid_ctx * const ctx = rpivid_file2ctx(file); -+ -+ const u32 pf = rpivid_hevc_get_dst_pixelformat(ctx, f->index); -+ -+ if (pf == 0) -+ return -EINVAL; -+ -+ f->pixelformat = pf; -+ return 0; -+} -+ -+static int rpivid_g_fmt_vid_cap(struct file *file, void *priv, -+ struct v4l2_format *f) -+{ -+ struct rpivid_ctx *ctx = rpivid_file2ctx(file); -+ -+ if (!ctx->dst_fmt_set) -+ ctx->dst_fmt = rpivid_hevc_default_dst_fmt(ctx); -+ f->fmt.pix = ctx->dst_fmt; -+ return 0; -+} -+ -+static int rpivid_g_fmt_vid_out(struct file *file, void *priv, -+ struct v4l2_format *f) -+{ -+ struct rpivid_ctx *ctx = rpivid_file2ctx(file); -+ -+ f->fmt.pix = ctx->src_fmt; -+ return 0; -+} -+ -+static inline void copy_color(struct v4l2_pix_format *d, -+ const struct v4l2_pix_format *s) -+{ -+ 
d->colorspace = s->colorspace; -+ d->xfer_func = s->xfer_func; -+ d->ycbcr_enc = s->ycbcr_enc; -+ d->quantization = s->quantization; -+} -+ -+static int rpivid_try_fmt_vid_cap(struct file *file, void *priv, -+ struct v4l2_format *f) -+{ -+ struct rpivid_ctx *ctx = rpivid_file2ctx(file); -+ const struct v4l2_ctrl_hevc_sps * const sps = -+ rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS); -+ u32 pixelformat; -+ int i; -+ -+ /* Reject format types we don't support */ -+ if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) -+ return -EINVAL; -+ -+ for (i = 0; (pixelformat = pixelformat_from_sps(sps, i)) != 0; i++) { -+ if (f->fmt.pix.pixelformat == pixelformat) -+ break; -+ } -+ -+ // If we can't use requested fmt then set to default -+ if (pixelformat == 0) { -+ pixelformat = pixelformat_from_sps(sps, 0); -+ // If we don't have a default then give up -+ if (pixelformat == 0) -+ return -EINVAL; -+ } -+ -+ // We don't have any way of finding out colourspace so believe -+ // anything we are told - take anything set in src as a default -+ if (f->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) -+ copy_color(&f->fmt.pix, &ctx->src_fmt); -+ -+ f->fmt.pix.pixelformat = pixelformat; -+ return rpivid_prepare_dst_format(&f->fmt.pix); -+} -+ -+static int rpivid_try_fmt_vid_out(struct file *file, void *priv, -+ struct v4l2_format *f) -+{ -+ if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) -+ return -EINVAL; -+ -+ if (rpivid_prepare_src_format(&f->fmt.pix)) { -+ // Set default src format -+ f->fmt.pix.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT; -+ rpivid_prepare_src_format(&f->fmt.pix); -+ } -+ return 0; -+} -+ -+static int rpivid_s_fmt_vid_cap(struct file *file, void *priv, -+ struct v4l2_format *f) -+{ -+ struct rpivid_ctx *ctx = rpivid_file2ctx(file); -+ struct vb2_queue *vq; -+ int ret; -+ -+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); -+ if (vb2_is_busy(vq)) -+ return -EBUSY; -+ -+ ret = rpivid_try_fmt_vid_cap(file, priv, f); -+ if (ret) -+ return ret; -+ -+ ctx->dst_fmt = 
f->fmt.pix; -+ ctx->dst_fmt_set = 1; -+ -+ return 0; -+} -+ -+static int rpivid_s_fmt_vid_out(struct file *file, void *priv, -+ struct v4l2_format *f) -+{ -+ struct rpivid_ctx *ctx = rpivid_file2ctx(file); -+ struct vb2_queue *vq; -+ int ret; -+ -+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); -+ if (vb2_is_busy(vq)) -+ return -EBUSY; -+ -+ ret = rpivid_try_fmt_vid_out(file, priv, f); -+ if (ret) -+ return ret; -+ -+ ctx->src_fmt = f->fmt.pix; -+ ctx->dst_fmt_set = 0; // Setting src invalidates dst -+ -+ vq->subsystem_flags |= -+ VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF; -+ -+ /* Propagate colorspace information to capture. */ -+ copy_color(&ctx->dst_fmt, &f->fmt.pix); -+ return 0; -+} -+ -+const struct v4l2_ioctl_ops rpivid_ioctl_ops = { -+ .vidioc_querycap = rpivid_querycap, -+ -+ .vidioc_enum_fmt_vid_cap = rpivid_enum_fmt_vid_cap, -+ .vidioc_g_fmt_vid_cap = rpivid_g_fmt_vid_cap, -+ .vidioc_try_fmt_vid_cap = rpivid_try_fmt_vid_cap, -+ .vidioc_s_fmt_vid_cap = rpivid_s_fmt_vid_cap, -+ -+ .vidioc_enum_fmt_vid_out = rpivid_enum_fmt_vid_out, -+ .vidioc_g_fmt_vid_out = rpivid_g_fmt_vid_out, -+ .vidioc_try_fmt_vid_out = rpivid_try_fmt_vid_out, -+ .vidioc_s_fmt_vid_out = rpivid_s_fmt_vid_out, -+ -+ .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, -+ .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, -+ .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, -+ .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, -+ .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, -+ .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, -+ .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, -+ -+ .vidioc_streamon = v4l2_m2m_ioctl_streamon, -+ .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, -+ -+ .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_stateless_try_decoder_cmd, -+ .vidioc_decoder_cmd = v4l2_m2m_ioctl_stateless_decoder_cmd, -+ -+ .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, -+ .vidioc_unsubscribe_event = v4l2_event_unsubscribe, -+}; -+ -+static int rpivid_queue_setup(struct vb2_queue *vq, unsigned int *nbufs, -+ unsigned int *nplanes, 
unsigned int sizes[], -+ struct device *alloc_devs[]) -+{ -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vq); -+ struct v4l2_pix_format *pix_fmt; -+ -+ if (V4L2_TYPE_IS_OUTPUT(vq->type)) -+ pix_fmt = &ctx->src_fmt; -+ else -+ pix_fmt = &ctx->dst_fmt; -+ -+ if (*nplanes) { -+ if (sizes[0] < pix_fmt->sizeimage) -+ return -EINVAL; -+ } else { -+ sizes[0] = pix_fmt->sizeimage; -+ *nplanes = 1; -+ } -+ -+ return 0; -+} -+ -+static void rpivid_queue_cleanup(struct vb2_queue *vq, u32 state) -+{ -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vq); -+ struct vb2_v4l2_buffer *vbuf; -+ -+ for (;;) { -+ if (V4L2_TYPE_IS_OUTPUT(vq->type)) -+ vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); -+ else -+ vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); -+ -+ if (!vbuf) -+ return; -+ -+ v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req, -+ &ctx->hdl); -+ v4l2_m2m_buf_done(vbuf, state); -+ } -+} -+ -+static int rpivid_buf_out_validate(struct vb2_buffer *vb) -+{ -+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); -+ -+ vbuf->field = V4L2_FIELD_NONE; -+ return 0; -+} -+ -+static int rpivid_buf_prepare(struct vb2_buffer *vb) -+{ -+ struct vb2_queue *vq = vb->vb2_queue; -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vq); -+ struct v4l2_pix_format *pix_fmt; -+ -+ if (V4L2_TYPE_IS_OUTPUT(vq->type)) -+ pix_fmt = &ctx->src_fmt; -+ else -+ pix_fmt = &ctx->dst_fmt; -+ -+ if (vb2_plane_size(vb, 0) < pix_fmt->sizeimage) -+ return -EINVAL; -+ -+ vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage); -+ -+ return 0; -+} -+ -+static int rpivid_start_streaming(struct vb2_queue *vq, unsigned int count) -+{ -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vq); -+ struct rpivid_dev *dev = ctx->dev; -+ int ret = 0; -+ -+ if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE) -+ return -EINVAL; -+ -+ if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->start) -+ ret = dev->dec_ops->start(ctx); -+ -+ ret = clk_set_rate(dev->clock, 500 * 1000 * 1000); -+ if (ret) { -+ dev_err(dev->dev, "Failed to set clock 
rate\n"); -+ goto out; -+ } -+ -+ ret = clk_prepare_enable(dev->clock); -+ if (ret) -+ dev_err(dev->dev, "Failed to enable clock\n"); -+ -+out: -+ if (ret) -+ rpivid_queue_cleanup(vq, VB2_BUF_STATE_QUEUED); -+ -+ return ret; -+} -+ -+static void rpivid_stop_streaming(struct vb2_queue *vq) -+{ -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vq); -+ struct rpivid_dev *dev = ctx->dev; -+ -+ if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->stop) -+ dev->dec_ops->stop(ctx); -+ -+ rpivid_queue_cleanup(vq, VB2_BUF_STATE_ERROR); -+ -+ clk_disable_unprepare(dev->clock); -+} -+ -+static void rpivid_buf_queue(struct vb2_buffer *vb) -+{ -+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); -+ -+ v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); -+} -+ -+static void rpivid_buf_request_complete(struct vb2_buffer *vb) -+{ -+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); -+ -+ v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl); -+} -+ -+static struct vb2_ops rpivid_qops = { -+ .queue_setup = rpivid_queue_setup, -+ .buf_prepare = rpivid_buf_prepare, -+ .buf_queue = rpivid_buf_queue, -+ .buf_out_validate = rpivid_buf_out_validate, -+ .buf_request_complete = rpivid_buf_request_complete, -+ .start_streaming = rpivid_start_streaming, -+ .stop_streaming = rpivid_stop_streaming, -+ .wait_prepare = vb2_ops_wait_prepare, -+ .wait_finish = vb2_ops_wait_finish, -+}; -+ -+int rpivid_queue_init(void *priv, struct vb2_queue *src_vq, -+ struct vb2_queue *dst_vq) -+{ -+ struct rpivid_ctx *ctx = priv; -+ int ret; -+ -+ src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ src_vq->io_modes = VB2_MMAP | VB2_DMABUF; -+ src_vq->drv_priv = ctx; -+ src_vq->buf_struct_size = sizeof(struct rpivid_buffer); -+ src_vq->min_buffers_needed = 1; -+ src_vq->ops = &rpivid_qops; -+ src_vq->mem_ops = &vb2_dma_contig_memops; -+ src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; -+ src_vq->lock = &ctx->dev->dev_mutex; -+ src_vq->dev = 
ctx->dev->dev; -+ src_vq->supports_requests = true; -+ src_vq->requires_requests = true; -+ -+ ret = vb2_queue_init(src_vq); -+ if (ret) -+ return ret; -+ -+ dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; -+ dst_vq->drv_priv = ctx; -+ dst_vq->buf_struct_size = sizeof(struct rpivid_buffer); -+ dst_vq->min_buffers_needed = 1; -+ dst_vq->ops = &rpivid_qops; -+ dst_vq->mem_ops = &vb2_dma_contig_memops; -+ dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; -+ dst_vq->lock = &ctx->dev->dev_mutex; -+ dst_vq->dev = ctx->dev->dev; -+ -+ return vb2_queue_init(dst_vq); -+} ---- /dev/null -+++ b/drivers/staging/media/rpivid/rpivid_video.h -@@ -0,0 +1,30 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Raspberry Pi HEVC driver -+ * -+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd -+ * -+ * Based on the Cedrus VPU driver, that is: -+ * -+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> -+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> -+ * Copyright (C) 2018 Bootlin -+ */ -+ -+#ifndef _RPIVID_VIDEO_H_ -+#define _RPIVID_VIDEO_H_ -+ -+struct rpivid_format { -+ u32 pixelformat; -+ u32 directions; -+ unsigned int capabilities; -+}; -+ -+extern const struct v4l2_ioctl_ops rpivid_ioctl_ops; -+ -+int rpivid_queue_init(void *priv, struct vb2_queue *src_vq, -+ struct vb2_queue *dst_vq); -+int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt); -+int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt); -+ -+#endif |