diff options
Diffstat (limited to 'target/linux/bcm27xx/patches-5.10/950-0614-media-rpivid-Add-a-Pass0-to-accumulate-slices-and-re.patch')
-rw-r--r-- | target/linux/bcm27xx/patches-5.10/950-0614-media-rpivid-Add-a-Pass0-to-accumulate-slices-and-re.patch | 1049 |
1 files changed, 0 insertions, 1049 deletions
diff --git a/target/linux/bcm27xx/patches-5.10/950-0614-media-rpivid-Add-a-Pass0-to-accumulate-slices-and-re.patch b/target/linux/bcm27xx/patches-5.10/950-0614-media-rpivid-Add-a-Pass0-to-accumulate-slices-and-re.patch deleted file mode 100644 index afa8aacce0..0000000000 --- a/target/linux/bcm27xx/patches-5.10/950-0614-media-rpivid-Add-a-Pass0-to-accumulate-slices-and-re.patch +++ /dev/null @@ -1,1049 +0,0 @@ -From a1902958d144d55309a1074f74fc9b3494d3042f Mon Sep 17 00:00:00 2001 -From: John Cox <jc@kynesim.co.uk> -Date: Thu, 11 Mar 2021 19:08:00 +0000 -Subject: [PATCH] media: rpivid: Add a Pass0 to accumulate slices and - rework job finish - -Due to overheads in assembling controls and requests it is worth having -the slice assembly phase separate from the h/w pass1 processing. Create -a queue to service pass1 rather than have the pass1 finished callback -trigger the next slice job. - -This requires a rework of the logic that splits up the buffer and -request done events. This code contains two ways of doing that, we use -Ezequiel Garcias <ezequiel@collabora.com> solution, but expect that -in the future this will be handled by the framework in a cleaner manner. - -Fix up the handling of some of the memory exhaustion crashes uncovered -in the process of writing this code. - -Signed-off-by: John Cox <jc@kynesim.co.uk> ---- - drivers/media/v4l2-core/v4l2-mem2mem.c | 2 - - drivers/staging/media/rpivid/rpivid.c | 11 +- - drivers/staging/media/rpivid/rpivid.h | 20 +- - drivers/staging/media/rpivid/rpivid_dec.c | 32 +- - drivers/staging/media/rpivid/rpivid_h265.c | 432 ++++++++++++++++----- - drivers/staging/media/rpivid/rpivid_hw.c | 8 +- - 6 files changed, 374 insertions(+), 131 deletions(-) - ---- a/drivers/media/v4l2-core/v4l2-mem2mem.c -+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c -@@ -492,8 +492,6 @@ void v4l2_m2m_job_finish(struct v4l2_m2m - * holding capture buffers. Those should use - * v4l2_m2m_buf_done_and_job_finish() instead. - */ -- WARN_ON(m2m_ctx->out_q_ctx.q.subsystem_flags & -- VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF); - spin_lock_irqsave(&m2m_dev->job_spinlock, flags); - schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx); - spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags); ---- a/drivers/staging/media/rpivid/rpivid.c -+++ b/drivers/staging/media/rpivid/rpivid.c -@@ -79,17 +79,24 @@ static const struct rpivid_control rpivi - - #define rpivid_ctrls_COUNT ARRAY_SIZE(rpivid_ctrls) - --void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id) -+struct v4l2_ctrl *rpivid_find_ctrl(struct rpivid_ctx *ctx, u32 id) - { - unsigned int i; - - for (i = 0; ctx->ctrls[i]; i++) - if (ctx->ctrls[i]->id == id) -- return ctx->ctrls[i]->p_cur.p; -+ return ctx->ctrls[i]; - - return NULL; - } - -+void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id) -+{ -+ struct v4l2_ctrl *const ctrl = rpivid_find_ctrl(ctx, id); -+ -+ return !ctrl ? NULL : ctrl->p_cur.p; -+} -+ - static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx) - { - struct v4l2_ctrl_handler *hdl = &ctx->hdl; ---- a/drivers/staging/media/rpivid/rpivid.h -+++ b/drivers/staging/media/rpivid/rpivid.h -@@ -24,6 +24,10 @@ - - #define OPT_DEBUG_POLL_IRQ 0 - -+#define RPIVID_DEC_ENV_COUNT 6 -+#define RPIVID_P1BUF_COUNT 3 -+#define RPIVID_P2BUF_COUNT 3 -+ - #define RPIVID_NAME "rpivid" - - #define RPIVID_CAPABILITY_UNTILED BIT(0) -@@ -45,6 +49,7 @@ struct rpivid_control { - }; - - struct rpivid_h265_run { -+ u32 slice_ents; - const struct v4l2_ctrl_hevc_sps *sps; - const struct v4l2_ctrl_hevc_pps *pps; - const struct v4l2_ctrl_hevc_slice_params *slice_params; -@@ -64,7 +69,6 @@ struct rpivid_buffer { - - struct rpivid_dec_state; - struct rpivid_dec_env; --#define RPIVID_DEC_ENV_COUNT 3 - - struct rpivid_gptr { - size_t size; -@@ -79,7 +83,6 @@ typedef void (*rpivid_irq_callback)(stru - struct rpivid_q_aux; - #define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME - --#define RPIVID_P2BUF_COUNT 2 - - struct rpivid_ctx { - struct v4l2_fh fh; -@@ -108,11 +111,13 @@ struct rpivid_ctx { - - struct rpivid_dec_env *dec_pool; - -- /* Some of these should be in dev */ -- struct rpivid_gptr bitbufs[1]; /* Will be 2 */ -- struct rpivid_gptr cmdbufs[1]; /* Will be 2 */ -+ unsigned int p1idx; -+ atomic_t p1out; -+ struct rpivid_gptr bitbufs[RPIVID_P1BUF_COUNT]; -+ struct rpivid_gptr cmdbufs[RPIVID_P1BUF_COUNT]; -+ -+ /* *** Should be in dev *** */ - unsigned int p2idx; -- atomic_t p2out; - struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT]; - struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT]; - -@@ -141,6 +146,8 @@ struct rpivid_variant { - - struct rpivid_hw_irq_ent; - -+#define RPIVID_ICTL_ENABLE_UNLIMITED (-1) -+ - struct rpivid_hw_irq_ctrl { - /* Spinlock protecting claim and tail */ - spinlock_t lock; -@@ -182,6 +189,7 @@ struct rpivid_dev { - - extern struct rpivid_dec_ops rpivid_dec_ops_h265; - -+struct v4l2_ctrl *rpivid_find_ctrl(struct rpivid_ctx *ctx, u32 id); - void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id); - - #endif ---- a/drivers/staging/media/rpivid/rpivid_dec.c -+++ b/drivers/staging/media/rpivid/rpivid_dec.c -@@ -21,8 +21,8 @@ - - void rpivid_device_run(void *priv) - { -- struct rpivid_ctx *ctx = priv; -- struct rpivid_dev *dev = ctx->dev; -+ struct rpivid_ctx *const ctx = priv; -+ struct rpivid_dev *const dev = ctx->dev; - struct rpivid_run run = {}; - struct media_request *src_req; - -@@ -32,19 +32,17 @@ void rpivid_device_run(void *priv) - if (!run.src || !run.dst) { - v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n", - __func__, run.src, run.dst); -- /* We are stuffed - this probably won't dig us out of our -- * current situation but it is better than nothing -- */ -- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -- VB2_BUF_STATE_ERROR); -- return; -+ goto fail; - } - -- /* Apply request(s) controls if needed. */ -+ /* Apply request(s) controls */ - src_req = run.src->vb2_buf.req_obj.req; -+ if (!src_req) { -+ v4l2_err(&dev->v4l2_dev, "%s: Missing request\n", __func__); -+ goto fail; -+ } - -- if (src_req) -- v4l2_ctrl_request_setup(src_req, &ctx->hdl); -+ v4l2_ctrl_request_setup(src_req, &ctx->hdl); - - switch (ctx->src_fmt.pixelformat) { - case V4L2_PIX_FMT_HEVC_SLICE: -@@ -70,10 +68,14 @@ void rpivid_device_run(void *priv) - - dev->dec_ops->setup(ctx, &run); - -- /* Complete request(s) controls if needed. */ -- -- if (src_req) -- v4l2_ctrl_request_complete(src_req, &ctx->hdl); -+ /* Complete request(s) controls */ -+ v4l2_ctrl_request_complete(src_req, &ctx->hdl); - - dev->dec_ops->trigger(ctx); -+ return; -+ -+fail: -+ /* We really shouldn't get here but tidy up what we can */ -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_ERROR); - } ---- a/drivers/staging/media/rpivid/rpivid_h265.c -+++ b/drivers/staging/media/rpivid/rpivid_h265.c -@@ -22,6 +22,8 @@ - #define DEBUG_TRACE_P1_CMD 0 - #define DEBUG_TRACE_EXECUTION 0 - -+#define USE_REQUEST_PIN 1 -+ - #if DEBUG_TRACE_EXECUTION - #define xtrace_in(dev_, de_)\ - v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n", __func__,\ -@@ -192,8 +194,6 @@ struct rpivid_dec_env { - unsigned int decode_order; - int p1_status; /* P1 status - what to realloc */ - -- struct rpivid_dec_env *phase_wait_q_next; -- - struct rpi_cmd *cmd_fifo; - unsigned int cmd_len, cmd_max; - unsigned int num_slice_msgs; -@@ -219,6 +219,7 @@ struct rpivid_dec_env { - u32 rpi_currpoc; - - struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer -+ struct vb2_v4l2_buffer *src_buf; // Detached src buffer - unsigned int frame_c_offset; - unsigned int frame_stride; - dma_addr_t frame_addr; -@@ -235,9 +236,15 @@ struct rpivid_dec_env { - size_t bit_copy_len; - struct rpivid_gptr *cmd_copy_gptr; - -- u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3]; -+#define SLICE_MSGS_MAX (2 * HEVC_MAX_REFS * 8 + 3) -+ u16 slice_msgs[SLICE_MSGS_MAX]; - u8 scaling_factors[NUM_SCALING_FACTORS]; - -+#if USE_REQUEST_PIN -+ struct media_request *req_pin; -+#else -+ struct media_request_object *req_obj; -+#endif - struct rpivid_hw_irq_ent irq_ent; - }; - -@@ -286,6 +293,17 @@ struct rpivid_dec_state { - unsigned int prev_ctb_y; - }; - -+#if !USE_REQUEST_PIN -+static void dst_req_obj_release(struct media_request_object *object) -+{ -+ kfree(object); -+} -+ -+static const struct media_request_object_ops dst_req_obj_ops = { -+ .release = dst_req_obj_release, -+}; -+#endif -+ - static inline int clip_int(const int x, const int lo, const int hi) - { - return x < lo ? lo : x > hi ? hi : x; -@@ -298,15 +316,48 @@ static inline int clip_int(const int x, - static int p1_z; - #endif - -+static int cmds_check_space(struct rpivid_dec_env *const de, unsigned int n) -+{ -+ struct rpi_cmd *a; -+ unsigned int newmax; -+ -+ if (n > 0x100000) { -+ v4l2_err(&de->ctx->dev->v4l2_dev, -+ "%s: n %u implausible\n", __func__, n); -+ return -ENOMEM; -+ } -+ -+ if (de->cmd_len + n <= de->cmd_max) -+ return 0; -+ -+ newmax = 2 << log2_size(de->cmd_len + n); -+ -+ a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd), -+ GFP_KERNEL); -+ if (!a) { -+ v4l2_err(&de->ctx->dev->v4l2_dev, -+ "Failed cmd buffer realloc from %u to %u\n", -+ de->cmd_max, newmax); -+ return -ENOMEM; -+ } -+ v4l2_info(&de->ctx->dev->v4l2_dev, -+ "cmd buffer realloc from %u to %u\n", de->cmd_max, newmax); -+ -+ de->cmd_fifo = a; -+ de->cmd_max = newmax; -+ return 0; -+} -+ - // ???? u16 addr - put in u32 --static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr, -- const u32 data) -+static void p1_apb_write(struct rpivid_dec_env *const de, const u16 addr, -+ const u32 data) - { -- if (de->cmd_len == de->cmd_max) -- de->cmd_fifo = -- krealloc(de->cmd_fifo, -- (de->cmd_max *= 2) * sizeof(struct rpi_cmd), -- GFP_KERNEL); -+ if (de->cmd_len >= de->cmd_max) { -+ v4l2_err(&de->ctx->dev->v4l2_dev, -+ "%s: Overflow @ %d\n", __func__, de->cmd_len); -+ return; -+ } -+ - de->cmd_fifo[de->cmd_len].addr = addr; - de->cmd_fifo[de->cmd_len].data = data; - -@@ -316,8 +367,7 @@ static int p1_apb_write(struct rpivid_de - de->cmd_len, addr, data); - } - #endif -- -- return de->cmd_len++; -+ de->cmd_len++; - } - - static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num) -@@ -511,6 +561,7 @@ static const u8 prob_init[3][156] = { - }, - }; - -+#define CMDS_WRITE_PROB ((RPI_PROB_ARRAY_SIZE / 4) + 1) - static void write_prob(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) - { -@@ -554,6 +605,7 @@ static void write_prob(struct rpivid_dec - p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); - } - -+#define CMDS_WRITE_SCALING_FACTORS NUM_SCALING_FACTORS - static void write_scaling_factors(struct rpivid_dec_env *const de) - { - int i; -@@ -569,8 +621,9 @@ static inline __u32 dma_to_axi_addr(dma_ - return (__u32)(a >> 6); - } - --static void write_bitstream(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s) -+#define CMDS_WRITE_BITSTREAM 4 -+static int write_bitstream(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) - { - // Note that FFmpeg V4L2 does not remove emulation prevention bytes, - // so this is matched in the configuration here. -@@ -584,6 +637,13 @@ static void write_bitstream(struct rpivi - if (s->src_addr != 0) { - addr = s->src_addr + offset; - } else { -+ if (len + de->bit_copy_len > de->bit_copy_gptr->size) { -+ v4l2_warn(&de->ctx->dev->v4l2_dev, -+ "Bit copy buffer overflow: size=%zu, offset=%zu, len=%u\n", -+ de->bit_copy_gptr->size, -+ de->bit_copy_len, len); -+ return -ENOMEM; -+ } - memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len, - s->src_buf + offset, len); - addr = de->bit_copy_gptr->addr + de->bit_copy_len; -@@ -595,6 +655,7 @@ static void write_bitstream(struct rpivi - p1_apb_write(de, RPI_BFNUM, len); - p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop - p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6)); -+ return 0; - } - - ////////////////////////////////////////////////////////////////////////////// -@@ -623,6 +684,7 @@ static u32 slice_reg_const(const struct - - ////////////////////////////////////////////////////////////////////////////// - -+#define CMDS_NEW_SLICE_SEGMENT (4 + CMDS_WRITE_SCALING_FACTORS) - static void new_slice_segment(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) - { -@@ -706,6 +768,7 @@ static void msg_slice(struct rpivid_dec_ - de->slice_msgs[de->num_slice_msgs++] = msg; - } - -+#define CMDS_PROGRAM_SLICECMDS (1 + SLICE_MSGS_MAX) - static void program_slicecmds(struct rpivid_dec_env *const de, - const int sliceid) - { -@@ -902,6 +965,7 @@ static void pre_slice_decode(struct rpiv - (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF - } - -+#define CMDS_WRITE_SLICE 1 - static void write_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s, - const u32 slice_const, -@@ -927,6 +991,7 @@ static void write_slice(struct rpivid_de - * N.B. This can be called to fill in data from the previous slice so must not - * use any state data that may change from slice to slice (e.g. qp) - */ -+#define CMDS_NEW_ENTRY_POINT (6 + CMDS_WRITE_SLICE) - static void new_entry_point(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s, - const bool do_bte, -@@ -977,6 +1042,7 @@ static void new_entry_point(struct rpivi - ////////////////////////////////////////////////////////////////////////////// - // Wavefront mode - -+#define CMDS_WPP_PAUSE 4 - static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row) - { - p1_apb_write(de, RPI_STATUS, (ctb_row << 18) | 0x25); -@@ -987,12 +1053,19 @@ static void wpp_pause(struct rpivid_dec_ - p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2); - } - --static void wpp_entry_fill(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s, -- const unsigned int last_y) -+#define CMDS_WPP_ENTRY_FILL_1 (CMDS_WPP_PAUSE + 2 + CMDS_NEW_ENTRY_POINT) -+static int wpp_entry_fill(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const unsigned int last_y) - { -+ int rv; - const unsigned int last_x = s->ctb_width - 1; - -+ rv = cmds_check_space(de, CMDS_WPP_ENTRY_FILL_1 * -+ (last_y - de->entry_ctb_y)); -+ if (rv) -+ return rv; -+ - while (de->entry_ctb_y < last_y) { - /* wpp_entry_x/y set by wpp_entry_point */ - if (s->ctb_width > 2) -@@ -1010,12 +1083,21 @@ static void wpp_entry_fill(struct rpivid - 0, 0, 0, de->entry_ctb_y + 1, - de->entry_qp, de->entry_slice); - } -+ return 0; - } - --static void wpp_end_previous_slice(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s) -+static int wpp_end_previous_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) - { -- wpp_entry_fill(de, s, s->prev_ctb_y); -+ int rv; -+ -+ rv = wpp_entry_fill(de, s, s->prev_ctb_y); -+ if (rv) -+ return rv; -+ -+ rv = cmds_check_space(de, CMDS_WPP_PAUSE + 2); -+ if (rv) -+ return rv; - - if (de->entry_ctb_x < 2 && - (de->entry_ctb_y < s->start_ctb_y || s->start_ctb_x > 2) && -@@ -1026,21 +1108,38 @@ static void wpp_end_previous_slice(struc - if (s->start_ctb_x == 2 || - (s->ctb_width == 2 && de->entry_ctb_y < s->start_ctb_y)) - p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); -+ return 0; - } - - /* Only main profile supported so WPP => !Tiles which makes some of the - * next chunk code simpler - */ --static void wpp_decode_slice(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s) -+static int wpp_decode_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) - { - bool reset_qp_y = true; - const bool indep = !s->dependent_slice_segment_flag; -+ int rv; - -- if (s->start_ts) -- wpp_end_previous_slice(de, s); -+ if (s->start_ts) { -+ rv = wpp_end_previous_slice(de, s); -+ if (rv) -+ return rv; -+ } - pre_slice_decode(de, s); -- write_bitstream(de, s); -+ -+ rv = cmds_check_space(de, -+ CMDS_WRITE_BITSTREAM + -+ CMDS_WRITE_PROB + -+ CMDS_PROGRAM_SLICECMDS + -+ CMDS_NEW_SLICE_SEGMENT + -+ CMDS_NEW_ENTRY_POINT); -+ if (rv) -+ return rv; -+ -+ rv = write_bitstream(de, s); -+ if (rv) -+ return rv; - - if (!s->start_ts || indep || s->ctb_width == 1) - write_prob(de, s); -@@ -1056,7 +1155,13 @@ static void wpp_decode_slice(struct rpiv - s->slice_qp, slice_reg_const(s)); - - if (s->frame_end) { -- wpp_entry_fill(de, s, s->ctb_height - 1); -+ rv = wpp_entry_fill(de, s, s->ctb_height - 1); -+ if (rv) -+ return rv; -+ -+ rv = cmds_check_space(de, CMDS_WPP_PAUSE + 1); -+ if (rv) -+ return rv; - - if (de->entry_ctb_x < 2 && s->ctb_width > 2) - wpp_pause(de, s->ctb_height - 1); -@@ -1065,25 +1170,32 @@ static void wpp_decode_slice(struct rpiv - 1 | ((s->ctb_width - 1) << 5) | - ((s->ctb_height - 1) << 18)); - } -- -+ return 0; - } - - ////////////////////////////////////////////////////////////////////////////// - // Tiles mode - --static void tile_entry_fill(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s, -- const unsigned int last_tile_x, -- const unsigned int last_tile_y) -+// Guarantees 1 cmd entry free on exit -+static int tile_entry_fill(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s, -+ const unsigned int last_tile_x, -+ const unsigned int last_tile_y) - { - while (de->entry_tile_y < last_tile_y || - (de->entry_tile_y == last_tile_y && - de->entry_tile_x < last_tile_x)) { -+ int rv; - unsigned int t_x = de->entry_tile_x; - unsigned int t_y = de->entry_tile_y; - const unsigned int last_x = s->col_bd[t_x + 1] - 1; - const unsigned int last_y = s->row_bd[t_y + 1] - 1; - -+ // One more than needed here -+ rv = cmds_check_space(de, CMDS_NEW_ENTRY_POINT + 3); -+ if (rv) -+ return rv; -+ - p1_apb_write(de, RPI_STATUS, - 2 | (last_x << 5) | (last_y << 18)); - p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD); -@@ -1098,33 +1210,55 @@ static void tile_entry_fill(struct rpivi - t_x, t_y, s->col_bd[t_x], s->row_bd[t_y], - de->entry_qp, de->entry_slice); - } -+ return 0; - } - - /* - * Write STATUS register with expected end CTU address of previous slice - */ --static void end_previous_slice(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s) -+static int end_previous_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) - { -- tile_entry_fill(de, s, -- ctb_to_tile_x(s, s->prev_ctb_x), -- ctb_to_tile_y(s, s->prev_ctb_y)); -+ int rv; -+ -+ rv = tile_entry_fill(de, s, -+ ctb_to_tile_x(s, s->prev_ctb_x), -+ ctb_to_tile_y(s, s->prev_ctb_y)); -+ if (rv) -+ return rv; -+ - p1_apb_write(de, RPI_STATUS, - 1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18)); -+ return 0; - } - --static void decode_slice(struct rpivid_dec_env *const de, -- const struct rpivid_dec_state *const s) -+static int decode_slice(struct rpivid_dec_env *const de, -+ const struct rpivid_dec_state *const s) - { - bool reset_qp_y; - unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x); - unsigned int tile_y = ctb_to_tile_y(s, s->start_ctb_y); -+ int rv; - -- if (s->start_ts) -- end_previous_slice(de, s); -+ if (s->start_ts) { -+ rv = end_previous_slice(de, s); -+ if (rv) -+ return rv; -+ } -+ -+ rv = cmds_check_space(de, -+ CMDS_WRITE_BITSTREAM + -+ CMDS_WRITE_PROB + -+ CMDS_PROGRAM_SLICECMDS + -+ CMDS_NEW_SLICE_SEGMENT + -+ CMDS_NEW_ENTRY_POINT); -+ if (rv) -+ return rv; - - pre_slice_decode(de, s); -- write_bitstream(de, s); -+ rv = write_bitstream(de, s); -+ if (rv) -+ return rv; - - reset_qp_y = !s->start_ts || - !s->dependent_slice_segment_flag || -@@ -1146,13 +1280,16 @@ static void decode_slice(struct rpivid_d - * when it will be known where this slice finishes - */ - if (s->frame_end) { -- tile_entry_fill(de, s, -- s->tile_width - 1, -- s->tile_height - 1); -+ rv = tile_entry_fill(de, s, -+ s->tile_width - 1, -+ s->tile_height - 1); -+ if (rv) -+ return rv; - p1_apb_write(de, RPI_STATUS, - 1 | ((s->ctb_width - 1) << 5) | - ((s->ctb_height - 1) << 18)); - } -+ return 0; - } - - ////////////////////////////////////////////////////////////////////////////// -@@ -1524,7 +1661,7 @@ static void rpivid_h265_setup(struct rpi - struct rpivid_dev *const dev = ctx->dev; - const struct v4l2_ctrl_hevc_slice_params *const sh = - run->h265.slice_params; -- const struct v4l2_hevc_pred_weight_table *pred_weight_table; -+// const struct v4l2_hevc_pred_weight_table *pred_weight_table; - struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; - struct rpivid_dec_state *const s = ctx->state; - struct vb2_queue *vq; -@@ -1532,11 +1669,12 @@ static void rpivid_h265_setup(struct rpi - unsigned int prev_rs; - unsigned int i; - int use_aux; -+ int rv; - bool slice_temporal_mvp; - - xtrace_in(dev, de); - -- pred_weight_table = &sh->pred_weight_table; -+// pred_weight_table = &sh->pred_weight_table; - - s->frame_end = - ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0); -@@ -1608,9 +1746,9 @@ static void rpivid_h265_setup(struct rpi - de->cmd_len = 0; - de->dpbno_col = ~0U; - -- de->bit_copy_gptr = ctx->bitbufs + 0; -+ de->bit_copy_gptr = ctx->bitbufs + ctx->p1idx; - de->bit_copy_len = 0; -- de->cmd_copy_gptr = ctx->cmdbufs + 0; -+ de->cmd_copy_gptr = ctx->cmdbufs + ctx->p1idx; - - de->frame_c_offset = ctx->dst_fmt.height * 128; - de->frame_stride = ctx->dst_fmt.plane_fmt[0].bytesperline * 128; -@@ -1727,6 +1865,9 @@ static void rpivid_h265_setup(struct rpi - bits_alloc = wxh < 983040 ? wxh * 3 / 4 : - wxh < 983040 * 2 ? 983040 * 3 / 4 : - wxh * 3 / 8; -+ /* Allow for bit depth */ -+ bits_alloc += (bits_alloc * -+ s->sps.bit_depth_luma_minus8) / 8; - bits_alloc = round_up_size(bits_alloc); - - if (gptr_alloc(dev, de->bit_copy_gptr, -@@ -1743,18 +1884,35 @@ static void rpivid_h265_setup(struct rpi - } - } - -- // Pre calc a few things -- s->src_addr = -- !s->frame_end ? -- 0 : -- vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0); -- s->src_buf = s->src_addr != 0 ? NULL : -- vb2_plane_vaddr(&run->src->vb2_buf, 0); -+ // Either map src buffer or use directly -+ s->src_addr = 0; -+ s->src_buf = NULL; -+ -+ if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Bit size %d > bytesused %d\n", -+ sh->bit_size, run->src->planes[0].bytesused); -+ goto fail; -+ } -+ if (sh->data_bit_offset >= sh->bit_size || -+ sh->bit_size - sh->data_bit_offset < 8) { -+ v4l2_warn(&dev->v4l2_dev, -+ "Bit size %d < Bit offset %d + 8\n", -+ sh->bit_size, sh->data_bit_offset); -+ goto fail; -+ } -+ -+ if (s->frame_end) -+ s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, -+ 0); -+ if (!s->src_addr) -+ s->src_buf = vb2_plane_vaddr(&run->src->vb2_buf, 0); - if (!s->src_addr && !s->src_buf) { - v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n"); - goto fail; - } - -+ // Pre calc a few things - s->sh = sh; - s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta; - s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? -@@ -1785,9 +1943,11 @@ static void rpivid_h265_setup(struct rpi - s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y; - - if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) -- wpp_decode_slice(de, s); -+ rv = wpp_decode_slice(de, s); - else -- decode_slice(de, s); -+ rv = decode_slice(de, s); -+ if (rv) -+ goto fail; - - if (!s->frame_end) { - xtrace_ok(dev, de); -@@ -1945,29 +2105,28 @@ static int check_status(const struct rpi - return -1; - } - --static void cb_phase2(struct rpivid_dev *const dev, void *v) -+static void phase2_cb(struct rpivid_dev *const dev, void *v) - { - struct rpivid_dec_env *const de = v; -- struct rpivid_ctx *const ctx = de->ctx; - - xtrace_in(dev, de); - -- v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf, -- VB2_BUF_STATE_DONE); -- de->frame_buf = NULL; -+ /* Done with buffers - allow new P1 */ -+ rpivid_hw_irq_active1_enable_claim(dev, 1); - -- /* Delete de before finish as finish might immediately trigger a reuse -- * of de -- */ -- dec_env_delete(de); -+ v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_DONE); -+ de->frame_buf = NULL; - -- if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) { -- xtrace_fin(dev, de); -- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -- VB2_BUF_STATE_DONE); -- } -+#if USE_REQUEST_PIN -+ media_request_unpin(de->req_pin); -+ de->req_pin = NULL; -+#else -+ media_request_object_complete(de->req_obj); -+ de->req_obj = NULL; -+#endif - - xtrace_ok(dev, de); -+ dec_env_delete(de); - } - - static void phase2_claimed(struct rpivid_dev *const dev, void *v) -@@ -2023,7 +2182,7 @@ static void phase2_claimed(struct rpivid - // de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr + - // de->ctx->colmvbuf.size); - -- rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de); -+ rpivid_hw_irq_active2_irq(dev, &de->irq_ent, phase2_cb, de); - - apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y); - -@@ -2032,6 +2191,39 @@ static void phase2_claimed(struct rpivid - - static void phase1_claimed(struct rpivid_dev *const dev, void *v); - -+// release any and all objects associated with de -+// and reenable phase 1 if required -+static void phase1_err_fin(struct rpivid_dev *const dev, -+ struct rpivid_ctx *const ctx, -+ struct rpivid_dec_env *const de) -+{ -+ /* Return all detached buffers */ -+ if (de->src_buf) -+ v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_ERROR); -+ de->src_buf = NULL; -+ if (de->frame_buf) -+ v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_ERROR); -+ de->frame_buf = NULL; -+#if USE_REQUEST_PIN -+ if (de->req_pin) -+ media_request_unpin(de->req_pin); -+ de->req_pin = NULL; -+#else -+ if (de->req_obj) -+ media_request_object_complete(de->req_obj); -+ de->req_obj = NULL; -+#endif -+ -+ dec_env_delete(de); -+ -+ /* Reenable phase 0 if we were blocking */ -+ if (atomic_add_return(-1, &ctx->p1out) >= RPIVID_P1BUF_COUNT - 1) -+ v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); -+ -+ /* Done with P1-P2 buffers - allow new P1 */ -+ rpivid_hw_irq_active1_enable_claim(dev, 1); -+} -+ - static void phase1_thread(struct rpivid_dev *const dev, void *v) - { - struct rpivid_dec_env *const de = v; -@@ -2076,15 +2268,12 @@ fail: - __func__); - ctx->fatal_err = 1; - } -- dec_env_delete(de); -- xtrace_fin(dev, de); -- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -- VB2_BUF_STATE_ERROR); - xtrace_fail(dev, de); -+ phase1_err_fin(dev, ctx, de); - } - - /* Always called in irq context (this is good) */ --static void cb_phase1(struct rpivid_dev *const dev, void *v) -+static void phase1_cb(struct rpivid_dev *const dev, void *v) - { - struct rpivid_dec_env *const de = v; - struct rpivid_ctx *const ctx = de->ctx; -@@ -2092,6 +2281,7 @@ static void cb_phase1(struct rpivid_dev - xtrace_in(dev, de); - - de->p1_status = check_status(dev); -+ - if (de->p1_status != 0) { - v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n", - __func__, de->p1_status); -@@ -2105,24 +2295,17 @@ static void cb_phase1(struct rpivid_dev - return; - } - -- /* After the frame-buf is detached it must be returned but from -- * this point onward (phase2_claimed, cb_phase2) there are no error -- * paths so the return at the end of cb_phase2 is all that is needed -- */ -- de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx); -- if (!de->frame_buf) { -- v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__); -- goto fail; -- } -+ v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_DONE); -+ de->src_buf = NULL; - -+ /* All phase1 error paths done - it is safe to inc p2idx */ - ctx->p2idx = - (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1; - -- // Enable the next setup if our Q isn't too big -- if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) { -+ /* Renable the next setup if we were blocking */ -+ if (atomic_add_return(-1, &ctx->p1out) >= RPIVID_P1BUF_COUNT - 1) { - xtrace_fin(dev, de); -- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -- VB2_BUF_STATE_DONE); -+ v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); - } - - rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de); -@@ -2131,11 +2314,8 @@ static void cb_phase1(struct rpivid_dev - return; - - fail: -- dec_env_delete(de); -- xtrace_fin(dev, de); -- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -- VB2_BUF_STATE_ERROR); - xtrace_fail(dev, de); -+ phase1_err_fin(dev, ctx, de); - } - - static void phase1_claimed(struct rpivid_dev *const dev, void *v) -@@ -2160,6 +2340,10 @@ static void phase1_claimed(struct rpivid - de->coeff_stride = - ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64); - -+ /* phase1_claimed blocked until cb_phase1 completed so p2idx inc -+ * in cb_phase1 after error detection -+ */ -+ - apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc); - apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride); - apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc); -@@ -2169,7 +2353,7 @@ static void phase1_claimed(struct rpivid - apb_write(dev, RPI_CFNUM, de->cmd_len); - - // Claim irq -- rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de); -+ rpivid_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de); - - // And start the h/w - apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr); -@@ -2178,11 +2362,8 @@ static void phase1_claimed(struct rpivid - return; - - fail: -- dec_env_delete(de); -- xtrace_fin(dev, de); -- v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, -- VB2_BUF_STATE_ERROR); - xtrace_fail(dev, de); -+ phase1_err_fin(dev, ctx, de); - } - - static void dec_state_delete(struct rpivid_ctx *const ctx) -@@ -2315,7 +2496,9 @@ static void rpivid_h265_trigger(struct r - case RPIVID_DECODE_SLICE_CONTINUE: - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_DONE); -+ xtrace_ok(dev, de); - break; -+ - default: - v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__, - de->state); -@@ -2329,14 +2512,59 @@ static void rpivid_h265_trigger(struct r - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_ERROR); - break; -+ - case RPIVID_DECODE_PHASE1: - ctx->dec0 = NULL; -+ -+#if !USE_REQUEST_PIN -+ /* Alloc a new request object - needs to be alloced dynamically -+ * as the media request will release it some random time after -+ * it is completed -+ */ -+ de->req_obj = kmalloc(sizeof(*de->req_obj), GFP_KERNEL); -+ if (!de->req_obj) { -+ xtrace_fail(dev, de); -+ dec_env_delete(de); -+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, -+ ctx->fh.m2m_ctx, -+ VB2_BUF_STATE_ERROR); -+ break; -+ } -+ media_request_object_init(de->req_obj); -+#warning probably needs to _get the req obj too -+#endif -+ ctx->p1idx = (ctx->p1idx + 1 >= RPIVID_P1BUF_COUNT) ? -+ 0 : ctx->p1idx + 1; -+ -+ /* We know we have src & dst so no need to test */ -+ de->src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); -+ de->frame_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); -+ -+#if USE_REQUEST_PIN -+ de->req_pin = de->src_buf->vb2_buf.req_obj.req; -+ media_request_pin(de->req_pin); -+#else -+ media_request_object_bind(de->src_buf->vb2_buf.req_obj.req, -+ &dst_req_obj_ops, de, false, -+ de->req_obj); -+#endif -+ -+ /* We could get rid of the src buffer here if we've already -+ * copied it, but we don't copy the last buffer unless it -+ * didn't return a contig dma addr and that shouldn't happen -+ */ -+ -+ /* Enable the next setup if our Q isn't too big */ -+ if (atomic_add_return(1, &ctx->p1out) < RPIVID_P1BUF_COUNT) { -+ xtrace_fin(dev, de); -+ v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); -+ } -+ - rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed, - de); -+ xtrace_ok(dev, de); - break; - } -- -- xtrace_ok(dev, de); - } - - struct rpivid_dec_ops rpivid_dec_ops_h265 = { ---- a/drivers/staging/media/rpivid/rpivid_hw.c -+++ b/drivers/staging/media/rpivid/rpivid_hw.c -@@ -185,14 +185,14 @@ static void do_enable_claim(struct rpivi - sched_cb(dev, ictl, ient); - } - --static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl) -+static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl, int enables) - { - spin_lock_init(&ictl->lock); - ictl->claim = NULL; - ictl->tail = NULL; - ictl->irq = NULL; - ictl->no_sched = 0; -- ictl->enable = -1; -+ ictl->enable = enables; - ictl->thread_reqed = false; - } - -@@ -308,8 +308,8 @@ int rpivid_hw_probe(struct rpivid_dev *d - int irq_dec; - int ret = 0; - -- ictl_init(&dev->ic_active1); -- ictl_init(&dev->ic_active2); -+ ictl_init(&dev->ic_active1, RPIVID_P2BUF_COUNT); -+ ictl_init(&dev->ic_active2, RPIVID_ICTL_ENABLE_UNLIMITED); - - res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc"); - if (!res) |