From a1902958d144d55309a1074f74fc9b3494d3042f Mon Sep 17 00:00:00 2001
From: John Cox
Date: Thu, 11 Mar 2021 19:08:00 +0000
Subject: [PATCH] media: rpivid: Add a Pass0 to accumulate slices and rework job finish

Due to overheads in assembling controls and requests, it is worth having the slice assembly phase separate from the h/w pass1 processing.

Create a queue to service pass1 rather than have the pass1 finished callback trigger the next slice job. This requires a rework of the logic that splits up the buffer and request done events. This code contains two ways of doing that; we use Ezequiel Garcia's solution, but expect that in the future this will be handled by the framework in a cleaner manner.

Fix up the handling of some of the memory exhaustion crashes uncovered in the process of writing this code.

Signed-off-by: John Cox
---
 drivers/media/v4l2-core/v4l2-mem2mem.c     |   2 -
 drivers/staging/media/rpivid/rpivid.c      |  11 +-
 drivers/staging/media/rpivid/rpivid.h      |  20 +-
 drivers/staging/media/rpivid/rpivid_dec.c  |  32 +-
 drivers/staging/media/rpivid/rpivid_h265.c | 432 ++++++++++++++++-----
 drivers/staging/media/rpivid/rpivid_hw.c   |   8 +-
 6 files changed, 374 insertions(+), 131 deletions(-)

--- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -492,8 +492,6 @@ void v4l2_m2m_job_finish(struct v4l2_m2m * holding capture buffers. Those should use * v4l2_m2m_buf_done_and_job_finish() instead. */ - WARN_ON(m2m_ctx->out_q_ctx.q.subsystem_flags & - VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF); spin_lock_irqsave(&m2m_dev->job_spinlock, flags); schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx); spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags); --- a/drivers/staging/media/rpivid/rpivid.c +++ b/drivers/staging/media/rpivid/rpivid.c @@ -79,17 +79,24 @@ static const struct rpivid_control rpivi #define rpivid_ctrls_COUNT ARRAY_SIZE(rpivid_ctrls) -void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id) +struct v4l2_ctrl *rpivid_find_ctrl(struct rpivid_ctx *ctx, u32 id) { unsigned int i; for (i = 0; ctx->ctrls[i]; i++) if (ctx->ctrls[i]->id == id) - return ctx->ctrls[i]->p_cur.p; + return ctx->ctrls[i]; return NULL; } +void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id) +{ + struct v4l2_ctrl *const ctrl = rpivid_find_ctrl(ctx, id); + + return !ctrl ? 
NULL : ctrl->p_cur.p; +} + static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx) { struct v4l2_ctrl_handler *hdl = &ctx->hdl; --- a/drivers/staging/media/rpivid/rpivid.h +++ b/drivers/staging/media/rpivid/rpivid.h @@ -24,6 +24,10 @@ #define OPT_DEBUG_POLL_IRQ 0 +#define RPIVID_DEC_ENV_COUNT 6 +#define RPIVID_P1BUF_COUNT 3 +#define RPIVID_P2BUF_COUNT 3 + #define RPIVID_NAME "rpivid" #define RPIVID_CAPABILITY_UNTILED BIT(0) @@ -45,6 +49,7 @@ struct rpivid_control { }; struct rpivid_h265_run { + u32 slice_ents; const struct v4l2_ctrl_hevc_sps *sps; const struct v4l2_ctrl_hevc_pps *pps; const struct v4l2_ctrl_hevc_slice_params *slice_params; @@ -64,7 +69,6 @@ struct rpivid_buffer { struct rpivid_dec_state; struct rpivid_dec_env; -#define RPIVID_DEC_ENV_COUNT 3 struct rpivid_gptr { size_t size; @@ -79,7 +83,6 @@ typedef void (*rpivid_irq_callback)(stru struct rpivid_q_aux; #define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME -#define RPIVID_P2BUF_COUNT 2 struct rpivid_ctx { struct v4l2_fh fh; @@ -108,11 +111,13 @@ struct rpivid_ctx { struct rpivid_dec_env *dec_pool; - /* Some of these should be in dev */ - struct rpivid_gptr bitbufs[1]; /* Will be 2 */ - struct rpivid_gptr cmdbufs[1]; /* Will be 2 */ + unsigned int p1idx; + atomic_t p1out; + struct rpivid_gptr bitbufs[RPIVID_P1BUF_COUNT]; + struct rpivid_gptr cmdbufs[RPIVID_P1BUF_COUNT]; + + /* *** Should be in dev *** */ unsigned int p2idx; - atomic_t p2out; struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT]; struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT]; @@ -141,6 +146,8 @@ struct rpivid_variant { struct rpivid_hw_irq_ent; +#define RPIVID_ICTL_ENABLE_UNLIMITED (-1) + struct rpivid_hw_irq_ctrl { /* Spinlock protecting claim and tail */ spinlock_t lock; @@ -182,6 +189,7 @@ struct rpivid_dev { extern struct rpivid_dec_ops rpivid_dec_ops_h265; +struct v4l2_ctrl *rpivid_find_ctrl(struct rpivid_ctx *ctx, u32 id); void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id); #endif --- a/drivers/staging/media/rpivid/rpivid_dec.c +++ b/drivers/staging/media/rpivid/rpivid_dec.c @@ -21,8 +21,8 @@ void rpivid_device_run(void *priv) { - struct rpivid_ctx *ctx = priv; - struct rpivid_dev *dev = ctx->dev; + struct rpivid_ctx *const ctx = priv; + struct rpivid_dev *const dev = ctx->dev; struct rpivid_run run = {}; struct media_request *src_req; @@ -32,19 +32,17 @@ void rpivid_device_run(void *priv) if (!run.src || !run.dst) { v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n", __func__, run.src, run.dst); - /* We are stuffed - this probably won't dig us out of our - * current situation but it is better than nothing - */ - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_ERROR); - return; + goto fail; } - /* Apply request(s) controls if needed. */ + /* Apply request(s) controls */ src_req = run.src->vb2_buf.req_obj.req; + if (!src_req) { + v4l2_err(&dev->v4l2_dev, "%s: Missing request\n", __func__); + goto fail; + } - if (src_req) - v4l2_ctrl_request_setup(src_req, &ctx->hdl); + v4l2_ctrl_request_setup(src_req, &ctx->hdl); switch (ctx->src_fmt.pixelformat) { case V4L2_PIX_FMT_HEVC_SLICE: @@ -70,10 +68,14 @@ void rpivid_device_run(void *priv) dev->dec_ops->setup(ctx, &run); - /* Complete request(s) controls if needed. 
*/ - - if (src_req) - v4l2_ctrl_request_complete(src_req, &ctx->hdl); + /* Complete request(s) controls */ + v4l2_ctrl_request_complete(src_req, &ctx->hdl); dev->dec_ops->trigger(ctx); + return; + +fail: + /* We really shouldn't get here but tidy up what we can */ + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, + VB2_BUF_STATE_ERROR); } --- a/drivers/staging/media/rpivid/rpivid_h265.c +++ b/drivers/staging/media/rpivid/rpivid_h265.c @@ -22,6 +22,8 @@ #define DEBUG_TRACE_P1_CMD 0 #define DEBUG_TRACE_EXECUTION 0 +#define USE_REQUEST_PIN 1 + #if DEBUG_TRACE_EXECUTION #define xtrace_in(dev_, de_)\ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n", __func__,\ @@ -192,8 +194,6 @@ struct rpivid_dec_env { unsigned int decode_order; int p1_status; /* P1 status - what to realloc */ - struct rpivid_dec_env *phase_wait_q_next; - struct rpi_cmd *cmd_fifo; unsigned int cmd_len, cmd_max; unsigned int num_slice_msgs; @@ -219,6 +219,7 @@ struct rpivid_dec_env { u32 rpi_currpoc; struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer + struct vb2_v4l2_buffer *src_buf; // Detached src buffer unsigned int frame_c_offset; unsigned int frame_stride; dma_addr_t frame_addr; @@ -235,9 +236,15 @@ struct rpivid_dec_env { size_t bit_copy_len; struct rpivid_gptr *cmd_copy_gptr; - u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3]; +#define SLICE_MSGS_MAX (2 * HEVC_MAX_REFS * 8 + 3) + u16 slice_msgs[SLICE_MSGS_MAX]; u8 scaling_factors[NUM_SCALING_FACTORS]; +#if USE_REQUEST_PIN + struct media_request *req_pin; +#else + struct media_request_object *req_obj; +#endif struct rpivid_hw_irq_ent irq_ent; }; @@ -286,6 +293,17 @@ struct rpivid_dec_state { unsigned int prev_ctb_y; }; +#if !USE_REQUEST_PIN +static void dst_req_obj_release(struct media_request_object *object) +{ + kfree(object); +} + +static const struct media_request_object_ops dst_req_obj_ops = { + .release = dst_req_obj_release, +}; +#endif + static inline int clip_int(const int x, const int lo, const int hi) { return x < lo ? lo : x > hi ? hi : x; @@ -298,15 +316,48 @@ static inline int clip_int(const int x, static int p1_z; #endif +static int cmds_check_space(struct rpivid_dec_env *const de, unsigned int n) +{ + struct rpi_cmd *a; + unsigned int newmax; + + if (n > 0x100000) { + v4l2_err(&de->ctx->dev->v4l2_dev, + "%s: n %u implausible\n", __func__, n); + return -ENOMEM; + } + + if (de->cmd_len + n <= de->cmd_max) + return 0; + + newmax = 2 << log2_size(de->cmd_len + n); + + a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd), + GFP_KERNEL); + if (!a) { + v4l2_err(&de->ctx->dev->v4l2_dev, + "Failed cmd buffer realloc from %u to %u\n", + de->cmd_max, newmax); + return -ENOMEM; + } + v4l2_info(&de->ctx->dev->v4l2_dev, + "cmd buffer realloc from %u to %u\n", de->cmd_max, newmax); + + de->cmd_fifo = a; + de->cmd_max = newmax; + return 0; +} + // ???? 
u16 addr - put in u32 -static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr, - const u32 data) +static void p1_apb_write(struct rpivid_dec_env *const de, const u16 addr, + const u32 data) { - if (de->cmd_len == de->cmd_max) - de->cmd_fifo = - krealloc(de->cmd_fifo, - (de->cmd_max *= 2) * sizeof(struct rpi_cmd), - GFP_KERNEL); + if (de->cmd_len >= de->cmd_max) { + v4l2_err(&de->ctx->dev->v4l2_dev, + "%s: Overflow @ %d\n", __func__, de->cmd_len); + return; + } + de->cmd_fifo[de->cmd_len].addr = addr; de->cmd_fifo[de->cmd_len].data = data; @@ -316,8 +367,7 @@ static int p1_apb_write(struct rpivid_de de->cmd_len, addr, data); } #endif - - return de->cmd_len++; + de->cmd_len++; } static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num) @@ -511,6 +561,7 @@ static const u8 prob_init[3][156] = { }, }; +#define CMDS_WRITE_PROB ((RPI_PROB_ARRAY_SIZE / 4) + 1) static void write_prob(struct rpivid_dec_env *const de, const struct rpivid_dec_state *const s) { @@ -554,6 +605,7 @@ static void write_prob(struct rpivid_dec p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); } +#define CMDS_WRITE_SCALING_FACTORS NUM_SCALING_FACTORS static void write_scaling_factors(struct rpivid_dec_env *const de) { int i; @@ -569,8 +621,9 @@ static inline __u32 dma_to_axi_addr(dma_ return (__u32)(a >> 6); } -static void write_bitstream(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) +#define CMDS_WRITE_BITSTREAM 4 +static int write_bitstream(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s) { // Note that FFmpeg V4L2 does not remove emulation prevention bytes, // so this is matched in the configuration here. @@ -584,6 +637,13 @@ static void write_bitstream(struct rpivi if (s->src_addr != 0) { addr = s->src_addr + offset; } else { + if (len + de->bit_copy_len > de->bit_copy_gptr->size) { + v4l2_warn(&de->ctx->dev->v4l2_dev, + "Bit copy buffer overflow: size=%zu, offset=%zu, len=%u\n", + de->bit_copy_gptr->size, + de->bit_copy_len, len); + return -ENOMEM; + } memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len, s->src_buf + offset, len); addr = de->bit_copy_gptr->addr + de->bit_copy_len; @@ -595,6 +655,7 @@ static void write_bitstream(struct rpivi p1_apb_write(de, RPI_BFNUM, len); p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6)); + return 0; } ////////////////////////////////////////////////////////////////////////////// @@ -623,6 +684,7 @@ static u32 slice_reg_const(const struct ////////////////////////////////////////////////////////////////////////////// +#define CMDS_NEW_SLICE_SEGMENT (4 + CMDS_WRITE_SCALING_FACTORS) static void new_slice_segment(struct rpivid_dec_env *const de, const struct rpivid_dec_state *const s) { @@ -706,6 +768,7 @@ static void msg_slice(struct rpivid_dec_ de->slice_msgs[de->num_slice_msgs++] = msg; } +#define CMDS_PROGRAM_SLICECMDS (1 + SLICE_MSGS_MAX) static void program_slicecmds(struct rpivid_dec_env *const de, const int sliceid) { @@ -902,6 +965,7 @@ static void pre_slice_decode(struct rpiv (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF } +#define CMDS_WRITE_SLICE 1 static void write_slice(struct rpivid_dec_env *const de, const struct rpivid_dec_state *const s, const u32 slice_const, @@ -927,6 +991,7 @@ static void write_slice(struct rpivid_de * N.B. This can be called to fill in data from the previous slice so must not * use any state data that may change from slice to slice (e.g. 
qp) */ +#define CMDS_NEW_ENTRY_POINT (6 + CMDS_WRITE_SLICE) static void new_entry_point(struct rpivid_dec_env *const de, const struct rpivid_dec_state *const s, const bool do_bte, @@ -977,6 +1042,7 @@ static void new_entry_point(struct rpivi ////////////////////////////////////////////////////////////////////////////// // Wavefront mode +#define CMDS_WPP_PAUSE 4 static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row) { p1_apb_write(de, RPI_STATUS, (ctb_row << 18) | 0x25); @@ -987,12 +1053,19 @@ static void wpp_pause(struct rpivid_dec_ p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2); } -static void wpp_entry_fill(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s, - const unsigned int last_y) +#define CMDS_WPP_ENTRY_FILL_1 (CMDS_WPP_PAUSE + 2 + CMDS_NEW_ENTRY_POINT) +static int wpp_entry_fill(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s, + const unsigned int last_y) { + int rv; const unsigned int last_x = s->ctb_width - 1; + rv = cmds_check_space(de, CMDS_WPP_ENTRY_FILL_1 * + (last_y - de->entry_ctb_y)); + if (rv) + return rv; + while (de->entry_ctb_y < last_y) { /* wpp_entry_x/y set by wpp_entry_point */ if (s->ctb_width > 2) @@ -1010,12 +1083,21 @@ static void wpp_entry_fill(struct rpivid 0, 0, 0, de->entry_ctb_y + 1, de->entry_qp, de->entry_slice); } + return 0; } -static void wpp_end_previous_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) +static int wpp_end_previous_slice(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s) { - wpp_entry_fill(de, s, s->prev_ctb_y); + int rv; + + rv = wpp_entry_fill(de, s, s->prev_ctb_y); + if (rv) + return rv; + + rv = cmds_check_space(de, CMDS_WPP_PAUSE + 2); + if (rv) + return rv; if (de->entry_ctb_x < 2 && (de->entry_ctb_y < s->start_ctb_y || s->start_ctb_x > 2) && @@ -1026,21 +1108,38 @@ static void wpp_end_previous_slice(struc if (s->start_ctb_x == 2 || (s->ctb_width == 2 && de->entry_ctb_y < s->start_ctb_y)) p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP); + return 0; } /* Only main profile supported so WPP => !Tiles which makes some of the * next chunk code simpler */ -static void wpp_decode_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) +static int wpp_decode_slice(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s) { bool reset_qp_y = true; const bool indep = !s->dependent_slice_segment_flag; + int rv; - if (s->start_ts) - wpp_end_previous_slice(de, s); + if (s->start_ts) { + rv = wpp_end_previous_slice(de, s); + if (rv) + return rv; + } pre_slice_decode(de, s); - write_bitstream(de, s); + + rv = cmds_check_space(de, + CMDS_WRITE_BITSTREAM + + CMDS_WRITE_PROB + + CMDS_PROGRAM_SLICECMDS + + CMDS_NEW_SLICE_SEGMENT + + CMDS_NEW_ENTRY_POINT); + if (rv) + return rv; + + rv = write_bitstream(de, s); + if (rv) + return rv; if (!s->start_ts || indep || s->ctb_width == 1) write_prob(de, s); @@ -1056,7 +1155,13 @@ static void wpp_decode_slice(struct rpiv s->slice_qp, slice_reg_const(s)); if (s->frame_end) { - wpp_entry_fill(de, s, s->ctb_height - 1); + rv = wpp_entry_fill(de, s, s->ctb_height - 1); + if (rv) + return rv; + + rv = cmds_check_space(de, CMDS_WPP_PAUSE + 1); + if (rv) + return rv; if (de->entry_ctb_x < 2 && s->ctb_width > 2) wpp_pause(de, s->ctb_height - 1); @@ -1065,25 +1170,32 @@ static void wpp_decode_slice(struct rpiv 1 | ((s->ctb_width - 1) << 5) | ((s->ctb_height - 1) << 18)); } - + return 0; } 
////////////////////////////////////////////////////////////////////////////// // Tiles mode -static void tile_entry_fill(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s, - const unsigned int last_tile_x, - const unsigned int last_tile_y) +// Guarantees 1 cmd entry free on exit +static int tile_entry_fill(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s, + const unsigned int last_tile_x, + const unsigned int last_tile_y) { while (de->entry_tile_y < last_tile_y || (de->entry_tile_y == last_tile_y && de->entry_tile_x < last_tile_x)) { + int rv; unsigned int t_x = de->entry_tile_x; unsigned int t_y = de->entry_tile_y; const unsigned int last_x = s->col_bd[t_x + 1] - 1; const unsigned int last_y = s->row_bd[t_y + 1] - 1; + // One more than needed here + rv = cmds_check_space(de, CMDS_NEW_ENTRY_POINT + 3); + if (rv) + return rv; + p1_apb_write(de, RPI_STATUS, 2 | (last_x << 5) | (last_y << 18)); p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD); @@ -1098,33 +1210,55 @@ static void tile_entry_fill(struct rpivi t_x, t_y, s->col_bd[t_x], s->row_bd[t_y], de->entry_qp, de->entry_slice); } + return 0; } /* * Write STATUS register with expected end CTU address of previous slice */ -static void end_previous_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) +static int end_previous_slice(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s) { - tile_entry_fill(de, s, - ctb_to_tile_x(s, s->prev_ctb_x), - ctb_to_tile_y(s, s->prev_ctb_y)); + int rv; + + rv = tile_entry_fill(de, s, + ctb_to_tile_x(s, s->prev_ctb_x), + ctb_to_tile_y(s, s->prev_ctb_y)); + if (rv) + return rv; + p1_apb_write(de, RPI_STATUS, 1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18)); + return 0; } -static void decode_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) +static int decode_slice(struct rpivid_dec_env *const de, + const struct rpivid_dec_state *const s) { bool reset_qp_y; unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x); unsigned int tile_y = ctb_to_tile_y(s, s->start_ctb_y); + int rv; - if (s->start_ts) - end_previous_slice(de, s); + if (s->start_ts) { + rv = end_previous_slice(de, s); + if (rv) + return rv; + } + + rv = cmds_check_space(de, + CMDS_WRITE_BITSTREAM + + CMDS_WRITE_PROB + + CMDS_PROGRAM_SLICECMDS + + CMDS_NEW_SLICE_SEGMENT + + CMDS_NEW_ENTRY_POINT); + if (rv) + return rv; pre_slice_decode(de, s); - write_bitstream(de, s); + rv = write_bitstream(de, s); + if (rv) + return rv; reset_qp_y = !s->start_ts || !s->dependent_slice_segment_flag || @@ -1146,13 +1280,16 @@ static void decode_slice(struct rpivid_d * when it will be known where this slice finishes */ if (s->frame_end) { - tile_entry_fill(de, s, - s->tile_width - 1, - s->tile_height - 1); + rv = tile_entry_fill(de, s, + s->tile_width - 1, + s->tile_height - 1); + if (rv) + return rv; p1_apb_write(de, RPI_STATUS, 1 | ((s->ctb_width - 1) << 5) | ((s->ctb_height - 1) << 18)); } + return 0; } ////////////////////////////////////////////////////////////////////////////// @@ -1524,7 +1661,7 @@ static void rpivid_h265_setup(struct rpi struct rpivid_dev *const dev = ctx->dev; const struct v4l2_ctrl_hevc_slice_params *const sh = run->h265.slice_params; - const struct v4l2_hevc_pred_weight_table *pred_weight_table; +// const struct v4l2_hevc_pred_weight_table *pred_weight_table; struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; struct rpivid_dec_state *const s = ctx->state; struct vb2_queue *vq; @@ -1532,11 +1669,12 @@ 
static void rpivid_h265_setup(struct rpi unsigned int prev_rs; unsigned int i; int use_aux; + int rv; bool slice_temporal_mvp; xtrace_in(dev, de); - pred_weight_table = &sh->pred_weight_table; +// pred_weight_table = &sh->pred_weight_table; s->frame_end = ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0); @@ -1608,9 +1746,9 @@ static void rpivid_h265_setup(struct rpi de->cmd_len = 0; de->dpbno_col = ~0U; - de->bit_copy_gptr = ctx->bitbufs + 0; + de->bit_copy_gptr = ctx->bitbufs + ctx->p1idx; de->bit_copy_len = 0; - de->cmd_copy_gptr = ctx->cmdbufs + 0; + de->cmd_copy_gptr = ctx->cmdbufs + ctx->p1idx; de->frame_c_offset = ctx->dst_fmt.height * 128; de->frame_stride = ctx->dst_fmt.plane_fmt[0].bytesperline * 128; @@ -1727,6 +1865,9 @@ static void rpivid_h265_setup(struct rpi bits_alloc = wxh < 983040 ? wxh * 3 / 4 : wxh < 983040 * 2 ? 983040 * 3 / 4 : wxh * 3 / 8; + /* Allow for bit depth */ + bits_alloc += (bits_alloc * + s->sps.bit_depth_luma_minus8) / 8; bits_alloc = round_up_size(bits_alloc); if (gptr_alloc(dev, de->bit_copy_gptr, @@ -1743,18 +1884,35 @@ static void rpivid_h265_setup(struct rpi } } - // Pre calc a few things - s->src_addr = - !s->frame_end ? - 0 : - vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0); - s->src_buf = s->src_addr != 0 ? NULL : - vb2_plane_vaddr(&run->src->vb2_buf, 0); + // Either map src buffer or use directly + s->src_addr = 0; + s->src_buf = NULL; + + if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { + v4l2_warn(&dev->v4l2_dev, + "Bit size %d > bytesused %d\n", + sh->bit_size, run->src->planes[0].bytesused); + goto fail; + } + if (sh->data_bit_offset >= sh->bit_size || + sh->bit_size - sh->data_bit_offset < 8) { + v4l2_warn(&dev->v4l2_dev, + "Bit size %d < Bit offset %d + 8\n", + sh->bit_size, sh->data_bit_offset); + goto fail; + } + + if (s->frame_end) + s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, + 0); + if (!s->src_addr) + s->src_buf = vb2_plane_vaddr(&run->src->vb2_buf, 0); if (!s->src_addr && !s->src_buf) { v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n"); goto fail; } + // Pre calc a few things s->sh = sh; s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta; s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 
@@ -1785,9 +1943,11 @@ static void rpivid_h265_setup(struct rpi s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y; if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) - wpp_decode_slice(de, s); + rv = wpp_decode_slice(de, s); else - decode_slice(de, s); + rv = decode_slice(de, s); + if (rv) + goto fail; if (!s->frame_end) { xtrace_ok(dev, de); @@ -1945,29 +2105,28 @@ static int check_status(const struct rpi return -1; } -static void cb_phase2(struct rpivid_dev *const dev, void *v) +static void phase2_cb(struct rpivid_dev *const dev, void *v) { struct rpivid_dec_env *const de = v; - struct rpivid_ctx *const ctx = de->ctx; xtrace_in(dev, de); - v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf, - VB2_BUF_STATE_DONE); - de->frame_buf = NULL; + /* Done with buffers - allow new P1 */ + rpivid_hw_irq_active1_enable_claim(dev, 1); - /* Delete de before finish as finish might immediately trigger a reuse - * of de - */ - dec_env_delete(de); + v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_DONE); + de->frame_buf = NULL; - if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) { - xtrace_fin(dev, de); - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_DONE); - } +#if USE_REQUEST_PIN + media_request_unpin(de->req_pin); + de->req_pin = NULL; +#else + media_request_object_complete(de->req_obj); + de->req_obj = NULL; +#endif xtrace_ok(dev, de); + dec_env_delete(de); } static void phase2_claimed(struct rpivid_dev *const dev, void *v) @@ -2023,7 +2182,7 @@ static void phase2_claimed(struct rpivid // de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr + // de->ctx->colmvbuf.size); - rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de); + rpivid_hw_irq_active2_irq(dev, &de->irq_ent, phase2_cb, de); apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y); @@ -2032,6 +2191,39 @@ static void phase2_claimed(struct rpivid static void phase1_claimed(struct rpivid_dev *const dev, void *v); +// release any and all objects associated with de +// and reenable phase 1 if required +static void phase1_err_fin(struct rpivid_dev *const dev, + struct rpivid_ctx *const ctx, + struct rpivid_dec_env *const de) +{ + /* Return all detached buffers */ + if (de->src_buf) + v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_ERROR); + de->src_buf = NULL; + if (de->frame_buf) + v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_ERROR); + de->frame_buf = NULL; +#if USE_REQUEST_PIN + if (de->req_pin) + media_request_unpin(de->req_pin); + de->req_pin = NULL; +#else + if (de->req_obj) + media_request_object_complete(de->req_obj); + de->req_obj = NULL; +#endif + + dec_env_delete(de); + + /* Reenable phase 0 if we were blocking */ + if (atomic_add_return(-1, &ctx->p1out) >= RPIVID_P1BUF_COUNT - 1) + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); + + /* Done with P1-P2 buffers - allow new P1 */ + rpivid_hw_irq_active1_enable_claim(dev, 1); +} + static void phase1_thread(struct rpivid_dev *const dev, void *v) { struct rpivid_dec_env *const de = v; @@ -2076,15 +2268,12 @@ fail: __func__); ctx->fatal_err = 1; } - dec_env_delete(de); - xtrace_fin(dev, de); - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_ERROR); xtrace_fail(dev, de); + phase1_err_fin(dev, ctx, de); } /* Always called in irq context (this is good) */ -static void cb_phase1(struct rpivid_dev *const dev, void *v) +static void phase1_cb(struct rpivid_dev *const dev, void *v) { struct rpivid_dec_env *const de = v; struct rpivid_ctx *const ctx = de->ctx; @@ -2092,6 +2281,7 
@@ static void cb_phase1(struct rpivid_dev xtrace_in(dev, de); de->p1_status = check_status(dev); + if (de->p1_status != 0) { v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n", __func__, de->p1_status); @@ -2105,24 +2295,17 @@ static void cb_phase1(struct rpivid_dev return; } - /* After the frame-buf is detached it must be returned but from - * this point onward (phase2_claimed, cb_phase2) there are no error - * paths so the return at the end of cb_phase2 is all that is needed - */ - de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx); - if (!de->frame_buf) { - v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__); - goto fail; - } + v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_DONE); + de->src_buf = NULL; + /* All phase1 error paths done - it is safe to inc p2idx */ ctx->p2idx = (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1; - // Enable the next setup if our Q isn't too big - if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) { + /* Renable the next setup if we were blocking */ + if (atomic_add_return(-1, &ctx->p1out) >= RPIVID_P1BUF_COUNT - 1) { xtrace_fin(dev, de); - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_DONE); + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); } rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de); @@ -2131,11 +2314,8 @@ static void cb_phase1(struct rpivid_dev return; fail: - dec_env_delete(de); - xtrace_fin(dev, de); - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_ERROR); xtrace_fail(dev, de); + phase1_err_fin(dev, ctx, de); } static void phase1_claimed(struct rpivid_dev *const dev, void *v) @@ -2160,6 +2340,10 @@ static void phase1_claimed(struct rpivid de->coeff_stride = ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64); + /* phase1_claimed blocked until cb_phase1 completed so p2idx inc + * in cb_phase1 after error detection + */ + apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc); apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride); apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc); @@ -2169,7 +2353,7 @@ static void phase1_claimed(struct rpivid apb_write(dev, RPI_CFNUM, de->cmd_len); // Claim irq - rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de); + rpivid_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de); // And start the h/w apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr); @@ -2178,11 +2362,8 @@ static void phase1_claimed(struct rpivid return; fail: - dec_env_delete(de); - xtrace_fin(dev, de); - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_ERROR); xtrace_fail(dev, de); + phase1_err_fin(dev, ctx, de); } static void dec_state_delete(struct rpivid_ctx *const ctx) @@ -2315,7 +2496,9 @@ static void rpivid_h265_trigger(struct r case RPIVID_DECODE_SLICE_CONTINUE: v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, VB2_BUF_STATE_DONE); + xtrace_ok(dev, de); break; + default: v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__, de->state); @@ -2329,14 +2512,59 @@ static void rpivid_h265_trigger(struct r v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, VB2_BUF_STATE_ERROR); break; + case RPIVID_DECODE_PHASE1: ctx->dec0 = NULL; + +#if !USE_REQUEST_PIN + /* Alloc a new request object - needs to be alloced dynamically + * as the media request will release it some random time after + * it is completed + */ + de->req_obj = kmalloc(sizeof(*de->req_obj), GFP_KERNEL); + if (!de->req_obj) { + xtrace_fail(dev, de); + 
dec_env_delete(de); + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, + ctx->fh.m2m_ctx, + VB2_BUF_STATE_ERROR); + break; + } + media_request_object_init(de->req_obj); +#warning probably needs to _get the req obj too +#endif + ctx->p1idx = (ctx->p1idx + 1 >= RPIVID_P1BUF_COUNT) ? + 0 : ctx->p1idx + 1; + + /* We know we have src & dst so no need to test */ + de->src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + de->frame_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + +#if USE_REQUEST_PIN + de->req_pin = de->src_buf->vb2_buf.req_obj.req; + media_request_pin(de->req_pin); +#else + media_request_object_bind(de->src_buf->vb2_buf.req_obj.req, + &dst_req_obj_ops, de, false, + de->req_obj); +#endif + + /* We could get rid of the src buffer here if we've already + * copied it, but we don't copy the last buffer unless it + * didn't return a contig dma addr and that shouldn't happen + */ + + /* Enable the next setup if our Q isn't too big */ + if (atomic_add_return(1, &ctx->p1out) < RPIVID_P1BUF_COUNT) { + xtrace_fin(dev, de); + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); + } + rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed, de); + xtrace_ok(dev, de); break; } - - xtrace_ok(dev, de); } struct rpivid_dec_ops rpivid_dec_ops_h265 = { --- a/drivers/staging/media/rpivid/rpivid_hw.c +++ b/drivers/staging/media/rpivid/rpivid_hw.c @@ -185,14 +185,14 @@ static void do_enable_claim(struct rpivi sched_cb(dev, ictl, ient); } -static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl) +static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl, int enables) { spin_lock_init(&ictl->lock); ictl->claim = NULL; ictl->tail = NULL; ictl->irq = NULL; ictl->no_sched = 0; - ictl->enable = -1; + ictl->enable = enables; ictl->thread_reqed = false; } @@ -308,8 +308,8 @@ int rpivid_hw_probe(struct rpivid_dev *d int irq_dec; int ret = 0; - ictl_init(&dev->ic_active1); - ictl_init(&dev->ic_active2); + ictl_init(&dev->ic_active1, RPIVID_P2BUF_COUNT); + ictl_init(&dev->ic_active2, RPIVID_ICTL_ENABLE_UNLIMITED); res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc"); if (!res)
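
Note on the command-FIFO handling above: the patch replaces the on-the-fly krealloc() inside p1_apb_write() with an up-front reservation (cmds_check_space() plus the CMDS_* worst-case constants), so the low-level write helper can no longer fail or allocate. The following is a minimal userspace sketch of that reserve-then-emit pattern, not the driver code; all names here (cmd_fifo_check_space, cmd_fifo_write, CMDS_PER_SLICE, the register offsets) are illustrative assumptions, not rpivid identifiers.

/*
 * Sketch of the reserve-then-emit command FIFO pattern, assuming
 * illustrative names; compiles and runs standalone.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct cmd { uint16_t addr; uint32_t data; };

struct cmd_fifo {
	struct cmd *buf;
	unsigned int len, max;
};

/* Round up to the next power of two (cf. 2 << log2_size(...) in the patch). */
static unsigned int roundup_pow2(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

/* Reserve space for n more commands; grow the buffer if needed. */
static int cmd_fifo_check_space(struct cmd_fifo *f, unsigned int n)
{
	unsigned int newmax;
	struct cmd *a;

	if (f->len + n <= f->max)
		return 0;

	newmax = roundup_pow2(f->len + n) * 2;
	a = realloc(f->buf, newmax * sizeof(*a));
	if (!a)
		return -1;	/* the only point where a slice can fail for cmd space */
	f->buf = a;
	f->max = newmax;
	return 0;
}

/* Emit one command; space was reserved earlier, so this cannot fail. */
static void cmd_fifo_write(struct cmd_fifo *f, uint16_t addr, uint32_t data)
{
	if (f->len >= f->max) {	/* programming error: worst-case budget too small */
		fprintf(stderr, "cmd fifo overflow at %u\n", f->len);
		return;
	}
	f->buf[f->len].addr = addr;
	f->buf[f->len].data = data;
	f->len++;
}

#define CMDS_PER_SLICE 3	/* worst-case command count for this toy "slice" */

int main(void)
{
	struct cmd_fifo f = { NULL, 0, 0 };
	unsigned int slice;

	for (slice = 0; slice < 4; slice++) {
		/* Reserve the worst case once per slice, as the *_decode_slice paths do. */
		if (cmd_fifo_check_space(&f, CMDS_PER_SLICE))
			return 1;
		cmd_fifo_write(&f, 0x10, slice);	/* e.g. a status word */
		cmd_fifo_write(&f, 0x14, slice * 2);	/* e.g. a control word */
		cmd_fifo_write(&f, 0x18, 1);		/* e.g. a transfer word */
	}
	printf("%u commands queued, capacity %u\n", f.len, f.max);
	free(f.buf);
	return 0;
}

The design point being illustrated: by bounding each slice's command count ahead of time, allocation failure is reported once per slice (where it can be handled cleanly) rather than deep inside the per-register write path.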