From 4ab81f113bdf1ca8c3b0d53c777885aa33ed27f3 Mon Sep 17 00:00:00 2001 From: John Cox Date: Thu, 29 Apr 2021 19:17:06 +0100 Subject: [PATCH] media: rpivid: Make slice ctrl dynamic Allows the user to submit a whole frame's worth of slice headers in one lump along with a single bitstream dmabuf for the whole lot. This potentially saves a lot of bitstream copying. Signed-off-by: John Cox --- drivers/staging/media/rpivid/rpivid.c | 4 + drivers/staging/media/rpivid/rpivid_dec.c | 18 ++- drivers/staging/media/rpivid/rpivid_h265.c | 151 +++++++++++---------- 3 files changed, 99 insertions(+), 74 deletions(-) --- a/drivers/staging/media/rpivid/rpivid.c +++ b/drivers/staging/media/rpivid/rpivid.c @@ -63,7 +63,11 @@ static const struct rpivid_control rpivi }, { .cfg = { + .name = "Slice param array", .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, + .type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS, + .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY, + .dims = { 0x1000 }, }, .required = true, }, --- a/drivers/staging/media/rpivid/rpivid_dec.c +++ b/drivers/staging/media/rpivid/rpivid_dec.c @@ -46,22 +46,34 @@ void rpivid_device_run(void *priv) switch (ctx->src_fmt.pixelformat) { case V4L2_PIX_FMT_HEVC_SLICE: + { + const struct v4l2_ctrl *ctrl; + run.h265.sps = rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS); run.h265.pps = rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_PPS); - run.h265.slice_params = - rpivid_find_control_data(ctx, - V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS); run.h265.dec = rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS); + + ctrl = rpivid_find_ctrl(ctx, + V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS); + if (!ctrl || !ctrl->elems) { + v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n", + __func__); + goto fail; + } + run.h265.slice_ents = ctrl->elems; + run.h265.slice_params = ctrl->p_cur.p; + run.h265.scaling_matrix = rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX); break; + } default: break; --- 
a/drivers/staging/media/rpivid/rpivid_h265.c +++ b/drivers/staging/media/rpivid/rpivid_h265.c @@ -245,7 +245,6 @@ struct rpivid_dec_state { // Slice vars unsigned int slice_idx; - bool frame_end; bool slice_temporal_mvp; /* Slice flag but constant for frame */ // Temp vars per run - don't actually need to persist @@ -740,7 +739,8 @@ static void new_slice_segment(struct rpi V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)) << 24)); - if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0) + if (!s->start_ts && + (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0) write_scaling_factors(de); if (!s->dependent_slice_segment_flag) { @@ -1111,7 +1111,8 @@ static int wpp_end_previous_slice(struct * next chunk code simpler */ static int wpp_decode_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) + const struct rpivid_dec_state *const s, + bool last_slice) { bool reset_qp_y = true; const bool indep = !s->dependent_slice_segment_flag; @@ -1150,7 +1151,7 @@ static int wpp_decode_slice(struct rpivi 0, 0, s->start_ctb_x, s->start_ctb_y, s->slice_qp, slice_reg_const(s)); - if (s->frame_end) { + if (last_slice) { rv = wpp_entry_fill(de, s, s->ctb_height - 1); if (rv) return rv; @@ -1229,7 +1230,8 @@ static int end_previous_slice(struct rpi } static int decode_slice(struct rpivid_dec_env *const de, - const struct rpivid_dec_state *const s) + const struct rpivid_dec_state *const s, + bool last_slice) { bool reset_qp_y; unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x); @@ -1275,7 +1277,7 @@ static int decode_slice(struct rpivid_de * now, otherwise this will be done at the start of the next slice * when it will be known where this slice finishes */ - if (s->frame_end) { + if (last_slice) { rv = tile_entry_fill(de, s, s->tile_width - 1, s->tile_height - 1); @@ -1670,11 +1672,13 @@ static u32 mk_config2(const struct rpivi static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run) { struct rpivid_dev *const dev = 
ctx->dev; - const struct v4l2_ctrl_hevc_slice_params *const sh = - run->h265.slice_params; const struct v4l2_ctrl_hevc_decode_params *const dec = run->h265.dec; -// const struct v4l2_hevc_pred_weight_table *pred_weight_table; + /* sh0 used where slice header contents should be constant over all + * slices, or first slice of frame + */ + const struct v4l2_ctrl_hevc_slice_params *const sh0 = + run->h265.slice_params; struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; struct rpivid_dec_state *const s = ctx->state; struct vb2_queue *vq; @@ -1684,20 +1688,18 @@ static void rpivid_h265_setup(struct rpi int use_aux; int rv; bool slice_temporal_mvp; + bool frame_end; xtrace_in(dev, de); + s->sh = NULL; // Avoid use until in the slice loop -// pred_weight_table = &sh->pred_weight_table; - - s->frame_end = + frame_end = ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0); - slice_temporal_mvp = (sh->flags & + slice_temporal_mvp = (sh0->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED); if (de && de->state != RPIVID_DECODE_END) { - ++s->slice_idx; - switch (de->state) { case RPIVID_DECODE_SLICE_CONTINUE: // Expected state @@ -1830,7 +1832,7 @@ static void rpivid_h265_setup(struct rpi de->rpi_config2 = mk_config2(s); de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) | s->sps.pic_width_in_luma_samples; - de->rpi_currpoc = sh->slice_pic_order_cnt; + de->rpi_currpoc = sh0->slice_pic_order_cnt; if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) { @@ -1839,17 +1841,17 @@ static void rpivid_h265_setup(struct rpi s->slice_idx = 0; - if (sh->slice_segment_addr != 0) { + if (sh0->slice_segment_addr != 0) { v4l2_warn(&dev->v4l2_dev, "New frame but segment_addr=%d\n", - sh->slice_segment_addr); + sh0->slice_segment_addr); goto fail; } /* Allocate a bitbuf if we need one - don't need one if single * slice as we can use the src buf directly */ - if (!s->frame_end && !de->bit_copy_gptr->ptr) { + if (!frame_end && 
!de->bit_copy_gptr->ptr) { size_t bits_alloc; bits_alloc = rpivid_bit_buf_size(s->sps.pic_width_in_luma_samples, s->sps.pic_height_in_luma_samples, @@ -1873,21 +1875,7 @@ static void rpivid_h265_setup(struct rpi s->src_addr = 0; s->src_buf = NULL; - if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { - v4l2_warn(&dev->v4l2_dev, - "Bit size %d > bytesused %d\n", - sh->bit_size, run->src->planes[0].bytesused); - goto fail; - } - if (sh->data_bit_offset >= sh->bit_size || - sh->bit_size - sh->data_bit_offset < 8) { - v4l2_warn(&dev->v4l2_dev, - "Bit size %d < Bit offset %d + 8\n", - sh->bit_size, sh->data_bit_offset); - goto fail; - } - - if (s->frame_end) + if (frame_end) s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0); if (!s->src_addr) @@ -1898,44 +1886,65 @@ static void rpivid_h265_setup(struct rpi } // Pre calc a few things - s->sh = sh; s->dec = dec; - s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta; - s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? + for (i = 0; i != run->h265.slice_ents; ++i) { + const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i; + const bool last_slice = frame_end && i + 1 == run->h265.slice_ents; + + s->sh = sh; + + if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { + v4l2_warn(&dev->v4l2_dev, + "Bit size %d > bytesused %d\n", + sh->bit_size, run->src->planes[0].bytesused); + goto fail; + } + if (sh->data_bit_offset >= sh->bit_size || + sh->bit_size - sh->data_bit_offset < 8) { + v4l2_warn(&dev->v4l2_dev, + "Bit size %d < Bit offset %d + 8\n", + sh->bit_size, sh->data_bit_offset); + goto fail; + } + + s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta; + s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? + 0 : + (5 - sh->five_minus_max_num_merge_cand); + s->dependent_slice_segment_flag = + ((sh->flags & + V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0); + + s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ? 
+ 0 : + sh->num_ref_idx_l0_active_minus1 + 1; + s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ? 0 : - (5 - sh->five_minus_max_num_merge_cand); - // * SH DSS flag invented by me - but clearly needed - s->dependent_slice_segment_flag = - ((sh->flags & - V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0); - - s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ? - 0 : - sh->num_ref_idx_l0_active_minus1 + 1; - s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ? - 0 : - sh->num_ref_idx_l1_active_minus1 + 1; - - if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) - populate_scaling_factors(run, de, s); - - // Calc all the random coord info to avoid repeated conversion in/out - s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr]; - s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y; - s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y; - // Last CTB of previous slice - prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1]; - s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y; - s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y; + sh->num_ref_idx_l1_active_minus1 + 1; - if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) - rv = wpp_decode_slice(de, s); - else - rv = decode_slice(de, s); - if (rv) - goto fail; + if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) + populate_scaling_factors(run, de, s); + + /* Calc all the random coord info to avoid repeated conversion in/out */ + s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr]; + s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y; + s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y; + /* Last CTB of previous slice */ + prev_rs = !s->start_ts ? 
0 : s->ctb_addr_ts_to_rs[s->start_ts - 1]; + s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y; + s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y; + + if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) + rv = wpp_decode_slice(de, s, last_slice); + else + rv = decode_slice(de, s, last_slice); + if (rv) + goto fail; + + ++s->slice_idx; + } - if (!s->frame_end) { + if (!frame_end) { xtrace_ok(dev, de); return; } @@ -2054,8 +2063,8 @@ static void rpivid_h265_setup(struct rpi fail: if (de) // Actual error reporting happens in Trigger - de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE : - RPIVID_DECODE_ERROR_CONTINUE; + de->state = frame_end ? RPIVID_DECODE_ERROR_DONE : + RPIVID_DECODE_ERROR_CONTINUE; xtrace_fail(dev, de); }