diff options
Diffstat (limited to 'target/linux/bcm27xx/patches-5.15/950-0466-media-rpivid-Make-slice-ctrl-dynamic.patch')
-rw-r--r-- | target/linux/bcm27xx/patches-5.15/950-0466-media-rpivid-Make-slice-ctrl-dynamic.patch | 334 |
1 files changed, 334 insertions, 0 deletions
diff --git a/target/linux/bcm27xx/patches-5.15/950-0466-media-rpivid-Make-slice-ctrl-dynamic.patch b/target/linux/bcm27xx/patches-5.15/950-0466-media-rpivid-Make-slice-ctrl-dynamic.patch new file mode 100644 index 0000000000..66e9783711 --- /dev/null +++ b/target/linux/bcm27xx/patches-5.15/950-0466-media-rpivid-Make-slice-ctrl-dynamic.patch @@ -0,0 +1,334 @@ +From 4ab81f113bdf1ca8c3b0d53c777885aa33ed27f3 Mon Sep 17 00:00:00 2001 +From: John Cox <jc@kynesim.co.uk> +Date: Thu, 29 Apr 2021 19:17:06 +0100 +Subject: [PATCH] media: rpivid: Make slice ctrl dynamic + +Allows the user to submit a whole frames worth of slice headers in +one lump along with a single bitstream dmabuf for the whole lot. +This saves potentially a lot of bitstream copying. + +Signed-off-by: John Cox <jc@kynesim.co.uk> +--- + drivers/staging/media/rpivid/rpivid.c | 4 + + drivers/staging/media/rpivid/rpivid_dec.c | 18 ++- + drivers/staging/media/rpivid/rpivid_h265.c | 151 +++++++++++---------- + 3 files changed, 99 insertions(+), 74 deletions(-) + +--- a/drivers/staging/media/rpivid/rpivid.c ++++ b/drivers/staging/media/rpivid/rpivid.c +@@ -63,7 +63,11 @@ static const struct rpivid_control rpivi + }, + { + .cfg = { ++ .name = "Slice param array", + .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, ++ .type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS, ++ .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY, ++ .dims = { 0x1000 }, + }, + .required = true, + }, +--- a/drivers/staging/media/rpivid/rpivid_dec.c ++++ b/drivers/staging/media/rpivid/rpivid_dec.c +@@ -46,22 +46,34 @@ void rpivid_device_run(void *priv) + + switch (ctx->src_fmt.pixelformat) { + case V4L2_PIX_FMT_HEVC_SLICE: ++ { ++ const struct v4l2_ctrl *ctrl; ++ + run.h265.sps = + rpivid_find_control_data(ctx, + V4L2_CID_MPEG_VIDEO_HEVC_SPS); + run.h265.pps = + rpivid_find_control_data(ctx, + V4L2_CID_MPEG_VIDEO_HEVC_PPS); +- run.h265.slice_params = +- rpivid_find_control_data(ctx, +- V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS); + run.h265.dec = + rpivid_find_control_data(ctx, + V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS); ++ ++ ctrl = rpivid_find_ctrl(ctx, ++ V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS); ++ if (!ctrl || !ctrl->elems) { ++ v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n", ++ __func__); ++ goto fail; ++ } ++ run.h265.slice_ents = ctrl->elems; ++ run.h265.slice_params = ctrl->p_cur.p; ++ + run.h265.scaling_matrix = + rpivid_find_control_data(ctx, + V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX); + break; ++ } + + default: + break; +--- a/drivers/staging/media/rpivid/rpivid_h265.c ++++ b/drivers/staging/media/rpivid/rpivid_h265.c +@@ -245,7 +245,6 @@ struct rpivid_dec_state { + + // Slice vars + unsigned int slice_idx; +- bool frame_end; + bool slice_temporal_mvp; /* Slice flag but constant for frame */ + + // Temp vars per run - don't actually need to persist +@@ -740,7 +739,8 @@ static void new_slice_segment(struct rpi + V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)) + << 24)); + +- if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0) ++ if (!s->start_ts && ++ (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0) + write_scaling_factors(de); + + if (!s->dependent_slice_segment_flag) { +@@ -1111,7 +1111,8 @@ static int wpp_end_previous_slice(struct + * next chunk code simpler + */ + static int wpp_decode_slice(struct rpivid_dec_env *const de, +- const struct rpivid_dec_state *const s) ++ const struct rpivid_dec_state *const s, ++ bool last_slice) + { + bool reset_qp_y = true; + const bool indep = !s->dependent_slice_segment_flag; +@@ -1150,7 +1151,7 @@ static int wpp_decode_slice(struct rpivi + 0, 0, s->start_ctb_x, s->start_ctb_y, + s->slice_qp, slice_reg_const(s)); + +- if (s->frame_end) { ++ if (last_slice) { + rv = wpp_entry_fill(de, s, s->ctb_height - 1); + if (rv) + return rv; +@@ -1229,7 +1230,8 @@ static int end_previous_slice(struct rpi + } + + static int decode_slice(struct rpivid_dec_env *const de, +- const struct rpivid_dec_state *const s) ++ const struct rpivid_dec_state *const s, ++ bool last_slice) + { + bool reset_qp_y; + unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x); +@@ -1275,7 +1277,7 @@ static int decode_slice(struct rpivid_de + * now, otherwise this will be done at the start of the next slice + * when it will be known where this slice finishes + */ +- if (s->frame_end) { ++ if (last_slice) { + rv = tile_entry_fill(de, s, + s->tile_width - 1, + s->tile_height - 1); +@@ -1670,11 +1672,13 @@ static u32 mk_config2(const struct rpivi + static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run) + { + struct rpivid_dev *const dev = ctx->dev; +- const struct v4l2_ctrl_hevc_slice_params *const sh = +- run->h265.slice_params; + const struct v4l2_ctrl_hevc_decode_params *const dec = + run->h265.dec; +-// const struct v4l2_hevc_pred_weight_table *pred_weight_table; ++ /* sh0 used where slice header contents should be constant over all ++ * slices, or first slice of frame ++ */ ++ const struct v4l2_ctrl_hevc_slice_params *const sh0 = ++ run->h265.slice_params; + struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + struct rpivid_dec_state *const s = ctx->state; + struct vb2_queue *vq; +@@ -1684,20 +1688,18 @@ static void rpivid_h265_setup(struct rpi + int use_aux; + int rv; + bool slice_temporal_mvp; ++ bool frame_end; + + xtrace_in(dev, de); ++ s->sh = NULL; // Avoid use until in the slice loop + +-// pred_weight_table = &sh->pred_weight_table; +- +- s->frame_end = ++ frame_end = + ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0); + +- slice_temporal_mvp = (sh->flags & ++ slice_temporal_mvp = (sh0->flags & + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED); + + if (de && de->state != RPIVID_DECODE_END) { +- ++s->slice_idx; +- + switch (de->state) { + case RPIVID_DECODE_SLICE_CONTINUE: + // Expected state +@@ -1830,7 +1832,7 @@ static void rpivid_h265_setup(struct rpi + de->rpi_config2 = mk_config2(s); + de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) | + s->sps.pic_width_in_luma_samples; +- de->rpi_currpoc = sh->slice_pic_order_cnt; ++ de->rpi_currpoc = sh0->slice_pic_order_cnt; + + if (s->sps.flags & + V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) { +@@ -1839,17 +1841,17 @@ static void rpivid_h265_setup(struct rpi + + s->slice_idx = 0; + +- if (sh->slice_segment_addr != 0) { ++ if (sh0->slice_segment_addr != 0) { + v4l2_warn(&dev->v4l2_dev, + "New frame but segment_addr=%d\n", +- sh->slice_segment_addr); ++ sh0->slice_segment_addr); + goto fail; + } + + /* Allocate a bitbuf if we need one - don't need one if single + * slice as we can use the src buf directly + */ +- if (!s->frame_end && !de->bit_copy_gptr->ptr) { ++ if (!frame_end && !de->bit_copy_gptr->ptr) { + size_t bits_alloc; + bits_alloc = rpivid_bit_buf_size(s->sps.pic_width_in_luma_samples, + s->sps.pic_height_in_luma_samples, +@@ -1873,21 +1875,7 @@ static void rpivid_h265_setup(struct rpi + s->src_addr = 0; + s->src_buf = NULL; + +- if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { +- v4l2_warn(&dev->v4l2_dev, +- "Bit size %d > bytesused %d\n", +- sh->bit_size, run->src->planes[0].bytesused); +- goto fail; +- } +- if (sh->data_bit_offset >= sh->bit_size || +- sh->bit_size - sh->data_bit_offset < 8) { +- v4l2_warn(&dev->v4l2_dev, +- "Bit size %d < Bit offset %d + 8\n", +- sh->bit_size, sh->data_bit_offset); +- goto fail; +- } +- +- if (s->frame_end) ++ if (frame_end) + s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, + 0); + if (!s->src_addr) +@@ -1898,44 +1886,65 @@ static void rpivid_h265_setup(struct rpi + } + + // Pre calc a few things +- s->sh = sh; + s->dec = dec; +- s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta; +- s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? ++ for (i = 0; i != run->h265.slice_ents; ++i) { ++ const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i; ++ const bool last_slice = frame_end && i + 1 == run->h265.slice_ents; ++ ++ s->sh = sh; ++ ++ if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { ++ v4l2_warn(&dev->v4l2_dev, ++ "Bit size %d > bytesused %d\n", ++ sh->bit_size, run->src->planes[0].bytesused); ++ goto fail; ++ } ++ if (sh->data_bit_offset >= sh->bit_size || ++ sh->bit_size - sh->data_bit_offset < 8) { ++ v4l2_warn(&dev->v4l2_dev, ++ "Bit size %d < Bit offset %d + 8\n", ++ sh->bit_size, sh->data_bit_offset); ++ goto fail; ++ } ++ ++ s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta; ++ s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? ++ 0 : ++ (5 - sh->five_minus_max_num_merge_cand); ++ s->dependent_slice_segment_flag = ++ ((sh->flags & ++ V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0); ++ ++ s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ? ++ 0 : ++ sh->num_ref_idx_l0_active_minus1 + 1; ++ s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ? + 0 : +- (5 - sh->five_minus_max_num_merge_cand); +- // * SH DSS flag invented by me - but clearly needed +- s->dependent_slice_segment_flag = +- ((sh->flags & +- V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0); +- +- s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ? +- 0 : +- sh->num_ref_idx_l0_active_minus1 + 1; +- s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ? +- 0 : +- sh->num_ref_idx_l1_active_minus1 + 1; +- +- if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) +- populate_scaling_factors(run, de, s); +- +- // Calc all the random coord info to avoid repeated conversion in/out +- s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr]; +- s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y; +- s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y; +- // Last CTB of previous slice +- prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1]; +- s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y; +- s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y; ++ sh->num_ref_idx_l1_active_minus1 + 1; + +- if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) +- rv = wpp_decode_slice(de, s); +- else +- rv = decode_slice(de, s); +- if (rv) +- goto fail; ++ if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) ++ populate_scaling_factors(run, de, s); ++ ++ /* Calc all the random coord info to avoid repeated conversion in/out */ ++ s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr]; ++ s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y; ++ s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y; ++ /* Last CTB of previous slice */ ++ prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1]; ++ s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y; ++ s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y; ++ ++ if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) ++ rv = wpp_decode_slice(de, s, last_slice); ++ else ++ rv = decode_slice(de, s, last_slice); ++ if (rv) ++ goto fail; ++ ++ ++s->slice_idx; ++ } + +- if (!s->frame_end) { ++ if (!frame_end) { + xtrace_ok(dev, de); + return; + } +@@ -2054,8 +2063,8 @@ static void rpivid_h265_setup(struct rpi + fail: + if (de) + // Actual error reporting happens in Trigger +- de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE : +- RPIVID_DECODE_ERROR_CONTINUE; ++ de->state = frame_end ? RPIVID_DECODE_ERROR_DONE : ++ RPIVID_DECODE_ERROR_CONTINUE; + xtrace_fail(dev, de); + } + |