1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
From d4f8e47d60c180cf57eba4093a343230a824ecbf Mon Sep 17 00:00:00 2001
From: John Cox <jc@kynesim.co.uk>
Date: Mon, 29 Mar 2021 17:42:16 +0100
Subject: [PATCH] media: rpivid: Map cmd buffer directly
It is unnecessary to have a separate dmabuf to hold the cmd buffer.
Map it directly from the kmalloc.
Signed-off-by: John Cox <jc@kynesim.co.uk>
---
drivers/staging/media/rpivid/rpivid.h | 3 +-
drivers/staging/media/rpivid/rpivid_h265.c | 48 ++++++++++------------
drivers/staging/media/rpivid/rpivid_hw.c | 2 +
3 files changed, 25 insertions(+), 28 deletions(-)
--- a/drivers/staging/media/rpivid/rpivid.h
+++ b/drivers/staging/media/rpivid/rpivid.h
@@ -114,7 +114,6 @@ struct rpivid_ctx {
unsigned int p1idx;
atomic_t p1out;
struct rpivid_gptr bitbufs[RPIVID_P1BUF_COUNT];
- struct rpivid_gptr cmdbufs[RPIVID_P1BUF_COUNT];
/* *** Should be in dev *** */
unsigned int p2idx;
@@ -183,6 +182,8 @@ struct rpivid_dev {
struct clk *clock;
struct clk_request *hevc_req;
+ int cache_align;
+
struct rpivid_hw_irq_ctrl ic_active1;
struct rpivid_hw_irq_ctrl ic_active2;
};
--- a/drivers/staging/media/rpivid/rpivid_h265.c
+++ b/drivers/staging/media/rpivid/rpivid_h265.c
@@ -227,6 +227,9 @@ struct rpivid_dec_env {
struct rpivid_q_aux *frame_aux;
struct rpivid_q_aux *col_aux;
+ dma_addr_t cmd_addr;
+ size_t cmd_size;
+
dma_addr_t pu_base_vc;
dma_addr_t coeff_base_vc;
u32 pu_stride;
@@ -234,7 +237,6 @@ struct rpivid_dec_env {
struct rpivid_gptr *bit_copy_gptr;
size_t bit_copy_len;
- struct rpivid_gptr *cmd_copy_gptr;
#define SLICE_MSGS_MAX (2 * HEVC_MAX_REFS * 8 + 3)
u16 slice_msgs[SLICE_MSGS_MAX];
@@ -1499,22 +1501,17 @@ static int write_cmd_buffer(struct rpivi
struct rpivid_dec_env *const de,
const struct rpivid_dec_state *const s)
{
- // Copy commands out to dma buf
- const size_t cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
-
- if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
- size_t cmd_alloc = round_up_size(cmd_size);
+ const size_t cmd_size = ALIGN(de->cmd_len * sizeof(de->cmd_fifo[0]),
+ dev->cache_align);
- if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) {
- v4l2_err(&dev->v4l2_dev,
- "Alloc cmd buffer (%zu): FAILED\n", cmd_alloc);
- return -ENOMEM;
- }
- v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%zu): OK\n",
- cmd_alloc);
+ de->cmd_addr = dma_map_single(dev->dev, de->cmd_fifo,
+ cmd_size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev->dev, de->cmd_addr)) {
+ v4l2_err(&dev->v4l2_dev,
+ "Map cmd buffer (%zu): FAILED\n", cmd_size);
+ return -ENOMEM;
}
-
- memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size);
+ de->cmd_size = cmd_size;
return 0;
}
@@ -1551,6 +1548,12 @@ static void dec_env_delete(struct rpivid
struct rpivid_ctx * const ctx = de->ctx;
unsigned long lock_flags;
+ if (de->cmd_size) {
+ dma_unmap_single(ctx->dev->dev, de->cmd_addr, de->cmd_size,
+ DMA_TO_DEVICE);
+ de->cmd_size = 0;
+ }
+
aux_q_release(ctx, &de->frame_aux);
aux_q_release(ctx, &de->col_aux);
@@ -1603,7 +1606,8 @@ static int dec_env_init(struct rpivid_ct
de->ctx = ctx;
de->decode_order = i;
- de->cmd_max = 1024;
+// de->cmd_max = 1024;
+ de->cmd_max = 8096;
de->cmd_fifo = kmalloc_array(de->cmd_max,
sizeof(struct rpi_cmd),
GFP_KERNEL);
@@ -1748,7 +1752,6 @@ static void rpivid_h265_setup(struct rpi
de->bit_copy_gptr = ctx->bitbufs + ctx->p1idx;
de->bit_copy_len = 0;
- de->cmd_copy_gptr = ctx->cmdbufs + ctx->p1idx;
de->frame_c_offset = ctx->dst_fmt.height * 128;
de->frame_stride = ctx->dst_fmt.plane_fmt[0].bytesperline * 128;
@@ -2356,7 +2359,7 @@ static void phase1_claimed(struct rpivid
rpivid_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de);
// And start the h/w
- apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
+ apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_addr);
xtrace_ok(dev, de);
return;
@@ -2400,8 +2403,6 @@ static void rpivid_h265_stop(struct rpiv
for (i = 0; i != ARRAY_SIZE(ctx->bitbufs); ++i)
gptr_free(dev, ctx->bitbufs + i);
- for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i)
- gptr_free(dev, ctx->cmdbufs + i);
for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i)
gptr_free(dev, ctx->pu_bufs + i);
for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i)
@@ -2451,13 +2452,6 @@ static int rpivid_h265_start(struct rpiv
goto fail;
}
- // 16k is plenty for most purposes but we will realloc if needed
- for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) {
- if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000,
- DMA_ATTR_FORCE_CONTIGUOUS))
- goto fail;
- }
-
// Finger in the air PU & Coeff alloc
// Will be realloced if too small
coeff_alloc = round_up_size(wxh);
--- a/drivers/staging/media/rpivid/rpivid_hw.c
+++ b/drivers/staging/media/rpivid/rpivid_hw.c
@@ -331,6 +331,8 @@ int rpivid_hw_probe(struct rpivid_dev *d
if (IS_ERR(dev->clock))
return PTR_ERR(dev->clock);
+ dev->cache_align = dma_get_cache_alignment();
+
// Disable IRQs & reset anything pending
irq_write(dev, 0,
ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);
|