1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
|
From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 28 Aug 2017 20:30:24 +0530
Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
The bam dmaengine has a circular FIFO to which we
add hw descriptors that describes the transaction.
The FIFO has space for about 4096 hw descriptors.
Currently we add one descriptor and wait for it to
complete with interrupt and then add the next pending
descriptor. In this way, the FIFO is underutilized
since only one descriptor is processed at a time, although
there is space in FIFO for the BAM to process more.
Instead keep adding descriptors to FIFO till its full,
that allows BAM to continue to work on the next descriptor
immediately after signalling completion interrupt for the
previous descriptor.
Also when the client has not set the DMA_PREP_INTERRUPT for
a descriptor, then do not configure BAM to trigger a interrupt
upon completion of that descriptor. This way we get a interrupt
only for the descriptor for which DMA_PREP_INTERRUPT was
requested and there signal completion of all the previous completed
descriptors. So we still do callbacks for all requested descriptors,
but just that the number of interrupts are reduced.
CURRENT:
------ ------- ---------------
|DES 0| |DESC 1| |DESC 2 + INT |
------ ------- ---------------
| | |
| | |
INTERRUPT: (INT) (INT) (INT)
CALLBACK: (CB) (CB) (CB)
MTD_SPEEDTEST READ PAGE: 3560 KiB/s
MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
IOZONE READ: 2456 KB/s
IOZONE WRITE: 1230 KB/s
bam dma interrupts (after tests): 96508
CHANGE:
------ ------- -------------
|DES 0| |DESC 1 |DESC 2 + INT |
------ ------- --------------
|
|
(INT)
(CB for 0, 1, 2)
MTD_SPEEDTEST READ PAGE: 3860 KiB/s
MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
IOZONE READ: 2677 KB/s
IOZONE WRITE: 1308 KB/s
bam dma interrupts (after tests): 58806
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Reviewed-by: Andy Gross <andy.gross@linaro.org>
Tested-by: Abhishek Sahu <absahu@codeaurora.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
1 file changed, 109 insertions(+), 60 deletions(-)
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -46,6 +46,7 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_dma.h>
+#include <linux/circ_buf.h>
#include <linux/clk.h>
#include <linux/dmaengine.h>
#include <linux/pm_runtime.h>
@@ -78,6 +79,8 @@ struct bam_async_desc {
struct bam_desc_hw *curr_desc;
+ /* list node for the desc in the bam_chan list of descriptors */
+ struct list_head desc_node;
enum dma_transfer_direction dir;
size_t length;
struct bam_desc_hw desc[0];
@@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
#define BAM_DESC_FIFO_SIZE SZ_32K
#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
#define BAM_FIFO_SIZE (SZ_32K - 8)
+#define IS_BUSY(chan) (CIRC_SPACE(bchan->tail, bchan->head,\
+ MAX_DESCRIPTORS + 1) == 0)
struct bam_chan {
struct virt_dma_chan vc;
@@ -356,8 +361,6 @@ struct bam_chan {
/* configuration from device tree */
u32 id;
- struct bam_async_desc *curr_txd; /* current running dma */
-
/* runtime configuration */
struct dma_slave_config slave;
@@ -372,6 +375,8 @@ struct bam_chan {
unsigned int initialized; /* is the channel hw initialized? */
unsigned int paused; /* is the channel paused? */
unsigned int reconfigure; /* new slave config? */
+ /* list of descriptors currently processed */
+ struct list_head desc_list;
struct list_head node;
};
@@ -540,7 +545,7 @@ static void bam_free_chan(struct dma_cha
vchan_free_chan_resources(to_virt_chan(chan));
- if (bchan->curr_txd) {
+ if (!list_empty(&bchan->desc_list)) {
dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
goto err;
}
@@ -633,8 +638,6 @@ static struct dma_async_tx_descriptor *b
if (flags & DMA_PREP_INTERRUPT)
async_desc->flags |= DESC_FLAG_EOT;
- else
- async_desc->flags |= DESC_FLAG_INT;
async_desc->num_desc = num_alloc;
async_desc->curr_desc = async_desc->desc;
@@ -685,28 +688,16 @@ err_out:
static int bam_dma_terminate_all(struct dma_chan *chan)
{
struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_async_desc *async_desc, *tmp;
unsigned long flag;
LIST_HEAD(head);
/* remove all transactions, including active transaction */
spin_lock_irqsave(&bchan->vc.lock, flag);
- /*
- * If we have transactions queued, then some might be committed to the
- * hardware in the desc fifo. The only way to reset the desc fifo is
- * to do a hardware reset (either by pipe or the entire block).
- * bam_chan_init_hw() will trigger a pipe reset, and also reinit the
- * pipe. If the pipe is left disabled (default state after pipe reset)
- * and is accessed by a connected hardware engine, a fatal error in
- * the BAM will occur. There is a small window where this could happen
- * with bam_chan_init_hw(), but it is assumed that the caller has
- * stopped activity on any attached hardware engine. Make sure to do
- * this first so that the BAM hardware doesn't cause memory corruption
- * by accessing freed resources.
- */
- if (bchan->curr_txd) {
- bam_chan_init_hw(bchan, bchan->curr_txd->dir);
- list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
- bchan->curr_txd = NULL;
+ list_for_each_entry_safe(async_desc, tmp,
+ &bchan->desc_list, desc_node) {
+ list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
+ list_del(&async_desc->desc_node);
}
vchan_get_all_descriptors(&bchan->vc, &head);
@@ -778,9 +769,9 @@ static int bam_resume(struct dma_chan *c
*/
static u32 process_channel_irqs(struct bam_device *bdev)
{
- u32 i, srcs, pipe_stts;
+ u32 i, srcs, pipe_stts, offset, avail;
unsigned long flags;
- struct bam_async_desc *async_desc;
+ struct bam_async_desc *async_desc, *tmp;
srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
@@ -800,27 +791,40 @@ static u32 process_channel_irqs(struct b
writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
spin_lock_irqsave(&bchan->vc.lock, flags);
- async_desc = bchan->curr_txd;
- if (async_desc) {
- async_desc->num_desc -= async_desc->xfer_len;
- async_desc->curr_desc += async_desc->xfer_len;
- bchan->curr_txd = NULL;
+ offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
+ P_SW_OFSTS_MASK;
+ offset /= sizeof(struct bam_desc_hw);
+
+ /* Number of bytes available to read */
+ avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+
+ list_for_each_entry_safe(async_desc, tmp,
+ &bchan->desc_list, desc_node) {
+ /* Not enough data to read */
+ if (avail < async_desc->xfer_len)
+ break;
/* manage FIFO */
bchan->head += async_desc->xfer_len;
bchan->head %= MAX_DESCRIPTORS;
+ async_desc->num_desc -= async_desc->xfer_len;
+ async_desc->curr_desc += async_desc->xfer_len;
+ avail -= async_desc->xfer_len;
+
/*
- * if complete, process cookie. Otherwise
+ * if complete, process cookie. Otherwise
* push back to front of desc_issued so that
* it gets restarted by the tasklet
*/
- if (!async_desc->num_desc)
+ if (!async_desc->num_desc) {
vchan_cookie_complete(&async_desc->vd);
- else
+ } else {
list_add(&async_desc->vd.node,
- &bchan->vc.desc_issued);
+ &bchan->vc.desc_issued);
+ }
+ list_del(&async_desc->desc_node);
}
spin_unlock_irqrestore(&bchan->vc.lock, flags);
@@ -882,6 +886,7 @@ static enum dma_status bam_tx_status(str
struct dma_tx_state *txstate)
{
struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_async_desc *async_desc;
struct virt_dma_desc *vd;
int ret;
size_t residue = 0;
@@ -897,11 +902,17 @@ static enum dma_status bam_tx_status(str
spin_lock_irqsave(&bchan->vc.lock, flags);
vd = vchan_find_desc(&bchan->vc, cookie);
- if (vd)
+ if (vd) {
residue = container_of(vd, struct bam_async_desc, vd)->length;
- else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
- for (i = 0; i < bchan->curr_txd->num_desc; i++)
- residue += bchan->curr_txd->curr_desc[i].size;
+ } else {
+ list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
+ if (async_desc->vd.tx.cookie != cookie)
+ continue;
+
+ for (i = 0; i < async_desc->num_desc; i++)
+ residue += async_desc->curr_desc[i].size;
+ }
+ }
spin_unlock_irqrestore(&bchan->vc.lock, flags);
@@ -942,63 +953,86 @@ static void bam_start_dma(struct bam_cha
{
struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
struct bam_device *bdev = bchan->bdev;
- struct bam_async_desc *async_desc;
+ struct bam_async_desc *async_desc = NULL;
struct bam_desc_hw *desc;
struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
sizeof(struct bam_desc_hw));
int ret;
+ unsigned int avail;
+ struct dmaengine_desc_callback cb;
lockdep_assert_held(&bchan->vc.lock);
if (!vd)
return;
- list_del(&vd->node);
-
- async_desc = container_of(vd, struct bam_async_desc, vd);
- bchan->curr_txd = async_desc;
-
ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return;
- /* on first use, initialize the channel hardware */
- if (!bchan->initialized)
- bam_chan_init_hw(bchan, async_desc->dir);
-
- /* apply new slave config changes, if necessary */
- if (bchan->reconfigure)
- bam_apply_new_config(bchan, async_desc->dir);
+ while (vd && !IS_BUSY(bchan)) {
+ list_del(&vd->node);
- desc = bchan->curr_txd->curr_desc;
+ async_desc = container_of(vd, struct bam_async_desc, vd);
- if (async_desc->num_desc > MAX_DESCRIPTORS)
- async_desc->xfer_len = MAX_DESCRIPTORS;
- else
- async_desc->xfer_len = async_desc->num_desc;
+ /* on first use, initialize the channel hardware */
+ if (!bchan->initialized)
+ bam_chan_init_hw(bchan, async_desc->dir);
- /* set any special flags on the last descriptor */
- if (async_desc->num_desc == async_desc->xfer_len)
- desc[async_desc->xfer_len - 1].flags |=
- cpu_to_le16(async_desc->flags);
- else
- desc[async_desc->xfer_len - 1].flags |=
- cpu_to_le16(DESC_FLAG_INT);
+ /* apply new slave config changes, if necessary */
+ if (bchan->reconfigure)
+ bam_apply_new_config(bchan, async_desc->dir);
+
+ desc = async_desc->curr_desc;
+ avail = CIRC_SPACE(bchan->tail, bchan->head,
+ MAX_DESCRIPTORS + 1);
+
+ if (async_desc->num_desc > avail)
+ async_desc->xfer_len = avail;
+ else
+ async_desc->xfer_len = async_desc->num_desc;
- if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
- u32 partial = MAX_DESCRIPTORS - bchan->tail;
+ /* set any special flags on the last descriptor */
+ if (async_desc->num_desc == async_desc->xfer_len)
+ desc[async_desc->xfer_len - 1].flags |=
+ cpu_to_le16(async_desc->flags);
- memcpy(&fifo[bchan->tail], desc,
- partial * sizeof(struct bam_desc_hw));
- memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
+ vd = vchan_next_desc(&bchan->vc);
+
+ dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
+
+ /*
+ * An interrupt is generated at this desc, if
+ * - FIFO is FULL.
+ * - No more descriptors to add.
+ * - If a callback completion was requested for this DESC,
+ * In this case, BAM will deliver the completion callback
+ * for this desc and continue processing the next desc.
+ */
+ if (((avail <= async_desc->xfer_len) || !vd ||
+ dmaengine_desc_callback_valid(&cb)) &&
+ !(async_desc->flags & DESC_FLAG_EOT))
+ desc[async_desc->xfer_len - 1].flags |=
+ cpu_to_le16(DESC_FLAG_INT);
+
+ if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+ u32 partial = MAX_DESCRIPTORS - bchan->tail;
+
+ memcpy(&fifo[bchan->tail], desc,
+ partial * sizeof(struct bam_desc_hw));
+ memcpy(fifo, &desc[partial],
+ (async_desc->xfer_len - partial) *
sizeof(struct bam_desc_hw));
- } else {
- memcpy(&fifo[bchan->tail], desc,
- async_desc->xfer_len * sizeof(struct bam_desc_hw));
- }
+ } else {
+ memcpy(&fifo[bchan->tail], desc,
+ async_desc->xfer_len *
+ sizeof(struct bam_desc_hw));
+ }
- bchan->tail += async_desc->xfer_len;
- bchan->tail %= MAX_DESCRIPTORS;
+ bchan->tail += async_desc->xfer_len;
+ bchan->tail %= MAX_DESCRIPTORS;
+ list_add_tail(&async_desc->desc_node, &bchan->desc_list);
+ }
/* ensure descriptor writes and dma start not reordered */
wmb();
@@ -1027,7 +1061,7 @@ static void dma_tasklet(unsigned long da
bchan = &bdev->channels[i];
spin_lock_irqsave(&bchan->vc.lock, flags);
- if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
+ if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
bam_start_dma(bchan);
spin_unlock_irqrestore(&bchan->vc.lock, flags);
}
@@ -1048,7 +1082,7 @@ static void bam_issue_pending(struct dma
spin_lock_irqsave(&bchan->vc.lock, flags);
/* if work pending and idle, start a transaction */
- if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
+ if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
bam_start_dma(bchan);
spin_unlock_irqrestore(&bchan->vc.lock, flags);
@@ -1152,6 +1186,7 @@ static void bam_channel_init(struct bam_
vchan_init(&bchan->vc, &bdev->common);
bchan->vc.desc_free = bam_dma_free_desc;
+ INIT_LIST_HEAD(&bchan->desc_list);
}
static const struct of_device_id bam_of_match[] = {
|