From fced03d891377fa04153fb0538bc8ca95ba05020 Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Tue, 14 Nov 2017 08:12:12 +0200
Subject: [PATCH] dpaa_eth: workaround for ERR010022

On LS1043A SoC there is a known erratum ERR010022 that results in split DMA
transfers in the FMan under certain conditions. This, combined with a
fixed-size FIFO of ongoing DMA transfers that may overflow when a split
occurs, results in the FMan stalling DMA transfers under high traffic. To
avoid the problem, one needs to prevent the DMA transfer splits from
occurring by preparing the buffers as follows.

In order to prevent split transactions, all frames need to be aligned to 16
bytes and must not cross 4K address boundaries. To allow Jumbo frames (up to
9.6K), all data must be aligned to 256 bytes. This way, 4K boundary crossings
will not trigger any transaction splits.

The erratum is prevented from manifesting by realigning all outgoing frames
to 256 byte boundaries. In the process, all S/G frames are linearized.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: Camelia Groza <camelia.groza@nxp.com>
[rebase]
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 204 +++++++++++++++++++++++--
1 file changed, 194 insertions(+), 10 deletions(-)
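
As a quick reference for reviewers, the erratum's two trigger conditions,
which the 16-byte alignment check and the CROSS_4K_BOUND() macro below
encode, can be reduced to a standalone sketch (not part of the patch; the
helper name would_split() and the sample addresses are illustrative):

/* Sketch of the conditions that split a DMA transfer on LS1043A. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGNMENT 16	/* DPAA_FD_DATA_ALIGNMENT in the driver */

static bool would_split(uintptr_t start, size_t size)
{
	/* split trigger 1: start is not 16-byte aligned */
	if (start % ALIGNMENT)
		return true;
	/* split trigger 2: the transfer crosses a 4K boundary */
	return start + size > ((start + 0x1000) & ~(uintptr_t)0xFFF);
}

int main(void)
{
	printf("%d\n", would_split(0x2000, 4096));	/* 0: fits one page */
	printf("%d\n", would_split(0x2ff0, 32));	/* 1: crosses 0x3000 */
	printf("%d\n", would_split(0x2008, 16));	/* 1: only 8-byte aligned */
	return 0;
}
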
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -54,6 +54,10 @@
#include <linux/phy_fixed.h>
#include <soc/fsl/bman.h>
#include <soc/fsl/qman.h>
+#if !defined(CONFIG_PPC) && defined(CONFIG_SOC_BUS)
+#include <linux/sys_soc.h> /* soc_device_match */
+#endif
+
#include "fman.h"
#include "fman_port.h"
#include "mac.h"
@@ -73,6 +77,10 @@ static u16 tx_timeout = 1000;
module_param(tx_timeout, ushort, 0444);
MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
+#ifndef CONFIG_PPC
+bool dpaa_errata_a010022;
+#endif
+
#define FM_FD_STAT_RX_ERRORS \
(FM_FD_ERR_DMA | FM_FD_ERR_PHYSICAL | \
FM_FD_ERR_SIZE | FM_FD_ERR_CLS_DISCARD | \
@@ -1495,7 +1503,19 @@ static int dpaa_bp_add_8_bufs(const stru
u8 i;
for (i = 0; i < 8; i++) {
+#ifndef CONFIG_PPC
+ if (dpaa_errata_a010022) {
+ struct page *page = alloc_page(GFP_KERNEL);
+
+ if (unlikely(!page))
+ goto release_previous_buffs;
+ new_buf = page_address(page);
+ } else {
+ new_buf = netdev_alloc_frag(dpaa_bp->raw_size);
+ }
+#else
new_buf = netdev_alloc_frag(dpaa_bp->raw_size);
+#endif
if (unlikely(!new_buf)) {
dev_err(dev, "netdev_alloc_frag() failed, size %zu\n",
dpaa_bp->raw_size);
@@ -1663,9 +1683,15 @@ static struct sk_buff *dpaa_cleanup_tx_f
}
}
- if (qm_fd_get_format(fd) == qm_fd_sg)
- /* Free the page frag that we allocated on Tx */
- skb_free_frag(phys_to_virt(addr));
+ if (qm_fd_get_format(fd) == qm_fd_sg) {
+#ifndef CONFIG_PPC
+ if (dpaa_errata_a010022)
+ put_page(virt_to_page(sgt));
+ else
+#endif
+ /* Free the page frag that we allocated on Tx */
+ skb_free_frag(phys_to_virt(addr));
+ }
return skb;
}
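
A note on the free path above: with the workaround in place, Tx buffers come
from two different allocators, and each needs its matching release
primitive. A minimal kernel-context sketch (illustrative only;
example_release() is not part of the patch):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/skbuff.h>

static void example_release(void *frag_buf, struct page *page)
{
	skb_free_frag(frag_buf);	/* pairs with netdev_alloc_frag() */
	put_page(page);			/* pairs with alloc_page() */
}
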
@@ -1922,14 +1948,26 @@ static int skb_to_sg_fd(struct dpaa_priv
size_t frag_len;
void *sgt_buf;
- /* get a page frag to store the SGTable */
- sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
- sgt_buf = netdev_alloc_frag(sz);
- if (unlikely(!sgt_buf)) {
- netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
- sz);
- return -ENOMEM;
+#ifndef CONFIG_PPC
+ if (unlikely(dpaa_errata_a010022)) {
+ struct page *page = alloc_page(GFP_ATOMIC);
+ if (unlikely(!page))
+ return -ENOMEM;
+ sgt_buf = page_address(page);
+ } else {
+#endif
+ /* get a page frag to store the SGTable */
+ sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
+ sgt_buf = netdev_alloc_frag(sz);
+ if (unlikely(!sgt_buf)) {
+ netdev_err(net_dev,
+ "netdev_alloc_frag() failed for size %d\n",
+ sz);
+ return -ENOMEM;
+ }
+#ifndef CONFIG_PPC
}
+#endif
/* Enable L3/L4 hardware checksum computation.
*
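
Why a whole page is a safe home for the SGT under the erratum: a
page-aligned buffer is trivially 16-byte aligned, and any transfer that fits
within the page cannot cross a 4K boundary. A userspace sketch, with
posix_memalign() standing in for alloc_page() and an illustrative size:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	void *sgt_buf;
	uintptr_t start;
	size_t size = 600;	/* ~tx_headroom + SGT, well under 4K */

	/* 4096-byte aligned, like page_address(alloc_page(...)) */
	if (posix_memalign(&sgt_buf, 4096, 4096))
		return 1;
	start = (uintptr_t)sgt_buf;

	assert(start % 16 == 0);	/* no alignment-triggered split */
	/* CROSS_4K_BOUND(start, size) is false for any size <= 4096
	 * when start is page aligned: the transfer ends at or before
	 * the next 4K boundary, never across it.
	 */
	assert(!(start + size > ((start + 0x1000) & ~(uintptr_t)0xFFF)));
	free(sgt_buf);
	return 0;
}
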
@@ -2049,6 +2087,122 @@ static inline int dpaa_xmit(struct dpaa_
return 0;
}
+#ifndef CONFIG_PPC
+/* On LS1043A SoC there is a known erratum ERR010022 that results in split DMA
+ * transfers in the FMan under certain conditions. This, combined with a fixed
+ * size FIFO of ongoing DMA transfers that may overflow when a split occurs,
+ * results in the FMan stalling DMA transfers under high traffic. To avoid the
+ * problem, one needs to prevent the DMA transfer splits from occurring
+ * by preparing the buffers.
+ */
+
+#define DPAA_A010022_HEADROOM 256
+#define CROSS_4K_BOUND(start, size) \
+ (((start) + (size)) > (((start) + 0x1000) & ~0xFFF))
+
+static bool dpaa_errata_a010022_has_dma_issue(struct sk_buff *skb,
+ struct dpaa_priv *priv)
+{
+ int nr_frags, i = 0;
+ skb_frag_t *frag;
+
+ /* Transfers that do not start at 16B aligned addresses will be split;
+ * Transfers that cross a 4K page boundary will also be split
+ */
+
+ /* Check if the frame data is aligned to 16 bytes */
+ if ((uintptr_t)skb->data % DPAA_FD_DATA_ALIGNMENT)
+ return true;
+
+ /* Check if the headroom crosses a boundary */
+ if (CROSS_4K_BOUND((uintptr_t)skb->head, skb_headroom(skb)))
+ return true;
+
+ /* Check if the non-paged data crosses a boundary */
+ if (CROSS_4K_BOUND((uintptr_t)skb->data, skb_headlen(skb)))
+ return true;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+
+ while (i < nr_frags) {
+ frag = &skb_shinfo(skb)->frags[i];
+
+ /* Check if a paged fragment crosses a boundary from its
+ * offset to its end.
+ */
+ if (CROSS_4K_BOUND((uintptr_t)frag->page_offset, frag->size))
+ return true;
+
+ i++;
+ }
+
+ return false;
+}
+
+static struct sk_buff *dpaa_errata_a010022_prevent(struct sk_buff *skb,
+ struct dpaa_priv *priv)
+{
+ int trans_offset = skb_transport_offset(skb);
+ int net_offset = skb_network_offset(skb);
+ int nsize, npage_order, headroom;
+ struct sk_buff *nskb = NULL;
+ struct page *npage;
+ void *npage_addr;
+
+ if (!dpaa_errata_a010022_has_dma_issue(skb, priv))
+ return skb;
+
+ /* For the new skb we only need the old one's data (both non-paged and
+ * paged). We can skip the old tailroom.
+ *
+ * The headroom also needs to fit our private info (64 bytes) but we
+ * reserve 256 bytes instead in order to guarantee that the data is
+ * aligned to 256.
+ */
+ headroom = DPAA_A010022_HEADROOM;
+ nsize = headroom + skb->len +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ /* Reserve enough memory to accommodate Jumbo frames */
+ npage_order = (nsize - 1) / PAGE_SIZE;
+ npage = alloc_pages(GFP_ATOMIC | __GFP_COMP, npage_order);
+ if (unlikely(!npage)) {
+ WARN_ONCE(1, "Memory allocation failure\n");
+ return NULL;
+ }
+ npage_addr = page_address(npage);
+
+ nskb = build_skb(npage_addr, nsize);
+ if (unlikely(!nskb))
+ goto err;
+
+ /* Code borrowed and adapted from skb_copy() */
+ skb_reserve(nskb, headroom);
+ skb_put(nskb, skb->len);
+ if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
+ WARN_ONCE(1, "skb parsing failure\n");
+ goto err;
+ }
+ copy_skb_header(nskb, skb);
+
+ /* We move the headroom when we align it so we have to reset the
+ * network and transport header offsets relative to the new data
+ * pointer. The checksum offload relies on these offsets.
+ */
+ skb_set_network_header(nskb, net_offset);
+ skb_set_transport_header(nskb, trans_offset);
+
+ dev_kfree_skb(skb);
+ return nskb;
+
+err:
+ if (nskb)
+ dev_kfree_skb(nskb);
+ put_page(npage);
+ return NULL;
+}
+#endif
+
static netdev_tx_t
dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
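
For scale, the allocation in dpaa_errata_a010022_prevent() above for a 9.6K
jumbo frame works out as in the back-of-envelope sketch below (userspace,
illustrative; the ~320-byte skb_shared_info figure is an approximation for
64-bit builds). Note that alloc_pages() takes a power-of-two order, so the
(nsize - 1) / PAGE_SIZE formula overallocates for large frames but never
undershoots: it yields order n - 1 for n needed pages, and 2^(n-1) >= n.

#include <stdio.h>

int main(void)
{
	const unsigned int page = 4096;
	unsigned int headroom = 256;	/* DPAA_A010022_HEADROOM */
	unsigned int len = 9600;	/* jumbo frame payload */
	unsigned int shinfo = 320;	/* ~SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
	unsigned int nsize = headroom + len + shinfo;
	unsigned int order = (nsize - 1) / page;

	/* prints: nsize=10176 order=2 alloc=16384 */
	printf("nsize=%u order=%u alloc=%u\n",
	       nsize, order, (1u << order) * page);
	return 0;
}
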
@@ -2095,6 +2249,15 @@ dpaa_start_xmit(struct sk_buff *skb, str
nonlinear = skb_is_nonlinear(skb);
}
+#ifndef CONFIG_PPC
+ if (unlikely(dpaa_errata_a010022)) {
+ skb = dpaa_errata_a010022_prevent(skb, priv);
+ if (!skb)
+ goto enomem;
+ nonlinear = skb_is_nonlinear(skb);
+ }
+#endif
+
if (nonlinear) {
/* Just create a S/G fd based on the skb */
err = skb_to_sg_fd(priv, skb, &fd);
@@ -2992,6 +3155,23 @@ static int dpaa_remove(struct platform_d
return err;
}
+#ifndef CONFIG_PPC
+static bool __init soc_has_errata_a010022(void)
+{
+#ifdef CONFIG_SOC_BUS
+ const struct soc_device_attribute soc_msi_matches[] = {
+ { .family = "QorIQ LS1043A",
+ .data = NULL },
+ { },
+ };
+
+ if (!soc_device_match(soc_msi_matches))
+ return false;
+#endif
+ return true; /* cannot identify the SoC, or the erratum applies */
+}
+#endif
+
static const struct platform_device_id dpaa_devtype[] = {
{
.name = "dpaa-ethernet",
@@ -3016,6 +3196,10 @@ static int __init dpaa_load(void)
pr_debug("FSL DPAA Ethernet driver\n");
+#ifndef CONFIG_PPC
+ /* Detect if the current SoC requires the DMA transfer alignment workaround */
+ dpaa_errata_a010022 = soc_has_errata_a010022();
+#endif
/* initialize dpaa_eth mirror values */
dpaa_rx_extra_headroom = fman_get_rx_extra_headroom();
dpaa_max_frm = fman_get_max_frm();
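
To check on a live system what soc_has_errata_a010022() will see, the family
string it matches against is exported by the soc bus. A small userspace
sketch (the sysfs node exists only with CONFIG_SOC_BUS and a registered soc
device):

#include <stdio.h>

int main(void)
{
	char family[64];
	FILE *f = fopen("/sys/devices/soc0/family", "r");

	if (!f) {
		/* no soc bus: the driver conservatively enables the workaround */
		puts("soc identification unavailable");
		return 0;
	}
	if (fgets(family, sizeof(family), f))
		printf("SoC family: %s", family);	/* "QorIQ LS1043A" on affected parts */
	fclose(f);
	return 0;
}
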