i40e/i40evf: Add support for GSO partial with UDP_TUNNEL_CSUM and GRE_CSUM
drivers/net/ethernet/intel/i40e/i40e_txrx.c
/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/

#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include "i40e.h"
#include "i40e_prototype.h"

static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
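
/* Example (illustrative, not from the original file): a final 1514-byte
 * fragment transmitted with no offloads could be described with
 * build_ctob(I40E_TXD_CMD, 0, 1514, 0), which packs DTYPE_DATA, the EOP and
 * RS commands, a zero offset field and a 1514-byte buffer size into the
 * descriptor's second quad word.
 */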

#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
#define I40E_FD_CLEAN_DELAY 10
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: The PF pointer
 * @add: True for add/update, False for remove
 **/
int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
			     struct i40e_pf *pf, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	unsigned int fpt, dcc;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 delay = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = NULL;
	for (i = 0; i < pf->num_alloc_vsi; i++)
		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
			vsi = pf->vsi[i];
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	/* we need two descriptors to add/del a filter and we can wait */
	do {
		if (I40E_DESC_UNUSED(tx_ring) > 1)
			break;
		msleep_interruptible(1);
		delay++;
	} while (delay < I40E_FD_CLEAN_DELAY);

	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
		return -EAGAIN;

	dma = dma_map_single(dev, raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
	first = &tx_ring->tx_bi[i];
	memset(first, 0, sizeof(struct i40e_tx_buffer));

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
	      I40E_TXD_FLTR_QW0_QINDEX_MASK;

	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;

	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;

	/* Use LAN VSI Id if not programmed by user */
	if (fdir_data->dest_vsi == 0)
		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
	else
		fpt |= ((u32)fdir_data->dest_vsi <<
			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;

	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;

	if (add)
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
	else
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;

	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
	       I40E_TXD_FLTR_QW1_DEST_MASK;

	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;

	if (fdir_data->cnt_index != 0) {
		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
		dcc |= ((u32)fdir_data->cnt_index <<
			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
		       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
	}

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);

	/* Now program a dummy descriptor */
	i = tx_ring->next_to_use;
	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_buf = &tx_ring->tx_bi[i];

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));

	/* record length, and DMA address */
	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
	dma_unmap_addr_set(tx_buf, dma, dma);

	tx_desc->buffer_addr = cpu_to_le64(dma);
	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;

	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
	tx_buf->raw_buf = (void *)raw_packet;

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
	 */
	wmb();

	/* Mark the data descriptor to be watched */
	first->next_to_watch = tx_desc;

	writel(tx_ring->next_to_use, tx_ring->tail);
	return 0;

dma_fail:
	return -1;
}
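
/* Note: adding or removing one flow director rule consumes two descriptors
 * on the FDIR transmit ring -- the filter program descriptor built from
 * fdir_data above, immediately followed by a dummy data descriptor that
 * points at the raw match packet.  That is why the function waits for
 * I40E_DESC_UNUSED(tx_ring) > 1 before touching the ring.
 */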

#define IP_HEADER_OFFSET 14
#define I40E_UDPIP_DUMMY_PACKET_LEN 42
/**
 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct udphdr *udp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	udp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	udp->source = fd_data->src_port;

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}
	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}
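
/* Note on the dummy packets used above and below: the 42-byte UDP template
 * is a 14-byte Ethernet header plus a 20-byte IPv4 header plus an 8-byte
 * UDP header (0x45/0x11 mark IPv4 with protocol UDP), and the TCP template
 * adds a 20-byte TCP header for 54 bytes total.  Only the addresses and
 * ports are patched in from the user-supplied filter before the frame is
 * handed to i40e_program_fdir_filter().
 */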

#define I40E_TCPIP_DUMMY_PACKET_LEN 54
/**
 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct tcphdr *tcp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	/* Dummy packet */
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
		0x0, 0x72, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	tcp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	tcp->source = fd_data->src_port;

	if (add) {
		pf->fd_tcp_rule++;
		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
			if (I40E_DEBUG_FD & pf->hw.debug_mask)
				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
		}
	} else {
		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
				  (pf->fd_tcp_rule - 1) : 0;
		if (pf->fd_tcp_rule == 0) {
			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
			if (I40E_DEBUG_FD & pf->hw.debug_mask)
				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
		}
	}

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}

/**
 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
				    struct i40e_fdir_filter *fd_data,
				    bool add)
{
	return -EOPNOTSUPP;
}

#define I40E_IP_DUMMY_PACKET_LEN 34
/**
 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
				  struct i40e_fdir_filter *fd_data,
				  bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	int i;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0};

	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
		if (!raw_packet)
			return -ENOMEM;
		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);

		ip->saddr = fd_data->src_ip[0];
		ip->daddr = fd_data->dst_ip[0];
		ip->protocol = 0;

		fd_data->pctype = i;
		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

		if (ret) {
			dev_info(&pf->pdev->dev,
				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
				 fd_data->pctype, fd_data->fd_id, ret);
			err = true;
		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			if (add)
				dev_info(&pf->pdev->dev,
					 "Filter OK for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
			else
				dev_info(&pf->pdev->dev,
					 "Filter deleted for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
		}
	}

	if (err)
		kfree(raw_packet);

	return err ? -EOPNOTSUPP : 0;
}

/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: the flow director filter to add or delete
 * @add: true adds a filter, false removes it
 *
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;
	int ret;

	switch (input->flow_type & ~FLOW_EXT) {
	case TCP_V4_FLOW:
		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
		break;
	case UDP_V4_FLOW:
		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
		break;
	case SCTP_V4_FLOW:
		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
		break;
	case IPV4_FLOW:
		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
		break;
	case IP_USER_FLOW:
		switch (input->ip4_proto) {
		case IPPROTO_TCP:
			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
			break;
		case IPPROTO_UDP:
			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
			break;
		case IPPROTO_SCTP:
			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
			break;
		default:
			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
			break;
		}
		break;
	default:
		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
			 input->flow_type);
		ret = -EINVAL;
	}

	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
	return ret;
}

/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
				  union i40e_rx_desc *rx_desc, u8 prog_id)
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
	u32 fcnt_prog, fcnt_avail;
	u32 error;
	u64 qw;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
		    (I40E_DEBUG_FD & pf->hw.debug_mask))
			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
				 pf->fd_inv);

		/* Check if the programming error is for ATR.
		 * If so, auto disable ATR and set a state for
		 * flush in progress. Next time we come here if flush is in
		 * progress do nothing, once flush is complete the state will
		 * be cleared.
		 */
		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
			return;

		pf->fd_add_err++;
		/* store the current atr filter count */
		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);

		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
		}

		/* filter programming failed most likely due to table full */
		fcnt_prog = i40e_get_global_fd_count(pf);
		fcnt_avail = pf->fdir_pf_filter_count;
		/* If ATR is running fcnt_prog can quickly change,
		 * if we are very close to full, it makes sense to disable
		 * FD ATR/SB and then re-enable it when there is room.
		 */
		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
			    !(pf->auto_disable_flags &
			      I40E_FLAG_FD_SB_ENABLED)) {
				if (I40E_DEBUG_FD & pf->hw.debug_mask)
					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
				pf->auto_disable_flags |=
							I40E_FLAG_FD_SB_ENABLED;
			}
		}
	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
		if (I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);
	}
}
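
/* Note on i40e_fd_handle_status() above: a "filter table full" programming
 * error with a zero fd_id (and sideband already auto-disabled) is treated as
 * an ATR failure and requests a filter flush, and whenever the global filter
 * count gets within I40E_FDIR_BUFFER_FULL_MARGIN of pf->fdir_pf_filter_count
 * the driver sets I40E_FLAG_FD_SB_ENABLED in auto_disable_flags so that no
 * new sideband (ntuple) rules are added until there is room again.
 */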

/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		dev_kfree_skb_any(tx_buffer->skb);
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}

	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
		kfree(tx_buffer->raw_buf);

	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}

/**
 * i40e_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
						  tx_ring->queue_index));
}

/**
 * i40e_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40e_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40e_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}

/**
 * i40e_get_tx_pending - how many tx descriptors not processed
 * @tx_ring: the ring of descriptors
 * @in_sw: is tx_pending being checked in SW or HW
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
	u32 head, tail;

	if (!in_sw)
		head = i40e_get_head(ring);
	else
		head = ring->next_to_clean;
	tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}
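
/* Worked example for i40e_get_tx_pending() above: on a 512-descriptor ring
 * with head == 500 and tail == 10, head > tail, so the pending count is
 * tail + ring->count - head = 10 + 512 - 500 = 22 descriptors still owned
 * by hardware.
 */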

#define WB_STRIDE 0x3

/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @vsi: the VSI we care about
 * @tx_ring: Tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
			      struct i40e_ring *tx_ring, int napi_budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = vsi->work_limit;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {

			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
		unsigned int j = 0;

		/* check to see if there are < 4 descriptors
		 * waiting to be written back, then kick the hardware to force
		 * them to be written back in case we stay in NAPI.
		 * In this mode on X722 we do not enable Interrupt.
		 */
		j = i40e_get_tx_pending(tx_ring, false);

		if (budget &&
		    ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
		    !test_bit(__I40E_DOWN, &vsi->state) &&
		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
			tx_ring->arm_wb = true;
	}

	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
						      tx_ring->queue_index),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}
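
/* Note on the WB_ON_ITR path above: with WB_STRIDE = 0x3 the test
 * ((j / (WB_STRIDE + 1)) == 0) && (j != 0) is simply "between one and three
 * descriptors are still pending", matching the "< 4 descriptors" comment, in
 * which case a write-back is forced via arm_wb instead of waiting on an
 * interrupt.
 */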

/**
 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to enable writeback
 *
 **/
static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
				  struct i40e_q_vector *q_vector)
{
	u16 flags = q_vector->tx.ring[0].flags;
	u32 val;

	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
		return;

	if (q_vector->arm_wb_state)
		return;

	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
		     val);
	} else {
		val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
	q_vector->arm_wb_state = true;
}

/**
 * i40e_force_wb - Issue SW Interrupt so HW does a wb
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 *
 **/
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
					 vsi->base_vector - 1), val);
	} else {
		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
}
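
/* Note on the two writeback helpers above: i40e_enable_wb_on_itr() only sets
 * the WB_ON_ITR bit (without INTENA), asking the hardware to write back
 * completed descriptors without raising an interrupt, whereas i40e_force_wb()
 * fires a software interrupt (SWINT_TRIG) so that the writeback happens
 * immediately.
 */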

/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Returns true if ITR changed, false if not
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt.  The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern.  Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	struct i40e_q_vector *qv = rc->ring->q_vector;
	u32 new_itr = rc->itr;
	int bytes_per_int;
	int usecs;

	if (rc->total_packets == 0 || !rc->itr)
		return false;

	/* simple throttlerate management
	 *   0-10MB/s   lowest (50000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (18000 ints/s)
	 *  > 40000 Rx packets per second (8000 ints/s)
	 *
	 * The math works out because the divisor is in 10^(-6) which
	 * turns the bytes/us input value into MB/s values, but
	 * make sure to use usecs, as the register values written
	 * are in 2 usec increments in the ITR registers, and make sure
	 * to use the smoothed values that the countdown timer gives us.
	 */
	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
	bytes_per_int = rc->total_bytes / usecs;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
	case I40E_ULTRA_LATENCY:
	default:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}

	/* this is to adjust RX more aggressively when streaming small
	 * packets.  The value of 40000 was picked as it is just beyond
	 * what the hardware can receive per second if in low latency
	 * mode.
	 */
#define RX_ULTRA_PACKET_RATE 40000

	if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
	    (&qv->rx == rc))
		new_latency_range = I40E_ULTRA_LATENCY;

	rc->latency_range = new_latency_range;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_50K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_18K;
		break;
	case I40E_ULTRA_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	rc->total_bytes = 0;
	rc->total_packets = 0;

	if (new_itr != rc->itr) {
		rc->itr = new_itr;
		return true;
	}

	return false;
}
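
/* Worked illustration of the ITR math above: rc->itr is kept in 2 usec
 * units, so (rc->itr << 1) is the interval in usecs, and multiplying by
 * ITR_COUNTDOWN_START (a constant defined elsewhere in the driver)
 * approximates the number of usecs covered by the smoothed sample.  Dividing
 * total_bytes by that usec count yields bytes/usec, which is numerically
 * MB/s, so the 10 and 20 thresholds in the switch correspond roughly to the
 * 10 MB/s and 20 MB/s boundaries listed in the comment.
 */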

/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @rx_desc: the rx descriptor written back by HW
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 *
 **/
static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
					  union i40e_rx_desc *rx_desc)
{
	u64 qw;
	u8 id;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
	     I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;

	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		i40e_fd_handle_status(rx_ring, rx_desc, id);
#ifdef I40E_FCOE
	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
#endif
}

/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_bi);
	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}
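
/* Note: the extra u32 added to tx_ring->size above reserves room after the
 * descriptors for the head writeback location that i40e_get_head() reads;
 * this is how i40e_get_tx_pending() and i40e_clean_tx_irq() learn how far
 * the hardware has progressed without a dedicated head register.
 */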

/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	if (ring_is_ps_enabled(rx_ring)) {
		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;

		rx_bi = &rx_ring->rx_bi[0];
		if (rx_bi->hdr_buf) {
			dma_free_coherent(dev,
					  bufsz,
					  rx_bi->hdr_buf,
					  rx_bi->dma);
			for (i = 0; i < rx_ring->count; i++) {
				rx_bi = &rx_ring->rx_bi[i];
				rx_bi->dma = 0;
				rx_bi->hdr_buf = NULL;
			}
		}
	}
	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		if (rx_bi->dma) {
			dma_unmap_single(dev,
					 rx_bi->dma,
					 rx_ring->rx_buf_len,
					 DMA_FROM_DEVICE);
			rx_bi->dma = 0;
		}
		if (rx_bi->skb) {
			dev_kfree_skb(rx_bi->skb);
			rx_bi->skb = NULL;
		}
		if (rx_bi->page) {
			if (rx_bi->page_dma) {
				dma_unmap_page(dev,
					       rx_bi->page_dma,
					       PAGE_SIZE,
					       DMA_FROM_DEVICE);
				rx_bi->page_dma = 0;
			}
			__free_page(rx_bi->page);
			rx_bi->page = NULL;
			rx_bi->page_offset = 0;
		}
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * i40e_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40e_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}

/**
 * i40e_alloc_rx_headers - allocate rx header buffers
 * @rx_ring: ring to alloc buffers
 *
 * Allocate rx header buffers for the entire ring. As these are static,
 * this is only called when setting up a new ring.
 **/
void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	dma_addr_t dma;
	void *buffer;
	int buf_size;
	int i;

	if (rx_ring->rx_bi[0].hdr_buf)
		return;
	/* Make sure the buffers don't cross cache line boundaries. */
	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
				    &dma, GFP_KERNEL);
	if (!buffer)
		return;
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		rx_bi->dma = dma + (i * buf_size);
		rx_bi->hdr_buf = buffer + (i * buf_size);
	}
}

/**
 * i40e_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(rx_ring->rx_bi);
	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}

/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}

/**
 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns true if any errors on allocation
 **/
bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	const int current_node = numa_node_id();

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];

		if (bi->skb) /* desc is in use */
			goto no_buffers;

		/* If we've been moved to a different NUMA node, release the
		 * page so we can get a new one on the current node.
		 */
		if (bi->page && page_to_nid(bi->page) != current_node) {
			dma_unmap_page(rx_ring->dev,
				       bi->page_dma,
				       PAGE_SIZE,
				       DMA_FROM_DEVICE);
			__free_page(bi->page);
			bi->page = NULL;
			bi->page_dma = 0;
			rx_ring->rx_stats.realloc_count++;
		} else if (bi->page) {
			rx_ring->rx_stats.page_reuse_count++;
		}

		if (!bi->page) {
			bi->page = alloc_page(GFP_ATOMIC);
			if (!bi->page) {
				rx_ring->rx_stats.alloc_page_failed++;
				goto no_buffers;
			}
			bi->page_dma = dma_map_page(rx_ring->dev,
						    bi->page,
						    0,
						    PAGE_SIZE,
						    DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
				rx_ring->rx_stats.alloc_page_failed++;
				__free_page(bi->page);
				bi->page = NULL;
				bi->page_dma = 0;
				bi->page_offset = 0;
				goto no_buffers;
			}
			bi->page_offset = 0;
		}

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr =
			cpu_to_le64(bi->page_dma + bi->page_offset);
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	return false;

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}
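
/* Note on the packet-split buffer scheme used above: each receive buffer
 * pairs a small coherent header buffer (bi->dma/bi->hdr_buf, filled in by
 * i40e_alloc_rx_headers()) with half of a streaming-DMA page.  The page half
 * currently handed to hardware is selected by bi->page_offset, which the
 * cleanup path flips by PAGE_SIZE / 2 so the other half can be reused while
 * the stack still holds a reference to the first half.
 */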

/**
 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns true if any errors on allocation
 **/
bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	struct sk_buff *skb;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];
		skb = bi->skb;

		if (!skb) {
			skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
							  rx_ring->rx_buf_len,
							  GFP_ATOMIC |
							  __GFP_NOWARN);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				goto no_buffers;
			}
			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			bi->skb = skb;
		}

		if (!bi->dma) {
			bi->dma = dma_map_single(rx_ring->dev,
						 skb->data,
						 rx_ring->rx_buf_len,
						 DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
				rx_ring->rx_stats.alloc_buff_failed++;
				bi->dma = 0;
				dev_kfree_skb(bi->skb);
				bi->skb = NULL;
				goto no_buffers;
			}
		}

		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
		rx_desc->read.hdr_addr = 0;
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	return false;

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}

/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring: rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;

	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
	    (vlan_tag & VLAN_VID_MASK))
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	napi_gro_receive(&q_vector->napi, skb);
}

/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_status: status value of last descriptor in packet
 * @rx_error: error value of last descriptor in packet
 * @rx_ptype: ptype value of last descriptor in packet
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    u32 rx_status,
				    u32 rx_error,
				    u16 rx_ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
	bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;

	skb->ip_summed = CHECKSUM_NONE;

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* did the hardware decode the packet and checksum? */
	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* both known and outer_ip must be set for the below code to work */
	if (!(decoded.known && decoded.outer_ip))
		return;

	ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
	ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);

	if (ipv4 &&
	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
		goto checksum_fail;

	/* likely incorrect csum if alternate IP extension headers found */
	if (ipv6 &&
	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		/* don't increment checksum err here, non-fatal err */
		return;

	/* there was some L4 error, count error and punt packet to the stack */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
		goto checksum_fail;

	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
		return;

	/* The hardware supported by this driver does not validate outer
	 * checksums for tunneled VXLAN or GENEVE frames. I don't agree
	 * with it but the specification states that you "MAY validate", it
	 * doesn't make it a hard requirement so if we have validated the
	 * inner checksum report CHECKSUM_UNNECESSARY.
	 */

	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum_level = ipv4_tunnel || ipv6_tunnel;

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}
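
/* Note: the csum_level assignment above marks tunneled (GRE/UDP-encapsulated)
 * ptypes as having one additional checksum level covered by
 * CHECKSUM_UNNECESSARY, while non-tunneled packets keep csum_level at 0.
 */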

/**
 * i40e_ptype_to_htype - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return PKT_HASH_TYPE_NONE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
		return PKT_HASH_TYPE_L4;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
		return PKT_HASH_TYPE_L3;
	else
		return PKT_HASH_TYPE_L2;
}

/**
 * i40e_rx_hash - set the hash value in the skb
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline void i40e_rx_hash(struct i40e_ring *ring,
				union i40e_rx_desc *rx_desc,
				struct sk_buff *skb,
				u8 rx_ptype)
{
	u32 hash;
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if (!(ring->netdev->features & NETIF_F_RXHASH))
		return;

	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
	}
}

/**
 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
 * @rx_ring: rx ring to clean
 * @budget: how many cleans we're allowed
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	struct i40e_vsi *vsi = rx_ring->vsi;
	u16 i = rx_ring->next_to_clean;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	bool failure = false;
	u8 rx_ptype;
	u64 qword;
	u32 copysize;

	if (budget <= 0)
		return 0;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			failure = failure ||
				  i40e_alloc_rx_buffers_ps(rx_ring,
							   cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			    I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();
		/* sync header buffer for reading */
		dma_sync_single_range_for_cpu(rx_ring->dev,
					      rx_ring->rx_bi[0].dma,
					      i * rx_ring->rx_hdr_len,
					      rx_ring->rx_hdr_len,
					      DMA_FROM_DEVICE);
		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring, rx_desc);
			I40E_RX_INCREMENT(rx_ring, i);
			continue;
		}
		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		if (likely(!skb)) {
			skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
							  rx_ring->rx_hdr_len,
							  GFP_ATOMIC |
							  __GFP_NOWARN);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				failure = true;
				break;
			}

			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			/* we are reusing so sync this buffer for CPU use */
			dma_sync_single_range_for_cpu(rx_ring->dev,
						      rx_ring->rx_bi[0].dma,
						      i * rx_ring->rx_hdr_len,
						      rx_ring->rx_hdr_len,
						      DMA_FROM_DEVICE);
		}
		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		/* sync half-page for reading */
		dma_sync_single_range_for_cpu(rx_ring->dev,
					      rx_bi->page_dma,
					      rx_bi->page_offset,
					      PAGE_SIZE / 2,
					      DMA_FROM_DEVICE);
		prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
		rx_bi->skb = NULL;
		cleaned_count++;
		copysize = 0;
		if (rx_hbo || rx_sph) {
			int len;

			if (rx_hbo)
				len = I40E_RX_HDR_SIZE;
			else
				len = rx_header_len;
			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
		} else if (skb->len == 0) {
			int len;
			unsigned char *va = page_address(rx_bi->page) +
					    rx_bi->page_offset;

			len = min(rx_packet_len, rx_ring->rx_hdr_len);
			memcpy(__skb_put(skb, len), va, len);
			copysize = len;
			rx_packet_len -= len;
		}
		/* Get the rest of the data if this was a header split */
		if (rx_packet_len) {
			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
					rx_bi->page,
					rx_bi->page_offset + copysize,
					rx_packet_len, I40E_RXBUFFER_2048);

			/* If the page count is more than 2, then both halves
			 * of the page are used and we need to free it. Do it
			 * here instead of in the alloc code. Otherwise one
			 * of the half-pages might be released between now and
			 * then, and we wouldn't know which one to use.
			 * Don't call get_page and free_page since those are
			 * both expensive atomic operations that just change
			 * the refcount in opposite directions. Just give the
			 * page to the stack; he can have our refcount.
			 */
			if (page_count(rx_bi->page) > 2) {
				dma_unmap_page(rx_ring->dev,
					       rx_bi->page_dma,
					       PAGE_SIZE,
					       DMA_FROM_DEVICE);
				rx_bi->page = NULL;
				rx_bi->page_dma = 0;
				rx_ring->rx_stats.realloc_count++;
			} else {
				get_page(rx_bi->page);
				/* switch to the other half-page here; the
				 * allocation code programs the right addr
				 * into HW. If we haven't used this half-page,
				 * the address won't be changed, and HW can
				 * just use it next time through.
				 */
				rx_bi->page_offset ^= PAGE_SIZE / 2;
			}

		}
		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			struct i40e_rx_buffer *next_buffer;

			next_buffer = &rx_ring->rx_bi[i];
			next_buffer->skb = skb;
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			continue;
		}

		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);

		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
					     I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
					     I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
			rx_ring->last_rx_timestamp = jiffies;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
#ifdef I40E_FCOE
		if (unlikely(
		    i40e_rx_is_fcoe(rx_ptype) &&
		    !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
			dev_kfree_skb_any(skb);
			continue;
		}
#endif
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_desc->wb.qword1.status_error_len = 0;

	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return failure ? budget : total_rx_packets;
}
1729
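/* Illustrative sketch, not from the driver source: the half-page reuse
 * decision in i40e_clean_rx_irq_ps() above, restated over a plain struct
 * instead of struct page. my_rx_buffer, refcount and reuse_half_page() are
 * names invented for this example; the refcount field stands in for
 * page_count(rx_bi->page).
 */
#include <stdbool.h>
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096u

struct my_rx_buffer {
	int refcount;		/* stands in for page_count(rx_bi->page) */
	size_t page_offset;	/* which half of the page receives data */
	bool in_use;		/* stands in for rx_bi->page != NULL */
};

/* Returns true if the buffer can serve another receive without reallocating. */
static bool reuse_half_page(struct my_rx_buffer *buf)
{
	if (buf->refcount > 2) {
		/* both halves are owned by the stack: drop the page and let
		 * the allocation path map a fresh one (realloc_count++)
		 */
		buf->in_use = false;
		return false;
	}

	/* keep one reference and hand the other to the stack, then flip to
	 * the unused half for the next packet
	 */
	buf->refcount++;
	buf->page_offset ^= SKETCH_PAGE_SIZE / 2;
	return true;
}

int main(void)
{
	struct my_rx_buffer buf = { .refcount = 2, .page_offset = 0, .in_use = true };

	return reuse_half_page(&buf) ? 0 : 1;	/* expect reuse, offset now 2048 */
}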
1730/**
1731 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1732 * @rx_ring: rx ring to clean
1733 * @budget: how many cleans we're allowed
1734 *
1735 * Returns number of packets cleaned
1736 **/
1737static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1738{
1739 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1740 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1741 struct i40e_vsi *vsi = rx_ring->vsi;
1742 union i40e_rx_desc *rx_desc;
1743 u32 rx_error, rx_status;
1744 u16 rx_packet_len;
c2e245ab 1745 bool failure = false;
a132af24
MW
1746 u8 rx_ptype;
1747 u64 qword;
1748 u16 i;
1749
1750 do {
1751 struct i40e_rx_buffer *rx_bi;
1752 struct sk_buff *skb;
1753 u16 vlan_tag;
fd0a05ce
JB
1754 /* return some buffers to hardware, one at a time is too slow */
1755 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
c2e245ab
JB
1756 failure = failure ||
1757 i40e_alloc_rx_buffers_1buf(rx_ring,
1758 cleaned_count);
fd0a05ce
JB
1759 cleaned_count = 0;
1760 }
1761
a132af24
MW
1762 i = rx_ring->next_to_clean;
1763 rx_desc = I40E_RX_DESC(rx_ring, i);
fd0a05ce 1764 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
829af3ac 1765 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
a132af24
MW
1766 I40E_RXD_QW1_STATUS_SHIFT;
1767
41a1d04b 1768 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
a132af24
MW
1769 break;
1770
1771 /* This memory barrier is needed to keep us from reading
1772 * any other fields out of the rx_desc until we know the
1773 * DD bit is set.
1774 */
67317166 1775 dma_rmb();
a132af24
MW
1776
1777 if (i40e_rx_is_programming_status(qword)) {
1778 i40e_clean_programming_status(rx_ring, rx_desc);
1779 I40E_RX_INCREMENT(rx_ring, i);
1780 continue;
1781 }
1782 rx_bi = &rx_ring->rx_bi[i];
1783 skb = rx_bi->skb;
1784 prefetch(skb->data);
1785
1786 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1787 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1788
1789 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1790 I40E_RXD_QW1_ERROR_SHIFT;
41a1d04b 1791 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
a132af24
MW
1792
1793 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1794 I40E_RXD_QW1_PTYPE_SHIFT;
1795 rx_bi->skb = NULL;
1796 cleaned_count++;
1797
1798 /* Get the header and possibly the whole packet
1799 * If this is an skb from a previous receive, dma will be 0
1800 */
1801 skb_put(skb, rx_packet_len);
1802 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1803 DMA_FROM_DEVICE);
1804 rx_bi->dma = 0;
1805
1806 I40E_RX_INCREMENT(rx_ring, i);
1807
1808 if (unlikely(
41a1d04b 1809 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
a132af24
MW
1810 rx_ring->rx_stats.non_eop_descs++;
1811 continue;
1812 }
1813
1814 /* ERR_MASK will only have valid bits if EOP set */
41a1d04b 1815 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
a132af24 1816 dev_kfree_skb_any(skb);
a132af24
MW
1817 continue;
1818 }
1819
857942fd 1820 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
a132af24
MW
1821 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1822 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1823 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1824 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1825 rx_ring->last_rx_timestamp = jiffies;
1826 }
1827
1828 /* probably a little skewed due to removing CRC */
1829 total_rx_bytes += skb->len;
1830 total_rx_packets++;
1831
1832 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1833
1834 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1835
41a1d04b 1836 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
a132af24
MW
1837 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1838 : 0;
1839#ifdef I40E_FCOE
1f15d667
JB
1840 if (unlikely(
1841 i40e_rx_is_fcoe(rx_ptype) &&
1842 !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
a132af24
MW
1843 dev_kfree_skb_any(skb);
1844 continue;
1845 }
1846#endif
1847 i40e_receive_skb(rx_ring, skb, vlan_tag);
1848
a132af24
MW
1849 rx_desc->wb.qword1.status_error_len = 0;
1850 } while (likely(total_rx_packets < budget));
fd0a05ce 1851
980e9b11 1852 u64_stats_update_begin(&rx_ring->syncp);
a114d0a6
AD
1853 rx_ring->stats.packets += total_rx_packets;
1854 rx_ring->stats.bytes += total_rx_bytes;
980e9b11 1855 u64_stats_update_end(&rx_ring->syncp);
fd0a05ce
JB
1856 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1857 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1858
c2e245ab 1859 return failure ? budget : total_rx_packets;
fd0a05ce
JB
1860}
1861
8f5e39ce
JB
1862static u32 i40e_buildreg_itr(const int type, const u16 itr)
1863{
1864 u32 val;
1865
1866 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
40d72a50
JB
1867 /* Don't clear PBA because that can cause lost interrupts that
1868 * came in while we were cleaning/polling
1869 */
8f5e39ce
JB
1870 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1871 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1872
1873 return val;
1874}
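/* Illustrative sketch, not from the driver source: the shift-and-OR packing
 * done by i40e_buildreg_itr() above. The SKETCH_* shift/mask values are my
 * restatement of the PFINT_DYN_CTLN layout and the 2-usec ITR granularity;
 * treat them as assumptions and check i40e_register.h for the real ones.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_DYN_CTLN_INTENA_MASK     0x1u	/* bit 0: enable the interrupt */
#define SKETCH_DYN_CTLN_ITR_INDX_SHIFT  3	/* which ITR register to load */
#define SKETCH_DYN_CTLN_INTERVAL_SHIFT  5	/* interval, in 2 usec units */

static uint32_t sketch_buildreg_itr(int type, uint16_t itr)
{
	/* PBA is deliberately left alone, matching the driver comment above */
	return SKETCH_DYN_CTLN_INTENA_MASK |
	       ((uint32_t)type << SKETCH_DYN_CTLN_ITR_INDX_SHIFT) |
	       ((uint32_t)itr << SKETCH_DYN_CTLN_INTERVAL_SHIFT);
}

int main(void)
{
	/* ITR index 0 (Rx) with a 50 usec interval (25 * 2 usec) */
	printf("dyn_ctln image: 0x%08x\n",
	       (unsigned int)sketch_buildreg_itr(0, 25));
	return 0;
}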
1875
1876/* a small macro to shorten up some long lines */
1877#define INTREG I40E_PFINT_DYN_CTLN
1878
de32e3ef
CW
1879/**
1880 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1881 * @vsi: the VSI we care about
1882 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1883 *
1884 **/
1885static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1886 struct i40e_q_vector *q_vector)
1887{
1888 struct i40e_hw *hw = &vsi->back->hw;
8f5e39ce
JB
1889 bool rx = false, tx = false;
1890 u32 rxval, txval;
de32e3ef 1891 int vector;
a75e8005 1892 int idx = q_vector->v_idx;
de32e3ef
CW
1893
1894 vector = (q_vector->v_idx + vsi->base_vector);
8f5e39ce 1895
ee2319cf
JB
1896 /* avoid dynamic calculation if in countdown mode OR if
1897 * all dynamic is disabled
1898 */
8f5e39ce
JB
1899 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1900
ee2319cf 1901 if (q_vector->itr_countdown > 0 ||
a75e8005
KL
1902 (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
1903 !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
ee2319cf
JB
1904 goto enable_int;
1905 }
1906
a75e8005 1907 if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
8f5e39ce
JB
1908 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1909 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
de32e3ef 1910 }
8f5e39ce 1911
a75e8005 1912 if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
8f5e39ce
JB
1913 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1914 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
de32e3ef 1915 }
8f5e39ce
JB
1916
1917 if (rx || tx) {
1918 /* get the higher of the two ITR adjustments and
1919 * use the same value for both ITR registers
1920 * when in adaptive mode (Rx and/or Tx)
1921 */
1922 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1923
1924 q_vector->tx.itr = q_vector->rx.itr = itr;
1925 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1926 tx = true;
1927 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1928 rx = true;
1929 }
1930
1931 /* only need to enable the interrupt once, but need
1932 * to possibly update both ITR values
1933 */
1934 if (rx) {
1935 /* set the INTENA_MSK_MASK so that this first write
1936 * won't actually enable the interrupt, instead just
1937 * updating the ITR (it's bit 31 PF and VF)
1938 */
1939 rxval |= BIT(31);
1940 /* don't check _DOWN because interrupt isn't being enabled */
1941 wr32(hw, INTREG(vector - 1), rxval);
1942 }
1943
ee2319cf 1944enable_int:
8f5e39ce
JB
1945 if (!test_bit(__I40E_DOWN, &vsi->state))
1946 wr32(hw, INTREG(vector - 1), txval);
ee2319cf
JB
1947
1948 if (q_vector->itr_countdown)
1949 q_vector->itr_countdown--;
1950 else
1951 q_vector->itr_countdown = ITR_COUNTDOWN_START;
de32e3ef
CW
1952}
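/* Illustrative sketch, not from the driver source: the "update both ITR
 * values, enable the interrupt only once" flow of i40e_update_enable_itr()
 * above. itr_pair, write_reg() and the shift values inside build() are
 * invented for the example; bit 31 stands in for the INTENA_MSK bit the
 * driver uses to update an ITR without re-enabling the interrupt.
 */
#include <stdint.h>
#include <stdio.h>

struct itr_pair {
	uint16_t rx_itr;
	uint16_t tx_itr;
};

static void write_reg(const char *what, uint32_t val)
{
	printf("%s <- 0x%08x\n", what, (unsigned int)val);	/* stands in for wr32() */
}

static uint32_t build(int itr_indx, uint16_t itr)
{
	return 0x1u | ((uint32_t)itr_indx << 3) | ((uint32_t)itr << 5);
}

static void update_enable_itr(struct itr_pair *q, int rx_dynamic, int tx_dynamic)
{
	uint32_t rxval = build(3 /* ITR_NONE */, 0);
	uint32_t txval = rxval;

	if (rx_dynamic || tx_dynamic) {
		/* use the higher of the two adjustments for both registers */
		uint16_t itr = q->rx_itr > q->tx_itr ? q->rx_itr : q->tx_itr;

		q->rx_itr = q->tx_itr = itr;
		rxval = build(0 /* RX_ITR */, itr);
		txval = build(1 /* TX_ITR */, itr);

		/* first write updates the Rx ITR with the enable effect masked */
		write_reg("DYN_CTLN (rx, masked)", rxval | (1u << 31));
	}

	/* final write updates the Tx ITR and re-enables the interrupt */
	write_reg("DYN_CTLN (tx, enable)", txval);
}

int main(void)
{
	struct itr_pair q = { .rx_itr = 40, .tx_itr = 62 };

	update_enable_itr(&q, 1, 1);
	return 0;
}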
1953
fd0a05ce
JB
1954/**
1955 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1956 * @napi: napi struct with our devices info in it
1957 * @budget: amount of work driver is allowed to do this pass, in packets
1958 *
1959 * This function will clean all queues associated with a q_vector.
1960 *
1961 * Returns the amount of work done
1962 **/
1963int i40e_napi_poll(struct napi_struct *napi, int budget)
1964{
1965 struct i40e_q_vector *q_vector =
1966 container_of(napi, struct i40e_q_vector, napi);
1967 struct i40e_vsi *vsi = q_vector->vsi;
cd0b6fa6 1968 struct i40e_ring *ring;
fd0a05ce 1969 bool clean_complete = true;
d91649f5 1970 bool arm_wb = false;
fd0a05ce 1971 int budget_per_ring;
32b3e08f 1972 int work_done = 0;
fd0a05ce
JB
1973
1974 if (test_bit(__I40E_DOWN, &vsi->state)) {
1975 napi_complete(napi);
1976 return 0;
1977 }
1978
9c6c1259
KP
1979 /* Clear hung_detected bit */
1980 clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
cd0b6fa6
AD
1981 /* Since the actual Tx work is minimal, we can give the Tx a larger
1982 * budget and be more aggressive about cleaning up the Tx descriptors.
1983 */
d91649f5 1984 i40e_for_each_ring(ring, q_vector->tx) {
a619afe8 1985 if (!i40e_clean_tx_irq(vsi, ring, budget)) {
f2edaaaa
AD
1986 clean_complete = false;
1987 continue;
1988 }
1989 arm_wb |= ring->arm_wb;
0deda868 1990 ring->arm_wb = false;
d91649f5 1991 }
cd0b6fa6 1992
c67caceb
AD
1993 /* Handle case where we are called by netpoll with a budget of 0 */
1994 if (budget <= 0)
1995 goto tx_only;
1996
fd0a05ce
JB
1997 /* We attempt to distribute budget to each Rx queue fairly, but don't
1998 * allow the budget to go below 1 because that would exit polling early.
fd0a05ce
JB
1999 */
2000 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
cd0b6fa6 2001
a132af24 2002 i40e_for_each_ring(ring, q_vector->rx) {
32b3e08f
JB
2003 int cleaned;
2004
a132af24
MW
2005 if (ring_is_ps_enabled(ring))
2006 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
2007 else
2008 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
32b3e08f
JB
2009
2010 work_done += cleaned;
f2edaaaa
AD
2011 /* if we clean as many as budgeted, we must not be done */
2012 if (cleaned >= budget_per_ring)
2013 clean_complete = false;
a132af24 2014 }
fd0a05ce
JB
2015
2016 /* If work not completed, return budget and polling will return */
d91649f5 2017 if (!clean_complete) {
c67caceb 2018tx_only:
164c9f54
ASJ
2019 if (arm_wb) {
2020 q_vector->tx.ring[0].tx_stats.tx_force_wb++;
ecc6a239 2021 i40e_enable_wb_on_itr(vsi, q_vector);
164c9f54 2022 }
fd0a05ce 2023 return budget;
d91649f5 2024 }
fd0a05ce 2025
8e0764b4
ASJ
2026 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
2027 q_vector->arm_wb_state = false;
2028
fd0a05ce 2029 /* Work is done so exit the polling mode and re-enable the interrupt */
32b3e08f 2030 napi_complete_done(napi, work_done);
de32e3ef
CW
2031 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
2032 i40e_update_enable_itr(vsi, q_vector);
2033 } else { /* Legacy mode */
40d72a50 2034 i40e_irq_dynamic_enable_icr0(vsi->back, false);
fd0a05ce 2035 }
fd0a05ce
JB
2036 return 0;
2037}
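/* Illustrative sketch, not from the driver source: the per-ring budget split
 * used by i40e_napi_poll() above. clean_ring() is a stand-in for the
 * i40e_clean_rx_irq_* calls, and the cleaned counts it returns are fabricated
 * just to exercise the bookkeeping.
 */
#include <stdbool.h>
#include <stdio.h>

static int clean_ring(int ring, int budget_per_ring)
{
	/* pretend ring 0 is busy and consumes its whole share */
	int cleaned = ring == 0 ? budget_per_ring : budget_per_ring / 2;

	printf("ring %d cleaned %d of %d\n", ring, cleaned, budget_per_ring);
	return cleaned;
}

int main(void)
{
	const int budget = 64, num_ringpairs = 3;
	int budget_per_ring = budget / num_ringpairs;
	bool clean_complete = true;
	int work_done = 0;

	if (budget_per_ring < 1)
		budget_per_ring = 1;	/* never hand a ring a zero budget */

	for (int ring = 0; ring < num_ringpairs; ring++) {
		int cleaned = clean_ring(ring, budget_per_ring);

		work_done += cleaned;
		/* consuming the full share means there may be more work */
		if (cleaned >= budget_per_ring)
			clean_complete = false;
	}

	printf("work_done=%d, %s\n", work_done,
	       clean_complete ? "complete: re-enable IRQ" : "not complete: return budget");
	return 0;
}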
2038
2039/**
2040 * i40e_atr - Add a Flow Director ATR filter
2041 * @tx_ring: ring to add programming descriptor to
2042 * @skb: send buffer
89232c3b 2043 * @tx_flags: send tx flags
fd0a05ce
JB
2044 **/
2045static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
6b037cd4 2046 u32 tx_flags)
fd0a05ce
JB
2047{
2048 struct i40e_filter_program_desc *fdir_desc;
2049 struct i40e_pf *pf = tx_ring->vsi->back;
2050 union {
2051 unsigned char *network;
2052 struct iphdr *ipv4;
2053 struct ipv6hdr *ipv6;
2054 } hdr;
2055 struct tcphdr *th;
2056 unsigned int hlen;
2057 u32 flex_ptype, dtype_cmd;
ffcc55c0 2058 int l4_proto;
fc4ac67b 2059 u16 i;
fd0a05ce
JB
2060
2061 /* make sure ATR is enabled */
60ea5f83 2062 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
fd0a05ce
JB
2063 return;
2064
04294e38
ASJ
2065 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2066 return;
2067
fd0a05ce
JB
2068 /* if sampling is disabled do nothing */
2069 if (!tx_ring->atr_sample_rate)
2070 return;
2071
6b037cd4 2072 /* Currently only IPv4/IPv6 with TCP is supported */
89232c3b
ASJ
2073 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2074 return;
fd0a05ce 2075
ffcc55c0
AD
2076 /* snag network header to get L4 type and address */
2077 hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
2078 skb_inner_network_header(skb) : skb_network_header(skb);
fd0a05ce 2079
ffcc55c0
AD
2080 /* Note: tx_flags gets modified to reflect inner protocols in
2081 * the tx_enable_csum function if encap is enabled.
2082 */
2083 if (tx_flags & I40E_TX_FLAGS_IPV4) {
6b037cd4 2084 /* access ihl as u8 to avoid unaligned access on ia64 */
ffcc55c0
AD
2085 hlen = (hdr.network[0] & 0x0F) << 2;
2086 l4_proto = hdr.ipv4->protocol;
fd0a05ce 2087 } else {
ffcc55c0
AD
2088 hlen = hdr.network - skb->data;
2089 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
2090 hlen -= hdr.network - skb->data;
fd0a05ce
JB
2091 }
2092
6b037cd4 2093 if (l4_proto != IPPROTO_TCP)
89232c3b
ASJ
2094 return;
2095
fd0a05ce
JB
2096 th = (struct tcphdr *)(hdr.network + hlen);
2097
55a5e60b
ASJ
2098 /* Due to lack of space, no more new filters can be programmed */
2099 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2100 return;
72b74869
ASJ
2101 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2102 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
52eb95ef
ASJ
2103 /* HW ATR eviction will take care of removing filters on FIN
2104 * and RST packets.
2105 */
2106 if (th->fin || th->rst)
2107 return;
2108 }
55a5e60b
ASJ
2109
2110 tx_ring->atr_count++;
2111
ce806783
ASJ
2112 /* sample on all syn/fin/rst packets or once every atr sample rate */
2113 if (!th->fin &&
2114 !th->syn &&
2115 !th->rst &&
2116 (tx_ring->atr_count < tx_ring->atr_sample_rate))
fd0a05ce
JB
2117 return;
2118
2119 tx_ring->atr_count = 0;
2120
2121 /* grab the next descriptor */
fc4ac67b
AD
2122 i = tx_ring->next_to_use;
2123 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2124
2125 i++;
2126 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
fd0a05ce
JB
2127
2128 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2129 I40E_TXD_FLTR_QW0_QINDEX_MASK;
6b037cd4 2130 flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
fd0a05ce
JB
2131 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2132 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2133 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2134 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2135
2136 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2137
2138 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2139
ce806783 2140 dtype_cmd |= (th->fin || th->rst) ?
fd0a05ce
JB
2141 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2142 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2143 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2144 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2145
2146 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2147 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2148
2149 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2150 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2151
433c47de 2152 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
6a899024 2153 if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
60ccd45c
ASJ
2154 dtype_cmd |=
2155 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2156 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2157 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2158 else
2159 dtype_cmd |=
2160 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2161 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2162 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
433c47de 2163
72b74869
ASJ
2164 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2165 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
52eb95ef
ASJ
2166 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2167
fd0a05ce 2168 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
99753ea6 2169 fdir_desc->rsvd = cpu_to_le32(0);
fd0a05ce 2170 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
99753ea6 2171 fdir_desc->fd_id = cpu_to_le32(0);
fd0a05ce
JB
2172}
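/* Illustrative sketch, not from the driver source: the IPv4 header-length
 * extraction and the sampling rule used by i40e_atr() above. The atr_state
 * struct, TCP flag bits and should_sample() are invented for this example;
 * the 20-packet sample rate in main() is only a plausible default.
 */
#include <stdbool.h>
#include <stdint.h>

#define TCP_FLAG_FIN 0x01u
#define TCP_FLAG_SYN 0x02u
#define TCP_FLAG_RST 0x04u

struct atr_state {
	unsigned int count;		/* packets since the last sample */
	unsigned int sample_rate;	/* ring's atr_sample_rate */
};

/* IPv4 IHL lives in the low nibble of the first header byte, in 4-byte words */
static unsigned int ipv4_header_len(uint8_t first_byte)
{
	return (first_byte & 0x0F) << 2;
}

static bool should_sample(struct atr_state *st, uint8_t tcp_flags)
{
	st->count++;

	/* connection setup/teardown is always interesting to ATR */
	if (tcp_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST)) {
		st->count = 0;
		return true;
	}

	/* otherwise only every sample_rate-th packet programs a filter */
	if (st->count < st->sample_rate)
		return false;

	st->count = 0;
	return true;
}

int main(void)
{
	struct atr_state st = { .count = 0, .sample_rate = 20 };
	uint8_t ihl_byte = 0x45;	/* IPv4, IHL = 5 -> 20-byte header */

	return ipv4_header_len(ihl_byte) == 20 && !should_sample(&st, 0) ? 0 : 1;
}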
2173
fd0a05ce
JB
2174/**
2175 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2176 * @skb: send buffer
2177 * @tx_ring: ring to send buffer on
2178 * @flags: the tx flags to be set
2179 *
2180 * Checks the skb and sets up the corresponding generic transmit flags
2181 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2182 *
2183 * Returns an error code to indicate the frame should be dropped upon error,
2184 * otherwise returns 0 to indicate the flags have been set properly.
2185 **/
38e00438 2186#ifdef I40E_FCOE
3e587cf3 2187inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
fd0a05ce
JB
2188 struct i40e_ring *tx_ring,
2189 u32 *flags)
3e587cf3
JB
2190#else
2191static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2192 struct i40e_ring *tx_ring,
2193 u32 *flags)
38e00438 2194#endif
fd0a05ce
JB
2195{
2196 __be16 protocol = skb->protocol;
2197 u32 tx_flags = 0;
2198
31eaaccf
GR
2199 if (protocol == htons(ETH_P_8021Q) &&
2200 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2201 /* When HW VLAN acceleration is turned off by the user the
2202 * stack sets the protocol to 8021q so that the driver
2203 * can take any steps required to support the SW only
2204 * VLAN handling. In our case the driver doesn't need
2205 * to take any further steps so just set the protocol
2206 * to the encapsulated ethertype.
2207 */
2208 skb->protocol = vlan_get_protocol(skb);
2209 goto out;
2210 }
2211
fd0a05ce 2212 /* if we have a HW VLAN tag being added, default to the HW one */
df8a39de
JP
2213 if (skb_vlan_tag_present(skb)) {
2214 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
fd0a05ce
JB
2215 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2216 /* else if it is a SW VLAN, check the next protocol and store the tag */
0e2fe46c 2217 } else if (protocol == htons(ETH_P_8021Q)) {
fd0a05ce 2218 struct vlan_hdr *vhdr, _vhdr;
6995b36c 2219
fd0a05ce
JB
2220 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2221 if (!vhdr)
2222 return -EINVAL;
2223
2224 protocol = vhdr->h_vlan_encapsulated_proto;
2225 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2226 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2227 }
2228
d40d00b1
NP
2229 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2230 goto out;
2231
fd0a05ce 2232 /* Insert 802.1p priority into VLAN header */
38e00438
VD
2233 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2234 (skb->priority != TC_PRIO_CONTROL)) {
fd0a05ce
JB
2235 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2236 tx_flags |= (skb->priority & 0x7) <<
2237 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2238 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2239 struct vlan_ethhdr *vhdr;
dd225bc6
FR
2240 int rc;
2241
2242 rc = skb_cow_head(skb, 0);
2243 if (rc < 0)
2244 return rc;
fd0a05ce
JB
2245 vhdr = (struct vlan_ethhdr *)skb->data;
2246 vhdr->h_vlan_TCI = htons(tx_flags >>
2247 I40E_TX_FLAGS_VLAN_SHIFT);
2248 } else {
2249 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2250 }
2251 }
d40d00b1
NP
2252
2253out:
fd0a05ce
JB
2254 *flags = tx_flags;
2255 return 0;
2256}
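/* Illustrative sketch, not from the driver source: the tx_flags VLAN packing
 * performed by i40e_tx_prepare_vlan_flags() above. The SKETCH_* shift/mask
 * values are my restatement of the layout in i40e_txrx.h (VLAN TCI in bits
 * 31:16, 802.1p priority in bits 31:29 of that field); verify against the
 * header before relying on them.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_VLAN_SHIFT       16
#define SKETCH_VLAN_MASK        0xffff0000u
#define SKETCH_VLAN_PRIO_SHIFT  29
#define SKETCH_VLAN_PRIO_MASK   0xe0000000u

static uint32_t pack_vlan(uint32_t tx_flags, uint16_t vlan_tci)
{
	tx_flags &= ~SKETCH_VLAN_MASK;
	tx_flags |= (uint32_t)vlan_tci << SKETCH_VLAN_SHIFT;
	return tx_flags;
}

static uint32_t set_dcb_priority(uint32_t tx_flags, uint8_t skb_priority)
{
	/* overwrite just the PCP bits of the stored TCI */
	tx_flags &= ~SKETCH_VLAN_PRIO_MASK;
	tx_flags |= ((uint32_t)skb_priority & 0x7) << SKETCH_VLAN_PRIO_SHIFT;
	return tx_flags;
}

int main(void)
{
	uint32_t tx_flags = pack_vlan(0, 0x0064);	/* VLAN ID 100 */

	tx_flags = set_dcb_priority(tx_flags, 5);	/* 802.1p priority 5 */
	printf("tx_flags = 0x%08x, tag for descriptor = 0x%04x\n",
	       (unsigned int)tx_flags,
	       (unsigned int)(tx_flags >> SKETCH_VLAN_SHIFT));
	return 0;
}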
2257
fd0a05ce
JB
2258/**
2259 * i40e_tso - set up the tso context descriptor
fd0a05ce 2260 * @skb: ptr to the skb we're sending
fd0a05ce 2261 * @hdr_len: ptr to the size of the packet header
9c883bd3 2262 * @cd_type_cmd_tso_mss: Quad Word 1
fd0a05ce
JB
2263 *
2264 * Returns 0 if no TSO can happen, 1 if TSO is in progress, or a negative error code
2265 **/
84b07992 2266static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
fd0a05ce 2267{
03f9d6a5 2268 u64 cd_cmd, cd_tso_len, cd_mss;
c777019a
AD
2269 union {
2270 struct iphdr *v4;
2271 struct ipv6hdr *v6;
2272 unsigned char *hdr;
2273 } ip;
c49a7bc3
AD
2274 union {
2275 struct tcphdr *tcp;
5453205c 2276 struct udphdr *udp;
c49a7bc3
AD
2277 unsigned char *hdr;
2278 } l4;
2279 u32 paylen, l4_offset;
fd0a05ce 2280 int err;
fd0a05ce 2281
e9f6563d
SN
2282 if (skb->ip_summed != CHECKSUM_PARTIAL)
2283 return 0;
2284
fd0a05ce
JB
2285 if (!skb_is_gso(skb))
2286 return 0;
2287
dd225bc6
FR
2288 err = skb_cow_head(skb, 0);
2289 if (err < 0)
2290 return err;
fd0a05ce 2291
c777019a
AD
2292 ip.hdr = skb_network_header(skb);
2293 l4.hdr = skb_transport_header(skb);
df23075f 2294
c777019a
AD
2295 /* initialize outer IP header fields */
2296 if (ip.v4->version == 4) {
2297 ip.v4->tot_len = 0;
2298 ip.v4->check = 0;
c49a7bc3 2299 } else {
c777019a
AD
2300 ip.v6->payload_len = 0;
2301 }
2302
577389a5 2303 if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
1c7b4a23 2304 SKB_GSO_GRE_CSUM |
577389a5
AD
2305 SKB_GSO_IPIP |
2306 SKB_GSO_SIT |
2307 SKB_GSO_UDP_TUNNEL |
5453205c 2308 SKB_GSO_UDP_TUNNEL_CSUM)) {
1c7b4a23
AD
2309 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
2310 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
2311 l4.udp->len = 0;
2312
5453205c
AD
2313 /* determine offset of outer transport header */
2314 l4_offset = l4.hdr - skb->data;
2315
2316 /* remove payload length from outer checksum */
24d41e5e
AD
2317 paylen = skb->len - l4_offset;
2318 csum_replace_by_diff(&l4.udp->check, htonl(paylen));
5453205c
AD
2319 }
2320
c777019a
AD
2321 /* reset pointers to inner headers */
2322 ip.hdr = skb_inner_network_header(skb);
2323 l4.hdr = skb_inner_transport_header(skb);
2324
2325 /* initialize inner IP header fields */
2326 if (ip.v4->version == 4) {
2327 ip.v4->tot_len = 0;
2328 ip.v4->check = 0;
2329 } else {
2330 ip.v6->payload_len = 0;
2331 }
fd0a05ce
JB
2332 }
2333
c49a7bc3
AD
2334 /* determine offset of inner transport header */
2335 l4_offset = l4.hdr - skb->data;
2336
2337 /* remove payload length from inner checksum */
24d41e5e
AD
2338 paylen = skb->len - l4_offset;
2339 csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
c49a7bc3
AD
2340
2341 /* compute length of segmentation header */
2342 *hdr_len = (l4.tcp->doff * 4) + l4_offset;
fd0a05ce
JB
2343
2344 /* find the field values */
2345 cd_cmd = I40E_TX_CTX_DESC_TSO;
2346 cd_tso_len = skb->len - *hdr_len;
2347 cd_mss = skb_shinfo(skb)->gso_size;
03f9d6a5
AD
2348 *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2349 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2350 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
fd0a05ce
JB
2351 return 1;
2352}
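/* Illustrative sketch, not from the driver source: the arithmetic at the end
 * of i40e_tso() above, which computes the segmentation header length and
 * folds the TSO command, payload length and MSS into context quad word 1.
 * The SKETCH_* shift values and the TSO command code are my restatement of
 * the i40e descriptor definitions; treat them as assumptions.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_CTX_QW1_CMD_SHIFT      4
#define SKETCH_CTX_QW1_TSO_LEN_SHIFT  30
#define SKETCH_CTX_QW1_MSS_SHIFT      50
#define SKETCH_CTX_DESC_TSO           0x01u

static uint64_t build_tso_qw1(uint32_t skb_len, uint32_t l4_offset,
			      uint32_t tcp_doff_words, uint32_t mss,
			      uint32_t *hdr_len)
{
	/* header = everything up to and including the inner TCP header */
	*hdr_len = l4_offset + tcp_doff_words * 4;

	uint64_t cd_tso_len = skb_len - *hdr_len;	/* payload to segment */

	return ((uint64_t)SKETCH_CTX_DESC_TSO << SKETCH_CTX_QW1_CMD_SHIFT) |
	       (cd_tso_len << SKETCH_CTX_QW1_TSO_LEN_SHIFT) |
	       ((uint64_t)mss << SKETCH_CTX_QW1_MSS_SHIFT);
}

int main(void)
{
	uint32_t hdr_len;
	/* 64 KiB skb, headers end 66 bytes in (14 + 20 + 32-byte TCP header) */
	uint64_t qw1 = build_tso_qw1(65536, 34, 8, 1460, &hdr_len);

	printf("hdr_len=%u qw1=0x%016llx\n",
	       (unsigned int)hdr_len, (unsigned long long)qw1);
	return 0;
}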
2353
beb0dff1
JK
2354/**
2355 * i40e_tsyn - set up the tsyn context descriptor
2356 * @tx_ring: ptr to the ring to send
2357 * @skb: ptr to the skb we're sending
2358 * @tx_flags: the collected send information
9c883bd3 2359 * @cd_type_cmd_tso_mss: Quad Word 1
beb0dff1
JK
2360 *
2361 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2362 **/
2363static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2364 u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2365{
2366 struct i40e_pf *pf;
2367
2368 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2369 return 0;
2370
2371 /* Tx timestamps cannot be sampled when doing TSO */
2372 if (tx_flags & I40E_TX_FLAGS_TSO)
2373 return 0;
2374
2375 /* only timestamp the outbound packet if the user has requested it and
2376 * we are not already transmitting a packet to be timestamped
2377 */
2378 pf = i40e_netdev_to_pf(tx_ring->netdev);
22b4777d
JK
2379 if (!(pf->flags & I40E_FLAG_PTP))
2380 return 0;
2381
9ce34f02
JK
2382 if (pf->ptp_tx &&
2383 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
beb0dff1
JK
2384 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2385 pf->ptp_tx_skb = skb_get(skb);
2386 } else {
2387 return 0;
2388 }
2389
2390 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2391 I40E_TXD_CTX_QW1_CMD_SHIFT;
2392
beb0dff1
JK
2393 return 1;
2394}
2395
fd0a05ce
JB
2396/**
2397 * i40e_tx_enable_csum - Enable Tx checksum offloads
2398 * @skb: send buffer
89232c3b 2399 * @tx_flags: pointer to Tx flags currently set
fd0a05ce
JB
2400 * @td_cmd: Tx descriptor command bits to set
2401 * @td_offset: Tx descriptor header offsets to set
554f4544 2402 * @tx_ring: Tx descriptor ring
fd0a05ce
JB
2403 * @cd_tunneling: ptr to context desc bits
2404 **/
529f1f65
AD
2405static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2406 u32 *td_cmd, u32 *td_offset,
2407 struct i40e_ring *tx_ring,
2408 u32 *cd_tunneling)
fd0a05ce 2409{
b96b78f2
AD
2410 union {
2411 struct iphdr *v4;
2412 struct ipv6hdr *v6;
2413 unsigned char *hdr;
2414 } ip;
2415 union {
2416 struct tcphdr *tcp;
2417 struct udphdr *udp;
2418 unsigned char *hdr;
2419 } l4;
a3fd9d88 2420 unsigned char *exthdr;
d1bd743b 2421 u32 offset, cmd = 0;
a3fd9d88 2422 __be16 frag_off;
b96b78f2
AD
2423 u8 l4_proto = 0;
2424
529f1f65
AD
2425 if (skb->ip_summed != CHECKSUM_PARTIAL)
2426 return 0;
2427
b96b78f2
AD
2428 ip.hdr = skb_network_header(skb);
2429 l4.hdr = skb_transport_header(skb);
fd0a05ce 2430
475b4205
AD
2431 /* compute outer L2 header size */
2432 offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2433
fd0a05ce 2434 if (skb->encapsulation) {
d1bd743b 2435 u32 tunnel = 0;
a0064728
AD
2436 /* define outer network header type */
2437 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
475b4205
AD
2438 tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2439 I40E_TX_CTX_EXT_IP_IPV4 :
2440 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2441
a0064728
AD
2442 l4_proto = ip.v4->protocol;
2443 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
475b4205 2444 tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
a3fd9d88
AD
2445
2446 exthdr = ip.hdr + sizeof(*ip.v6);
a0064728 2447 l4_proto = ip.v6->nexthdr;
a3fd9d88
AD
2448 if (l4.hdr != exthdr)
2449 ipv6_skip_exthdr(skb, exthdr - skb->data,
2450 &l4_proto, &frag_off);
a0064728
AD
2451 }
2452
2453 /* define outer transport */
2454 switch (l4_proto) {
45991204 2455 case IPPROTO_UDP:
475b4205 2456 tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
6a899024 2457 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
45991204 2458 break;
c1d1791d 2459 case IPPROTO_GRE:
475b4205 2460 tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
a0064728 2461 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
c1d1791d 2462 break;
577389a5
AD
2463 case IPPROTO_IPIP:
2464 case IPPROTO_IPV6:
2465 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2466 l4.hdr = skb_inner_network_header(skb);
2467 break;
45991204 2468 default:
529f1f65
AD
2469 if (*tx_flags & I40E_TX_FLAGS_TSO)
2470 return -1;
2471
2472 skb_checksum_help(skb);
2473 return 0;
45991204 2474 }
b96b78f2 2475
577389a5
AD
2476 /* compute outer L3 header size */
2477 tunnel |= ((l4.hdr - ip.hdr) / 4) <<
2478 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
2479
2480 /* switch IP header pointer from outer to inner header */
2481 ip.hdr = skb_inner_network_header(skb);
2482
475b4205
AD
2483 /* compute tunnel header size */
2484 tunnel |= ((ip.hdr - l4.hdr) / 2) <<
2485 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2486
5453205c
AD
2487 /* indicate if we need to offload outer UDP header */
2488 if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
1c7b4a23 2489 !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
5453205c
AD
2490 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
2491 tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2492
475b4205
AD
2493 /* record tunnel offload values */
2494 *cd_tunneling |= tunnel;
2495
b96b78f2 2496 /* switch L4 header pointer from outer to inner */
b96b78f2 2497 l4.hdr = skb_inner_transport_header(skb);
a0064728 2498 l4_proto = 0;
fd0a05ce 2499
a0064728
AD
2500 /* reset type as we transition from outer to inner headers */
2501 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
2502 if (ip.v4->version == 4)
2503 *tx_flags |= I40E_TX_FLAGS_IPV4;
2504 if (ip.v6->version == 6)
89232c3b 2505 *tx_flags |= I40E_TX_FLAGS_IPV6;
fd0a05ce
JB
2506 }
2507
2508 /* Enable IP checksum offloads */
89232c3b 2509 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
b96b78f2 2510 l4_proto = ip.v4->protocol;
fd0a05ce
JB
2511 /* the stack computes the IP header already, the only time we
2512 * need the hardware to recompute it is in the case of TSO.
2513 */
475b4205
AD
2514 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2515 I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
2516 I40E_TX_DESC_CMD_IIPT_IPV4;
89232c3b 2517 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
475b4205 2518 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
a3fd9d88
AD
2519
2520 exthdr = ip.hdr + sizeof(*ip.v6);
2521 l4_proto = ip.v6->nexthdr;
2522 if (l4.hdr != exthdr)
2523 ipv6_skip_exthdr(skb, exthdr - skb->data,
2524 &l4_proto, &frag_off);
fd0a05ce 2525 }
b96b78f2 2526
475b4205
AD
2527 /* compute inner L3 header size */
2528 offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
fd0a05ce
JB
2529
2530 /* Enable L4 checksum offloads */
b96b78f2 2531 switch (l4_proto) {
fd0a05ce
JB
2532 case IPPROTO_TCP:
2533 /* enable checksum offloads */
475b4205
AD
2534 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2535 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
fd0a05ce
JB
2536 break;
2537 case IPPROTO_SCTP:
2538 /* enable SCTP checksum offload */
475b4205
AD
2539 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2540 offset |= (sizeof(struct sctphdr) >> 2) <<
2541 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
fd0a05ce
JB
2542 break;
2543 case IPPROTO_UDP:
2544 /* enable UDP checksum offload */
475b4205
AD
2545 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2546 offset |= (sizeof(struct udphdr) >> 2) <<
2547 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
fd0a05ce
JB
2548 break;
2549 default:
529f1f65
AD
2550 if (*tx_flags & I40E_TX_FLAGS_TSO)
2551 return -1;
2552 skb_checksum_help(skb);
2553 return 0;
fd0a05ce 2554 }
475b4205
AD
2555
2556 *td_cmd |= cmd;
2557 *td_offset |= offset;
529f1f65
AD
2558
2559 return 1;
fd0a05ce
JB
2560}
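/* Illustrative sketch, not from the driver source: the td_offset packing done
 * by i40e_tx_enable_csum() above, where the MAC header length is stored in
 * 2-byte words and the IP and L4 header lengths in 4-byte words. The field
 * positions (MACLEN at bit 0, IPLEN at bit 7, L4LEN at bit 14) are my
 * restatement of the descriptor layout; the unit conversions mirror the
 * driver's "/ 2" and "/ 4".
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_MACLEN_SHIFT  0	/* MAC header, in 2-byte words */
#define SKETCH_IPLEN_SHIFT   7	/* IP header, in 4-byte words */
#define SKETCH_L4LEN_SHIFT   14	/* L4 header, in 4-byte words */

static uint32_t build_td_offset(uint32_t mac_hdr_bytes, uint32_t ip_hdr_bytes,
				uint32_t l4_hdr_bytes)
{
	return ((mac_hdr_bytes / 2) << SKETCH_MACLEN_SHIFT) |
	       ((ip_hdr_bytes / 4) << SKETCH_IPLEN_SHIFT) |
	       ((l4_hdr_bytes / 4) << SKETCH_L4LEN_SHIFT);
}

int main(void)
{
	/* plain TCP over IPv4: 14-byte Ethernet, 20-byte IP, 20-byte TCP */
	printf("td_offset = 0x%05x\n",
	       (unsigned int)build_td_offset(14, 20, 20));
	return 0;
}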
2561
2562/**
2563 * i40e_create_tx_ctx - Build the Tx context descriptor
2564 * @tx_ring: ring to create the descriptor on
2565 * @cd_type_cmd_tso_mss: Quad Word 1
2566 * @cd_tunneling: Quad Word 0 - bits 0-31
2567 * @cd_l2tag2: Quad Word 0 - bits 32-63
2568 **/
2569static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2570 const u64 cd_type_cmd_tso_mss,
2571 const u32 cd_tunneling, const u32 cd_l2tag2)
2572{
2573 struct i40e_tx_context_desc *context_desc;
fc4ac67b 2574 int i = tx_ring->next_to_use;
fd0a05ce 2575
ff40dd5d
JB
2576 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2577 !cd_tunneling && !cd_l2tag2)
fd0a05ce
JB
2578 return;
2579
2580 /* grab the next descriptor */
fc4ac67b
AD
2581 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2582
2583 i++;
2584 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
fd0a05ce
JB
2585
2586 /* cpu_to_le32 and assign to struct fields */
2587 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2588 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
3efbbb20 2589 context_desc->rsvd = cpu_to_le16(0);
fd0a05ce
JB
2590 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2591}
2592
4567dc10
ED
2593/**
2594 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2595 * @tx_ring: the ring to be checked
2596 * @size: the size buffer we want to assure is available
2597 *
2598 * Returns -EBUSY if a stop is needed, else 0
2599 **/
4ec441df 2600int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
4567dc10
ED
2601{
2602 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2603 /* Memory barrier before checking head and tail */
2604 smp_mb();
2605
2606 /* Check again in a case another CPU has just made room available. */
2607 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2608 return -EBUSY;
2609
2610 /* A reprieve! - use start_queue because it doesn't call schedule */
2611 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2612 ++tx_ring->tx_stats.restart_queue;
2613 return 0;
2614}
2615
71da6197 2616/**
3f3f7cb8 2617 * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
71da6197 2618 * @skb: send buffer
71da6197 2619 *
3f3f7cb8
AD
2620 * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
2621 * and so we need to figure out the cases where we need to linearize the skb.
2622 *
2623 * For TSO we need to count the TSO header and segment payload separately.
2624 * As such we need to check cases where we have 7 fragments or more as we
2625 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
2626 * the segment payload in the first descriptor, and another 7 for the
2627 * fragments.
71da6197 2628 **/
2d37490b 2629bool __i40e_chk_linearize(struct sk_buff *skb)
71da6197 2630{
2d37490b 2631 const struct skb_frag_struct *frag, *stale;
3f3f7cb8 2632 int nr_frags, sum;
71da6197 2633
3f3f7cb8 2634 /* no need to check if number of frags is less than 7 */
2d37490b 2635 nr_frags = skb_shinfo(skb)->nr_frags;
3f3f7cb8 2636 if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
2d37490b 2637 return false;
71da6197 2638
2d37490b
AD
2639 /* We need to walk through the list and validate that each group
2640 * of 6 fragments totals at least gso_size. However we don't need
3f3f7cb8
AD
2641 * to perform such validation on the last 6 since the last 6 cannot
2642 * inherit any data from a descriptor after them.
2d37490b 2643 */
3f3f7cb8 2644 nr_frags -= I40E_MAX_BUFFER_TXD - 2;
2d37490b
AD
2645 frag = &skb_shinfo(skb)->frags[0];
2646
2647 /* Initialize size to the negative value of gso_size minus 1. We
2648 * use this as the worst case scenario in which the frag ahead
2649 * of us only provides one byte, which is why we are limited to 6
2650 * descriptors for a single transmit as the header and previous
2651 * fragment are already consuming 2 descriptors.
2652 */
3f3f7cb8 2653 sum = 1 - skb_shinfo(skb)->gso_size;
2d37490b 2654
3f3f7cb8
AD
2655 /* Add size of frags 0 through 4 to create our initial sum */
2656 sum += skb_frag_size(frag++);
2657 sum += skb_frag_size(frag++);
2658 sum += skb_frag_size(frag++);
2659 sum += skb_frag_size(frag++);
2660 sum += skb_frag_size(frag++);
2d37490b
AD
2661
2662 /* Walk through fragments adding latest fragment, testing it, and
2663 * then removing stale fragments from the sum.
2664 */
2665 stale = &skb_shinfo(skb)->frags[0];
2666 for (;;) {
3f3f7cb8 2667 sum += skb_frag_size(frag++);
2d37490b
AD
2668
2669 /* if sum is negative we failed to make sufficient progress */
2670 if (sum < 0)
2671 return true;
2672
2673 /* use pre-decrement to avoid processing last fragment */
2674 if (!--nr_frags)
2675 break;
2676
3f3f7cb8 2677 sum -= skb_frag_size(stale++);
71da6197
AS
2678 }
2679
2d37490b 2680 return false;
71da6197
AS
2681}
2682
fd0a05ce
JB
2683/**
2684 * i40e_tx_map - Build the Tx descriptor
2685 * @tx_ring: ring to send buffer on
2686 * @skb: send buffer
2687 * @first: first buffer info buffer to use
2688 * @tx_flags: collected send information
2689 * @hdr_len: size of the packet header
2690 * @td_cmd: the command field in the descriptor
2691 * @td_offset: offset for checksum or crc
2692 **/
38e00438 2693#ifdef I40E_FCOE
3e587cf3 2694inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
fd0a05ce
JB
2695 struct i40e_tx_buffer *first, u32 tx_flags,
2696 const u8 hdr_len, u32 td_cmd, u32 td_offset)
3e587cf3
JB
2697#else
2698static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2699 struct i40e_tx_buffer *first, u32 tx_flags,
2700 const u8 hdr_len, u32 td_cmd, u32 td_offset)
38e00438 2701#endif
fd0a05ce 2702{
fd0a05ce
JB
2703 unsigned int data_len = skb->data_len;
2704 unsigned int size = skb_headlen(skb);
a5e9c572 2705 struct skb_frag_struct *frag;
fd0a05ce
JB
2706 struct i40e_tx_buffer *tx_bi;
2707 struct i40e_tx_desc *tx_desc;
a5e9c572 2708 u16 i = tx_ring->next_to_use;
fd0a05ce
JB
2709 u32 td_tag = 0;
2710 dma_addr_t dma;
2711 u16 gso_segs;
58044743
AS
2712 u16 desc_count = 0;
2713 bool tail_bump = true;
2714 bool do_rs = false;
fd0a05ce 2715
fd0a05ce
JB
2716 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2717 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2718 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2719 I40E_TX_FLAGS_VLAN_SHIFT;
2720 }
2721
a5e9c572
AD
2722 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2723 gso_segs = skb_shinfo(skb)->gso_segs;
2724 else
2725 gso_segs = 1;
2726
2727 /* multiply data chunks by size of headers */
2728 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2729 first->gso_segs = gso_segs;
2730 first->skb = skb;
2731 first->tx_flags = tx_flags;
2732
2733 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2734
fd0a05ce 2735 tx_desc = I40E_TX_DESC(tx_ring, i);
a5e9c572
AD
2736 tx_bi = first;
2737
2738 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
5c4654da
AD
2739 unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2740
a5e9c572
AD
2741 if (dma_mapping_error(tx_ring->dev, dma))
2742 goto dma_error;
2743
2744 /* record length, and DMA address */
2745 dma_unmap_len_set(tx_bi, len, size);
2746 dma_unmap_addr_set(tx_bi, dma, dma);
2747
5c4654da
AD
2748 /* align size to end of page */
2749 max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1);
a5e9c572
AD
2750 tx_desc->buffer_addr = cpu_to_le64(dma);
2751
2752 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
fd0a05ce
JB
2753 tx_desc->cmd_type_offset_bsz =
2754 build_ctob(td_cmd, td_offset,
5c4654da 2755 max_data, td_tag);
fd0a05ce 2756
fd0a05ce
JB
2757 tx_desc++;
2758 i++;
58044743
AS
2759 desc_count++;
2760
fd0a05ce
JB
2761 if (i == tx_ring->count) {
2762 tx_desc = I40E_TX_DESC(tx_ring, 0);
2763 i = 0;
2764 }
fd0a05ce 2765
5c4654da
AD
2766 dma += max_data;
2767 size -= max_data;
fd0a05ce 2768
5c4654da 2769 max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
a5e9c572
AD
2770 tx_desc->buffer_addr = cpu_to_le64(dma);
2771 }
fd0a05ce
JB
2772
2773 if (likely(!data_len))
2774 break;
2775
a5e9c572
AD
2776 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2777 size, td_tag);
fd0a05ce
JB
2778
2779 tx_desc++;
2780 i++;
58044743
AS
2781 desc_count++;
2782
fd0a05ce
JB
2783 if (i == tx_ring->count) {
2784 tx_desc = I40E_TX_DESC(tx_ring, 0);
2785 i = 0;
2786 }
2787
a5e9c572
AD
2788 size = skb_frag_size(frag);
2789 data_len -= size;
fd0a05ce 2790
a5e9c572
AD
2791 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2792 DMA_TO_DEVICE);
fd0a05ce 2793
a5e9c572
AD
2794 tx_bi = &tx_ring->tx_bi[i];
2795 }
fd0a05ce 2796
a5e9c572
AD
2797 /* set next_to_watch value indicating a packet is present */
2798 first->next_to_watch = tx_desc;
2799
2800 i++;
2801 if (i == tx_ring->count)
2802 i = 0;
2803
2804 tx_ring->next_to_use = i;
2805
58044743
AS
2806 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2807 tx_ring->queue_index),
2808 first->bytecount);
4567dc10 2809 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
58044743
AS
2810
2811 /* Algorithm to optimize tail and RS bit setting:
2812 * if xmit_more is supported
2813 * if xmit_more is true
2814 * do not update tail and do not mark RS bit.
2815 * if xmit_more is false and last xmit_more was false
2816 * if every packet spanned less than 4 desc
2817 * then set RS bit on 4th packet and update tail
2818 * on every packet
2819 * else
2820 * update tail and set RS bit on every packet.
2821 * if xmit_more is false and last_xmit_more was true
2822 * update tail and set RS bit.
2823 *
2824 * Optimization: wmb to be issued only in case of tail update.
2825 * Also optimize the Descriptor WB path for RS bit with the same
2826 * algorithm.
2827 *
2828 * Note: If there are fewer than 4 packets
2829 * pending and interrupts were disabled, the service task will
2830 * trigger a force WB.
2831 */
2832 if (skb->xmit_more &&
2833 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2834 tx_ring->queue_index))) {
2835 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2836 tail_bump = false;
2837 } else if (!skb->xmit_more &&
2838 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2839 tx_ring->queue_index)) &&
2840 (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2841 (tx_ring->packet_stride < WB_STRIDE) &&
2842 (desc_count < WB_STRIDE)) {
2843 tx_ring->packet_stride++;
2844 } else {
2845 tx_ring->packet_stride = 0;
2846 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2847 do_rs = true;
2848 }
2849 if (do_rs)
2850 tx_ring->packet_stride = 0;
2851
2852 tx_desc->cmd_type_offset_bsz =
2853 build_ctob(td_cmd, td_offset, size, td_tag) |
2854 cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2855 I40E_TX_DESC_CMD_EOP) <<
2856 I40E_TXD_QW1_CMD_SHIFT);
2857
a5e9c572 2858 /* notify HW of packet */
58044743 2859 if (!tail_bump)
489ce7a4 2860 prefetchw(tx_desc + 1);
a5e9c572 2861
58044743
AS
2862 if (tail_bump) {
2863 /* Force memory writes to complete before letting h/w
2864 * know there are new descriptors to fetch. (Only
2865 * applicable for weak-ordered memory model archs,
2866 * such as IA-64).
2867 */
2868 wmb();
2869 writel(i, tx_ring->tail);
2870 }
2871
fd0a05ce
JB
2872 return;
2873
2874dma_error:
a5e9c572 2875 dev_info(tx_ring->dev, "TX DMA map failed\n");
fd0a05ce
JB
2876
2877 /* clear dma mappings for failed tx_bi map */
2878 for (;;) {
2879 tx_bi = &tx_ring->tx_bi[i];
a5e9c572 2880 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
fd0a05ce
JB
2881 if (tx_bi == first)
2882 break;
2883 if (i == 0)
2884 i = tx_ring->count;
2885 i--;
2886 }
2887
fd0a05ce
JB
2888 tx_ring->next_to_use = i;
2889}
2890
fd0a05ce
JB
2891/**
2892 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2893 * @skb: send buffer
2894 * @tx_ring: ring to send buffer on
2895 *
2896 * Returns NETDEV_TX_OK if sent, else an error code
2897 **/
2898static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2899 struct i40e_ring *tx_ring)
2900{
2901 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2902 u32 cd_tunneling = 0, cd_l2tag2 = 0;
2903 struct i40e_tx_buffer *first;
2904 u32 td_offset = 0;
2905 u32 tx_flags = 0;
2906 __be16 protocol;
2907 u32 td_cmd = 0;
2908 u8 hdr_len = 0;
4ec441df 2909 int tso, count;
beb0dff1 2910 int tsyn;
6995b36c 2911
b74118f0
JB
2912 /* prefetch the data, we'll need it later */
2913 prefetch(skb->data);
2914
4ec441df 2915 count = i40e_xmit_descriptor_count(skb);
2d37490b
AD
2916 if (i40e_chk_linearize(skb, count)) {
2917 if (__skb_linearize(skb))
2918 goto out_drop;
5c4654da 2919 count = i40e_txd_use_count(skb->len);
2d37490b
AD
2920 tx_ring->tx_stats.tx_linearize++;
2921 }
4ec441df
AD
2922
2923 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2924 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2925 * + 4 desc gap to avoid the cache line where head is,
2926 * + 1 desc for context descriptor,
2927 * otherwise try next time
2928 */
2929 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2930 tx_ring->tx_stats.tx_busy++;
fd0a05ce 2931 return NETDEV_TX_BUSY;
4ec441df 2932 }
fd0a05ce
JB
2933
2934 /* prepare the xmit flags */
2935 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2936 goto out_drop;
2937
2938 /* obtain protocol of skb */
3d34dd03 2939 protocol = vlan_get_protocol(skb);
fd0a05ce
JB
2940
2941 /* record the location of the first descriptor for this packet */
2942 first = &tx_ring->tx_bi[tx_ring->next_to_use];
2943
2944 /* setup IPv4/IPv6 offloads */
0e2fe46c 2945 if (protocol == htons(ETH_P_IP))
fd0a05ce 2946 tx_flags |= I40E_TX_FLAGS_IPV4;
0e2fe46c 2947 else if (protocol == htons(ETH_P_IPV6))
fd0a05ce
JB
2948 tx_flags |= I40E_TX_FLAGS_IPV6;
2949
84b07992 2950 tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
fd0a05ce
JB
2951
2952 if (tso < 0)
2953 goto out_drop;
2954 else if (tso)
2955 tx_flags |= I40E_TX_FLAGS_TSO;
2956
3bc67973
AD
2957 /* Always offload the checksum, since it's in the data descriptor */
2958 tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2959 tx_ring, &cd_tunneling);
2960 if (tso < 0)
2961 goto out_drop;
2962
beb0dff1
JK
2963 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2964
2965 if (tsyn)
2966 tx_flags |= I40E_TX_FLAGS_TSYN;
2967
259afec7
JK
2968 skb_tx_timestamp(skb);
2969
b1941306
AD
2970 /* always enable CRC insertion offload */
2971 td_cmd |= I40E_TX_DESC_CMD_ICRC;
2972
fd0a05ce
JB
2973 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2974 cd_tunneling, cd_l2tag2);
2975
2976 /* Add Flow Director ATR if it's enabled.
2977 *
2978 * NOTE: this must always be directly before the data descriptor.
2979 */
6b037cd4 2980 i40e_atr(tx_ring, skb, tx_flags);
fd0a05ce
JB
2981
2982 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2983 td_cmd, td_offset);
2984
fd0a05ce
JB
2985 return NETDEV_TX_OK;
2986
2987out_drop:
2988 dev_kfree_skb_any(skb);
2989 return NETDEV_TX_OK;
2990}
2991
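/* Illustrative sketch, not from the driver source: the "count + 4 + 1"
 * descriptor reservation made by i40e_xmit_frame_ring() above before any
 * descriptors are built. sketch_ring, ring_unused() and can_transmit() are
 * invented here; in the driver the equivalents are I40E_DESC_UNUSED() and
 * i40e_maybe_stop_tx().
 */
#include <stdbool.h>
#include <stdio.h>

struct sketch_ring {
	unsigned int count;		/* total descriptors in the ring */
	unsigned int next_to_use;
	unsigned int next_to_clean;
};

static unsigned int ring_unused(const struct sketch_ring *r)
{
	/* same wrap-around arithmetic as the driver's I40E_DESC_UNUSED() */
	return ((r->next_to_clean > r->next_to_use) ? 0 : r->count) +
	       r->next_to_clean - r->next_to_use - 1;
}

static bool can_transmit(const struct sketch_ring *r, unsigned int data_descs)
{
	/* + 4 desc gap to stay off the head cache line, + 1 context desc */
	unsigned int needed = data_descs + 4 + 1;

	if (ring_unused(r) >= needed)
		return true;

	printf("stop queue: need %u, have %u free\n", needed, ring_unused(r));
	return false;
}

int main(void)
{
	struct sketch_ring ring = { .count = 512, .next_to_use = 500, .next_to_clean = 498 };

	/* a 3-fragment packet wanting 4 data descriptors */
	return can_transmit(&ring, 4) ? 0 : 1;
}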
2992/**
2993 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2994 * @skb: send buffer
2995 * @netdev: network interface device structure
2996 *
2997 * Returns NETDEV_TX_OK if sent, else an error code
2998 **/
2999netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
3000{
3001 struct i40e_netdev_priv *np = netdev_priv(netdev);
3002 struct i40e_vsi *vsi = np->vsi;
9f65e15b 3003 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
fd0a05ce
JB
3004
3005 /* hardware can't handle really short frames, hardware padding works
3006 * beyond this point
3007 */
a94d9e22
AD
3008 if (skb_put_padto(skb, I40E_MIN_TX_LEN))
3009 return NETDEV_TX_OK;
fd0a05ce
JB
3010
3011 return i40e_xmit_frame_ring(skb, tx_ring);
3012}