i40e/i40evf: Add support for GSO partial with UDP_TUNNEL_CSUM and GRE_CSUM
drivers/net/ethernet/intel/i40e/i40e_txrx.c
1 /*******************************************************************************
2 *
3 * Intel Ethernet Controller XL710 Family Linux Driver
4 * Copyright(c) 2013 - 2016 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 * Contact Information:
22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24 *
25 ******************************************************************************/
26
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 u32 td_tag)
34 {
35 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
37 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
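/* build_ctob() packs the second quadword of a Tx data descriptor: the
 * data descriptor type in the low bits, then the command flags, the
 * L2/L3/L4 offset fields, the buffer size and finally the L2 tag, per
 * the I40E_TXD_QW1_* shifts.  A minimal usage sketch (values are
 * illustrative only):
 *
 *	tx_desc->cmd_type_offset_bsz =
 *		build_ctob(I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS,
 *			   0, skb_headlen(skb), 0);
 *
 * which is the same shape as the call made for the Flow Director dummy
 * descriptor below.
 */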
41
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
44 /**
45 * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: the flow director filter parameters to program
47 * @raw_packet: the pre-allocated packet buffer for FDir
48 * @pf: The PF pointer
49 * @add: True for add/update, False for remove
50 **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 struct i40e_pf *pf, bool add)
53 {
54 struct i40e_filter_program_desc *fdir_desc;
55 struct i40e_tx_buffer *tx_buf, *first;
56 struct i40e_tx_desc *tx_desc;
57 struct i40e_ring *tx_ring;
58 unsigned int fpt, dcc;
59 struct i40e_vsi *vsi;
60 struct device *dev;
61 dma_addr_t dma;
62 u32 td_cmd = 0;
63 u16 delay = 0;
64 u16 i;
65
66 /* find existing FDIR VSI */
67 vsi = NULL;
68 for (i = 0; i < pf->num_alloc_vsi; i++)
69 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 vsi = pf->vsi[i];
71 if (!vsi)
72 return -ENOENT;
73
74 tx_ring = vsi->tx_rings[0];
75 dev = tx_ring->dev;
76
77 /* we need two descriptors to add/del a filter and we can wait */
78 do {
79 if (I40E_DESC_UNUSED(tx_ring) > 1)
80 break;
81 msleep_interruptible(1);
82 delay++;
83 } while (delay < I40E_FD_CLEAN_DELAY);
84
85 if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 return -EAGAIN;
87
88 dma = dma_map_single(dev, raw_packet,
89 I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 if (dma_mapping_error(dev, dma))
91 goto dma_fail;
92
93 /* grab the next descriptor */
94 i = tx_ring->next_to_use;
95 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 first = &tx_ring->tx_bi[i];
97 memset(first, 0, sizeof(struct i40e_tx_buffer));
98
99 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100
101 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 I40E_TXD_FLTR_QW0_QINDEX_MASK;
103
104 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106
107 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109
110 /* Use LAN VSI Id if not programmed by user */
111 if (fdir_data->dest_vsi == 0)
112 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114 else
115 fpt |= ((u32)fdir_data->dest_vsi <<
116 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118
119 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120
121 if (add)
122 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124 else
125 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127
128 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 I40E_TXD_FLTR_QW1_DEST_MASK;
130
131 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133
134 if (fdir_data->cnt_index != 0) {
135 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 dcc |= ((u32)fdir_data->cnt_index <<
137 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139 }
140
141 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 fdir_desc->rsvd = cpu_to_le32(0);
143 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145
146 /* Now program a dummy descriptor */
147 i = tx_ring->next_to_use;
148 tx_desc = I40E_TX_DESC(tx_ring, i);
149 tx_buf = &tx_ring->tx_bi[i];
150
151 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152
153 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154
155 /* record length, and DMA address */
156 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 dma_unmap_addr_set(tx_buf, dma, dma);
158
159 tx_desc->buffer_addr = cpu_to_le64(dma);
160 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161
162 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 tx_buf->raw_buf = (void *)raw_packet;
164
165 tx_desc->cmd_type_offset_bsz =
166 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167
168 /* Force memory writes to complete before letting h/w
169 * know there are new descriptors to fetch.
170 */
171 wmb();
172
173 /* Mark the data descriptor to be watched */
174 first->next_to_watch = tx_desc;
175
176 writel(tx_ring->next_to_use, tx_ring->tail);
177 return 0;
178
179 dma_fail:
180 return -1;
181 }
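/* Each add/remove above consumes two descriptors: a filter programming
 * descriptor carrying the qindex/pctype/destination fields assembled in
 * fpt and dcc, followed by a dummy data descriptor pointing at the
 * raw_packet buffer, which hardware parses to pick up the actual match
 * fields.  That is why the helpers below build template UDP/TCP/IP
 * frames and only patch in the addresses and ports from the filter spec.
 */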
182
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
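/* 42 = 14-byte Ethernet header + 20-byte IPv4 header + 8-byte UDP header.
 * The TCP and raw-IP dummy packets below follow the same arithmetic
 * (14 + 20 + 20 = 54 and 14 + 20 = 34 respectively).
 */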
185 /**
186 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187 * @vsi: pointer to the targeted VSI
188 * @fd_data: the flow director data required for the FDir descriptor
189 * @add: true adds a filter, false removes it
190 *
191 * Returns 0 if the filters were successfully added or removed
192 **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 struct i40e_fdir_filter *fd_data,
195 bool add)
196 {
197 struct i40e_pf *pf = vsi->back;
198 struct udphdr *udp;
199 struct iphdr *ip;
200 bool err = false;
201 u8 *raw_packet;
202 int ret;
203 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206
207 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208 if (!raw_packet)
209 return -ENOMEM;
210 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211
212 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 + sizeof(struct iphdr));
215
216 ip->daddr = fd_data->dst_ip[0];
217 udp->dest = fd_data->dst_port;
218 ip->saddr = fd_data->src_ip[0];
219 udp->source = fd_data->src_port;
220
221 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223 if (ret) {
224 dev_info(&pf->pdev->dev,
225 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 fd_data->pctype, fd_data->fd_id, ret);
227 err = true;
228 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229 if (add)
230 dev_info(&pf->pdev->dev,
231 "Filter OK for PCTYPE %d loc = %d\n",
232 fd_data->pctype, fd_data->fd_id);
233 else
234 dev_info(&pf->pdev->dev,
235 "Filter deleted for PCTYPE %d loc = %d\n",
236 fd_data->pctype, fd_data->fd_id);
237 }
238 if (err)
239 kfree(raw_packet);
240
241 return err ? -EOPNOTSUPP : 0;
242 }
243
244 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
245 /**
246 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
247 * @vsi: pointer to the targeted VSI
248 * @fd_data: the flow director data required for the FDir descriptor
249 * @add: true adds a filter, false removes it
250 *
251 * Returns 0 if the filters were successfully added or removed
252 **/
253 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
254 struct i40e_fdir_filter *fd_data,
255 bool add)
256 {
257 struct i40e_pf *pf = vsi->back;
258 struct tcphdr *tcp;
259 struct iphdr *ip;
260 bool err = false;
261 u8 *raw_packet;
262 int ret;
263 /* Dummy packet */
264 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
265 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
267 0x0, 0x72, 0, 0, 0, 0};
268
269 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
270 if (!raw_packet)
271 return -ENOMEM;
272 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
273
274 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
275 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
276 + sizeof(struct iphdr));
277
278 ip->daddr = fd_data->dst_ip[0];
279 tcp->dest = fd_data->dst_port;
280 ip->saddr = fd_data->src_ip[0];
281 tcp->source = fd_data->src_port;
282
283 if (add) {
284 pf->fd_tcp_rule++;
285 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
286 if (I40E_DEBUG_FD & pf->hw.debug_mask)
287 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
288 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
289 }
290 } else {
291 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
292 (pf->fd_tcp_rule - 1) : 0;
293 if (pf->fd_tcp_rule == 0) {
294 pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
295 if (I40E_DEBUG_FD & pf->hw.debug_mask)
296 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
297 }
298 }
299
300 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
301 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
302
303 if (ret) {
304 dev_info(&pf->pdev->dev,
305 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
306 fd_data->pctype, fd_data->fd_id, ret);
307 err = true;
308 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
309 if (add)
310 			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
311 fd_data->pctype, fd_data->fd_id);
312 else
313 dev_info(&pf->pdev->dev,
314 "Filter deleted for PCTYPE %d loc = %d\n",
315 fd_data->pctype, fd_data->fd_id);
316 }
317
318 if (err)
319 kfree(raw_packet);
320
321 return err ? -EOPNOTSUPP : 0;
322 }
323
324 /**
325 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
326 * a specific flow spec
327 * @vsi: pointer to the targeted VSI
328 * @fd_data: the flow director data required for the FDir descriptor
329 * @add: true adds a filter, false removes it
330 *
331 * Returns 0 if the filters were successfully added or removed
332 **/
333 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
334 struct i40e_fdir_filter *fd_data,
335 bool add)
336 {
337 return -EOPNOTSUPP;
338 }
339
340 #define I40E_IP_DUMMY_PACKET_LEN 34
341 /**
342 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
343 * a specific flow spec
344 * @vsi: pointer to the targeted VSI
345 * @fd_data: the flow director data required for the FDir descriptor
346 * @add: true adds a filter, false removes it
347 *
348 * Returns 0 if the filters were successfully added or removed
349 **/
350 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
351 struct i40e_fdir_filter *fd_data,
352 bool add)
353 {
354 struct i40e_pf *pf = vsi->back;
355 struct iphdr *ip;
356 bool err = false;
357 u8 *raw_packet;
358 int ret;
359 int i;
360 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
361 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0};
363
364 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
365 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
366 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
367 if (!raw_packet)
368 return -ENOMEM;
369 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
370 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
371
372 ip->saddr = fd_data->src_ip[0];
373 ip->daddr = fd_data->dst_ip[0];
374 ip->protocol = 0;
375
376 fd_data->pctype = i;
377 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
378
379 if (ret) {
380 dev_info(&pf->pdev->dev,
381 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
382 fd_data->pctype, fd_data->fd_id, ret);
383 err = true;
384 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
385 if (add)
386 dev_info(&pf->pdev->dev,
387 "Filter OK for PCTYPE %d loc = %d\n",
388 fd_data->pctype, fd_data->fd_id);
389 else
390 dev_info(&pf->pdev->dev,
391 "Filter deleted for PCTYPE %d loc = %d\n",
392 fd_data->pctype, fd_data->fd_id);
393 }
394 }
395
396 if (err)
397 kfree(raw_packet);
398
399 return err ? -EOPNOTSUPP : 0;
400 }
401
402 /**
403 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
404 * @vsi: pointer to the targeted VSI
405  * @input: the flow director filter spec to add or remove
406 * @add: true adds a filter, false removes it
407 *
408 **/
409 int i40e_add_del_fdir(struct i40e_vsi *vsi,
410 struct i40e_fdir_filter *input, bool add)
411 {
412 struct i40e_pf *pf = vsi->back;
413 int ret;
414
415 switch (input->flow_type & ~FLOW_EXT) {
416 case TCP_V4_FLOW:
417 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
418 break;
419 case UDP_V4_FLOW:
420 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
421 break;
422 case SCTP_V4_FLOW:
423 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
424 break;
425 case IPV4_FLOW:
426 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
427 break;
428 case IP_USER_FLOW:
429 switch (input->ip4_proto) {
430 case IPPROTO_TCP:
431 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
432 break;
433 case IPPROTO_UDP:
434 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
435 break;
436 case IPPROTO_SCTP:
437 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
438 break;
439 default:
440 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
441 break;
442 }
443 break;
444 default:
445 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
446 input->flow_type);
447 ret = -EINVAL;
448 }
449
450 /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
451 return ret;
452 }
453
454 /**
455 * i40e_fd_handle_status - check the Programming Status for FD
456 * @rx_ring: the Rx ring for this descriptor
457 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
458 * @prog_id: the id originally used for programming
459 *
460  * This is used to verify whether the FD programming or invalidation
461  * requested by SW succeeded in HW, and to take action accordingly.
462 **/
463 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
464 union i40e_rx_desc *rx_desc, u8 prog_id)
465 {
466 struct i40e_pf *pf = rx_ring->vsi->back;
467 struct pci_dev *pdev = pf->pdev;
468 u32 fcnt_prog, fcnt_avail;
469 u32 error;
470 u64 qw;
471
472 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
473 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
474 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
475
476 if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
477 pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
478 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
479 (I40E_DEBUG_FD & pf->hw.debug_mask))
480 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
481 pf->fd_inv);
482
483 /* Check if the programming error is for ATR.
484 * If so, auto disable ATR and set a state for
485 * flush in progress. Next time we come here if flush is in
486 * progress do nothing, once flush is complete the state will
487 * be cleared.
488 */
489 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
490 return;
491
492 pf->fd_add_err++;
493 /* store the current atr filter count */
494 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
495
496 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
497 (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
498 pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
499 set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
500 }
501
502 /* filter programming failed most likely due to table full */
503 fcnt_prog = i40e_get_global_fd_count(pf);
504 fcnt_avail = pf->fdir_pf_filter_count;
505 /* If ATR is running fcnt_prog can quickly change,
506 * if we are very close to full, it makes sense to disable
507 * FD ATR/SB and then re-enable it when there is room.
508 */
509 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
510 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
511 !(pf->auto_disable_flags &
512 I40E_FLAG_FD_SB_ENABLED)) {
513 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
515 pf->auto_disable_flags |=
516 I40E_FLAG_FD_SB_ENABLED;
517 }
518 }
519 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
520 if (I40E_DEBUG_FD & pf->hw.debug_mask)
521 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
522 rx_desc->wb.qword0.hi_dword.fd_id);
523 }
524 }
525
526 /**
527 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
528 * @ring: the ring that owns the buffer
529 * @tx_buffer: the buffer to free
530 **/
531 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
532 struct i40e_tx_buffer *tx_buffer)
533 {
534 if (tx_buffer->skb) {
535 dev_kfree_skb_any(tx_buffer->skb);
536 if (dma_unmap_len(tx_buffer, len))
537 dma_unmap_single(ring->dev,
538 dma_unmap_addr(tx_buffer, dma),
539 dma_unmap_len(tx_buffer, len),
540 DMA_TO_DEVICE);
541 } else if (dma_unmap_len(tx_buffer, len)) {
542 dma_unmap_page(ring->dev,
543 dma_unmap_addr(tx_buffer, dma),
544 dma_unmap_len(tx_buffer, len),
545 DMA_TO_DEVICE);
546 }
547
548 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
549 kfree(tx_buffer->raw_buf);
550
551 tx_buffer->next_to_watch = NULL;
552 tx_buffer->skb = NULL;
553 dma_unmap_len_set(tx_buffer, len, 0);
554 /* tx_buffer must be completely set up in the transmit path */
555 }
556
557 /**
558  * i40e_clean_tx_ring - Free all Tx buffers in a ring
559 * @tx_ring: ring to be cleaned
560 **/
561 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
562 {
563 unsigned long bi_size;
564 u16 i;
565
566 /* ring already cleared, nothing to do */
567 if (!tx_ring->tx_bi)
568 return;
569
570 /* Free all the Tx ring sk_buffs */
571 for (i = 0; i < tx_ring->count; i++)
572 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
573
574 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
575 memset(tx_ring->tx_bi, 0, bi_size);
576
577 /* Zero out the descriptor ring */
578 memset(tx_ring->desc, 0, tx_ring->size);
579
580 tx_ring->next_to_use = 0;
581 tx_ring->next_to_clean = 0;
582
583 if (!tx_ring->netdev)
584 return;
585
586 /* cleanup Tx queue statistics */
587 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
588 tx_ring->queue_index));
589 }
590
591 /**
592 * i40e_free_tx_resources - Free Tx resources per queue
593 * @tx_ring: Tx descriptor ring for a specific queue
594 *
595 * Free all transmit software resources
596 **/
597 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
598 {
599 i40e_clean_tx_ring(tx_ring);
600 kfree(tx_ring->tx_bi);
601 tx_ring->tx_bi = NULL;
602
603 if (tx_ring->desc) {
604 dma_free_coherent(tx_ring->dev, tx_ring->size,
605 tx_ring->desc, tx_ring->dma);
606 tx_ring->desc = NULL;
607 }
608 }
609
610 /**
611 * i40e_get_tx_pending - how many tx descriptors not processed
612  * @ring: the ring of descriptors
613  * @in_sw: use the SW-tracked head instead of the HW head writeback
614 *
615 * Since there is no access to the ring head register
616 * in XL710, we need to use our local copies
617 **/
618 u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
619 {
620 u32 head, tail;
621
622 if (!in_sw)
623 head = i40e_get_head(ring);
624 else
625 head = ring->next_to_clean;
626 tail = readl(ring->tail);
627
628 if (head != tail)
629 return (head < tail) ?
630 tail - head : (tail + ring->count - head);
631
632 return 0;
633 }
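/* The arithmetic above handles ring wrap: with a 512-entry ring,
 * head = 500 and tail = 10 gives 10 + 512 - 500 = 22 descriptors still
 * outstanding, while head = 10 and tail = 500 is simply 500 - 10 = 490.
 */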
634
635 #define WB_STRIDE 0x3
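/* WB_STRIDE is used below as (j / (WB_STRIDE + 1)) == 0 together with
 * j != 0, i.e. the forced writeback only kicks in while one to three
 * descriptors remain outstanding on the ring.
 */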
636
637 /**
638 * i40e_clean_tx_irq - Reclaim resources after transmit completes
639 * @vsi: the VSI we care about
640 * @tx_ring: Tx ring to clean
641 * @napi_budget: Used to determine if we are in netpoll
642 *
643  * Returns true if there's any budget left (i.e. the clean is finished)
644 **/
645 static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
646 struct i40e_ring *tx_ring, int napi_budget)
647 {
648 u16 i = tx_ring->next_to_clean;
649 struct i40e_tx_buffer *tx_buf;
650 struct i40e_tx_desc *tx_head;
651 struct i40e_tx_desc *tx_desc;
652 unsigned int total_bytes = 0, total_packets = 0;
653 unsigned int budget = vsi->work_limit;
654
655 tx_buf = &tx_ring->tx_bi[i];
656 tx_desc = I40E_TX_DESC(tx_ring, i);
657 i -= tx_ring->count;
658
659 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
660
661 do {
662 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
663
664 /* if next_to_watch is not set then there is no work pending */
665 if (!eop_desc)
666 break;
667
668 /* prevent any other reads prior to eop_desc */
669 read_barrier_depends();
670
671 /* we have caught up to head, no work left to do */
672 if (tx_head == tx_desc)
673 break;
674
675 /* clear next_to_watch to prevent false hangs */
676 tx_buf->next_to_watch = NULL;
677
678 /* update the statistics for this packet */
679 total_bytes += tx_buf->bytecount;
680 total_packets += tx_buf->gso_segs;
681
682 /* free the skb */
683 napi_consume_skb(tx_buf->skb, napi_budget);
684
685 /* unmap skb header data */
686 dma_unmap_single(tx_ring->dev,
687 dma_unmap_addr(tx_buf, dma),
688 dma_unmap_len(tx_buf, len),
689 DMA_TO_DEVICE);
690
691 /* clear tx_buffer data */
692 tx_buf->skb = NULL;
693 dma_unmap_len_set(tx_buf, len, 0);
694
695 /* unmap remaining buffers */
696 while (tx_desc != eop_desc) {
697
698 tx_buf++;
699 tx_desc++;
700 i++;
701 if (unlikely(!i)) {
702 i -= tx_ring->count;
703 tx_buf = tx_ring->tx_bi;
704 tx_desc = I40E_TX_DESC(tx_ring, 0);
705 }
706
707 /* unmap any remaining paged data */
708 if (dma_unmap_len(tx_buf, len)) {
709 dma_unmap_page(tx_ring->dev,
710 dma_unmap_addr(tx_buf, dma),
711 dma_unmap_len(tx_buf, len),
712 DMA_TO_DEVICE);
713 dma_unmap_len_set(tx_buf, len, 0);
714 }
715 }
716
717 /* move us one more past the eop_desc for start of next pkt */
718 tx_buf++;
719 tx_desc++;
720 i++;
721 if (unlikely(!i)) {
722 i -= tx_ring->count;
723 tx_buf = tx_ring->tx_bi;
724 tx_desc = I40E_TX_DESC(tx_ring, 0);
725 }
726
727 prefetch(tx_desc);
728
729 /* update budget accounting */
730 budget--;
731 } while (likely(budget));
732
733 i += tx_ring->count;
734 tx_ring->next_to_clean = i;
735 u64_stats_update_begin(&tx_ring->syncp);
736 tx_ring->stats.bytes += total_bytes;
737 tx_ring->stats.packets += total_packets;
738 u64_stats_update_end(&tx_ring->syncp);
739 tx_ring->q_vector->tx.total_bytes += total_bytes;
740 tx_ring->q_vector->tx.total_packets += total_packets;
741
742 if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
743 unsigned int j = 0;
744
745 /* check to see if there are < 4 descriptors
746 * waiting to be written back, then kick the hardware to force
747 * them to be written back in case we stay in NAPI.
748 * In this mode on X722 we do not enable Interrupt.
749 */
750 j = i40e_get_tx_pending(tx_ring, false);
751
752 if (budget &&
753 ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
754 !test_bit(__I40E_DOWN, &vsi->state) &&
755 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
756 tx_ring->arm_wb = true;
757 }
758
759 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
760 tx_ring->queue_index),
761 total_packets, total_bytes);
762
763 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
764 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
765 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
766 /* Make sure that anybody stopping the queue after this
767 * sees the new next_to_clean.
768 */
769 smp_mb();
770 if (__netif_subqueue_stopped(tx_ring->netdev,
771 tx_ring->queue_index) &&
772 !test_bit(__I40E_DOWN, &vsi->state)) {
773 netif_wake_subqueue(tx_ring->netdev,
774 tx_ring->queue_index);
775 ++tx_ring->tx_stats.restart_queue;
776 }
777 }
778
779 return !!budget;
780 }
781
782 /**
783 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
784 * @vsi: the VSI we care about
785 * @q_vector: the vector on which to enable writeback
786 *
787 **/
788 static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
789 struct i40e_q_vector *q_vector)
790 {
791 u16 flags = q_vector->tx.ring[0].flags;
792 u32 val;
793
794 if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
795 return;
796
797 if (q_vector->arm_wb_state)
798 return;
799
800 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
801 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
802 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
803
804 wr32(&vsi->back->hw,
805 I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
806 val);
807 } else {
808 val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
809 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
810
811 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
812 }
813 q_vector->arm_wb_state = true;
814 }
815
816 /**
817 * i40e_force_wb - Issue SW Interrupt so HW does a wb
818 * @vsi: the VSI we care about
819 * @q_vector: the vector on which to force writeback
820 *
821 **/
822 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
823 {
824 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
825 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
826 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
827 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
828 I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
829 /* allow 00 to be written to the index */
830
831 wr32(&vsi->back->hw,
832 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
833 vsi->base_vector - 1), val);
834 } else {
835 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
836 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
837 I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
838 I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
839 /* allow 00 to be written to the index */
840
841 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
842 }
843 }
844
845 /**
846 * i40e_set_new_dynamic_itr - Find new ITR level
847 * @rc: structure containing ring performance data
848 *
849 * Returns true if ITR changed, false if not
850 *
851 * Stores a new ITR value based on packets and byte counts during
852 * the last interrupt. The advantage of per interrupt computation
853 * is faster updates and more accurate ITR for the current traffic
854 * pattern. Constants in this function were computed based on
855 * theoretical maximum wire speed and thresholds were set based on
856 * testing data as well as attempting to minimize response time
857 * while increasing bulk throughput.
858 **/
859 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
860 {
861 enum i40e_latency_range new_latency_range = rc->latency_range;
862 struct i40e_q_vector *qv = rc->ring->q_vector;
863 u32 new_itr = rc->itr;
864 int bytes_per_int;
865 int usecs;
866
867 if (rc->total_packets == 0 || !rc->itr)
868 return false;
869
870 	/* simple throttle rate management
871 * 0-10MB/s lowest (50000 ints/s)
872 * 10-20MB/s low (20000 ints/s)
873 * 20-1249MB/s bulk (18000 ints/s)
874 * > 40000 Rx packets per second (8000 ints/s)
875 *
876 * The math works out because the divisor is in 10^(-6) which
877 * turns the bytes/us input value into MB/s values, but
878 * make sure to use usecs, as the register values written
879 * are in 2 usec increments in the ITR registers, and make sure
880 * to use the smoothed values that the countdown timer gives us.
881 */
882 usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
883 bytes_per_int = rc->total_bytes / usecs;
884
885 switch (new_latency_range) {
886 case I40E_LOWEST_LATENCY:
887 if (bytes_per_int > 10)
888 new_latency_range = I40E_LOW_LATENCY;
889 break;
890 case I40E_LOW_LATENCY:
891 if (bytes_per_int > 20)
892 new_latency_range = I40E_BULK_LATENCY;
893 else if (bytes_per_int <= 10)
894 new_latency_range = I40E_LOWEST_LATENCY;
895 break;
896 case I40E_BULK_LATENCY:
897 case I40E_ULTRA_LATENCY:
898 default:
899 if (bytes_per_int <= 20)
900 new_latency_range = I40E_LOW_LATENCY;
901 break;
902 }
903
904 /* this is to adjust RX more aggressively when streaming small
905 * packets. The value of 40000 was picked as it is just beyond
906 * what the hardware can receive per second if in low latency
907 * mode.
908 */
909 #define RX_ULTRA_PACKET_RATE 40000
910
911 if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
912 (&qv->rx == rc))
913 new_latency_range = I40E_ULTRA_LATENCY;
914
915 rc->latency_range = new_latency_range;
916
917 switch (new_latency_range) {
918 case I40E_LOWEST_LATENCY:
919 new_itr = I40E_ITR_50K;
920 break;
921 case I40E_LOW_LATENCY:
922 new_itr = I40E_ITR_20K;
923 break;
924 case I40E_BULK_LATENCY:
925 new_itr = I40E_ITR_18K;
926 break;
927 case I40E_ULTRA_LATENCY:
928 new_itr = I40E_ITR_8K;
929 break;
930 default:
931 break;
932 }
933
934 rc->total_bytes = 0;
935 rc->total_packets = 0;
936
937 if (new_itr != rc->itr) {
938 rc->itr = new_itr;
939 return true;
940 }
941
942 return false;
943 }
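/* A rough worked example of the math above, with assumed values: if the
 * current ITR setting corresponds to 50 usecs per interrupt
 * ((rc->itr << 1) == 50) and ITR_COUNTDOWN_START is 100, the sampling
 * window is usecs = 50 * 100 = 5000.  A ring that moved 100 kB in that
 * window then sees bytes_per_int = 100000 / 5000 = 20, right at the
 * low/bulk boundary tested in the first switch statement.
 */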
944
945 /**
946 * i40e_clean_programming_status - clean the programming status descriptor
947 * @rx_ring: the rx ring that has this descriptor
948 * @rx_desc: the rx descriptor written back by HW
949 *
950  * Flow director handles FD_FILTER_STATUS by checking whether its filter
951  * programming succeeded and taking action accordingly. FCoE handles its
952  * context/filter programming/invalidation status in the same way.
953 *
954 **/
955 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
956 union i40e_rx_desc *rx_desc)
957 {
958 u64 qw;
959 u8 id;
960
961 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
962 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
963 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
964
965 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
966 i40e_fd_handle_status(rx_ring, rx_desc, id);
967 #ifdef I40E_FCOE
968 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
969 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
970 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
971 #endif
972 }
973
974 /**
975 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
976 * @tx_ring: the tx ring to set up
977 *
978 * Return 0 on success, negative on error
979 **/
980 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
981 {
982 struct device *dev = tx_ring->dev;
983 int bi_size;
984
985 if (!dev)
986 return -ENOMEM;
987
988 /* warn if we are about to overwrite the pointer */
989 WARN_ON(tx_ring->tx_bi);
990 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
991 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
992 if (!tx_ring->tx_bi)
993 goto err;
994
995 /* round up to nearest 4K */
996 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
997 /* add u32 for head writeback, align after this takes care of
998 * guaranteeing this is at least one cache line in size
999 */
1000 tx_ring->size += sizeof(u32);
1001 tx_ring->size = ALIGN(tx_ring->size, 4096);
1002 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1003 &tx_ring->dma, GFP_KERNEL);
1004 if (!tx_ring->desc) {
1005 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1006 tx_ring->size);
1007 goto err;
1008 }
1009
1010 tx_ring->next_to_use = 0;
1011 tx_ring->next_to_clean = 0;
1012 return 0;
1013
1014 err:
1015 kfree(tx_ring->tx_bi);
1016 tx_ring->tx_bi = NULL;
1017 return -ENOMEM;
1018 }
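/* The extra u32 allocated past the descriptors above is the head
 * writeback location: hardware reports the last descriptor it has
 * processed there, and i40e_get_head() reads it in the cleanup path
 * rather than a head register.
 */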
1019
1020 /**
1021 * i40e_clean_rx_ring - Free Rx buffers
1022 * @rx_ring: ring to be cleaned
1023 **/
1024 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1025 {
1026 struct device *dev = rx_ring->dev;
1027 struct i40e_rx_buffer *rx_bi;
1028 unsigned long bi_size;
1029 u16 i;
1030
1031 /* ring already cleared, nothing to do */
1032 if (!rx_ring->rx_bi)
1033 return;
1034
1035 if (ring_is_ps_enabled(rx_ring)) {
1036 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1037
1038 rx_bi = &rx_ring->rx_bi[0];
1039 if (rx_bi->hdr_buf) {
1040 dma_free_coherent(dev,
1041 bufsz,
1042 rx_bi->hdr_buf,
1043 rx_bi->dma);
1044 for (i = 0; i < rx_ring->count; i++) {
1045 rx_bi = &rx_ring->rx_bi[i];
1046 rx_bi->dma = 0;
1047 rx_bi->hdr_buf = NULL;
1048 }
1049 }
1050 }
1051 /* Free all the Rx ring sk_buffs */
1052 for (i = 0; i < rx_ring->count; i++) {
1053 rx_bi = &rx_ring->rx_bi[i];
1054 if (rx_bi->dma) {
1055 dma_unmap_single(dev,
1056 rx_bi->dma,
1057 rx_ring->rx_buf_len,
1058 DMA_FROM_DEVICE);
1059 rx_bi->dma = 0;
1060 }
1061 if (rx_bi->skb) {
1062 dev_kfree_skb(rx_bi->skb);
1063 rx_bi->skb = NULL;
1064 }
1065 if (rx_bi->page) {
1066 if (rx_bi->page_dma) {
1067 dma_unmap_page(dev,
1068 rx_bi->page_dma,
1069 PAGE_SIZE,
1070 DMA_FROM_DEVICE);
1071 rx_bi->page_dma = 0;
1072 }
1073 __free_page(rx_bi->page);
1074 rx_bi->page = NULL;
1075 rx_bi->page_offset = 0;
1076 }
1077 }
1078
1079 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1080 memset(rx_ring->rx_bi, 0, bi_size);
1081
1082 /* Zero out the descriptor ring */
1083 memset(rx_ring->desc, 0, rx_ring->size);
1084
1085 rx_ring->next_to_clean = 0;
1086 rx_ring->next_to_use = 0;
1087 }
1088
1089 /**
1090 * i40e_free_rx_resources - Free Rx resources
1091 * @rx_ring: ring to clean the resources from
1092 *
1093 * Free all receive software resources
1094 **/
1095 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1096 {
1097 i40e_clean_rx_ring(rx_ring);
1098 kfree(rx_ring->rx_bi);
1099 rx_ring->rx_bi = NULL;
1100
1101 if (rx_ring->desc) {
1102 dma_free_coherent(rx_ring->dev, rx_ring->size,
1103 rx_ring->desc, rx_ring->dma);
1104 rx_ring->desc = NULL;
1105 }
1106 }
1107
1108 /**
1109 * i40e_alloc_rx_headers - allocate rx header buffers
1110 * @rx_ring: ring to alloc buffers
1111 *
1112 * Allocate rx header buffers for the entire ring. As these are static,
1113 * this is only called when setting up a new ring.
1114 **/
1115 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1116 {
1117 struct device *dev = rx_ring->dev;
1118 struct i40e_rx_buffer *rx_bi;
1119 dma_addr_t dma;
1120 void *buffer;
1121 int buf_size;
1122 int i;
1123
1124 if (rx_ring->rx_bi[0].hdr_buf)
1125 return;
1126 /* Make sure the buffers don't cross cache line boundaries. */
1127 buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1128 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1129 &dma, GFP_KERNEL);
1130 if (!buffer)
1131 return;
1132 for (i = 0; i < rx_ring->count; i++) {
1133 rx_bi = &rx_ring->rx_bi[i];
1134 rx_bi->dma = dma + (i * buf_size);
1135 rx_bi->hdr_buf = buffer + (i * buf_size);
1136 }
1137 }
1138
1139 /**
1140 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1141 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1142 *
1143 * Returns 0 on success, negative on failure
1144 **/
1145 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1146 {
1147 struct device *dev = rx_ring->dev;
1148 int bi_size;
1149
1150 /* warn if we are about to overwrite the pointer */
1151 WARN_ON(rx_ring->rx_bi);
1152 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1153 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1154 if (!rx_ring->rx_bi)
1155 goto err;
1156
1157 u64_stats_init(&rx_ring->syncp);
1158
1159 /* Round up to nearest 4K */
1160 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1161 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1162 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1163 rx_ring->size = ALIGN(rx_ring->size, 4096);
1164 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1165 &rx_ring->dma, GFP_KERNEL);
1166
1167 if (!rx_ring->desc) {
1168 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1169 rx_ring->size);
1170 goto err;
1171 }
1172
1173 rx_ring->next_to_clean = 0;
1174 rx_ring->next_to_use = 0;
1175
1176 return 0;
1177 err:
1178 kfree(rx_ring->rx_bi);
1179 rx_ring->rx_bi = NULL;
1180 return -ENOMEM;
1181 }
1182
1183 /**
1184 * i40e_release_rx_desc - Store the new tail and head values
1185 * @rx_ring: ring to bump
1186 * @val: new head index
1187 **/
1188 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1189 {
1190 rx_ring->next_to_use = val;
1191 /* Force memory writes to complete before letting h/w
1192 * know there are new descriptors to fetch. (Only
1193 * applicable for weak-ordered memory model archs,
1194 * such as IA-64).
1195 */
1196 wmb();
1197 writel(val, rx_ring->tail);
1198 }
1199
1200 /**
1201 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1202 * @rx_ring: ring to place buffers on
1203 * @cleaned_count: number of buffers to replace
1204 *
1205 * Returns true if any errors on allocation
1206 **/
1207 bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1208 {
1209 u16 i = rx_ring->next_to_use;
1210 union i40e_rx_desc *rx_desc;
1211 struct i40e_rx_buffer *bi;
1212 const int current_node = numa_node_id();
1213
1214 /* do nothing if no valid netdev defined */
1215 if (!rx_ring->netdev || !cleaned_count)
1216 return false;
1217
1218 while (cleaned_count--) {
1219 rx_desc = I40E_RX_DESC(rx_ring, i);
1220 bi = &rx_ring->rx_bi[i];
1221
1222 if (bi->skb) /* desc is in use */
1223 goto no_buffers;
1224
1225 /* If we've been moved to a different NUMA node, release the
1226 * page so we can get a new one on the current node.
1227 */
1228 if (bi->page && page_to_nid(bi->page) != current_node) {
1229 dma_unmap_page(rx_ring->dev,
1230 bi->page_dma,
1231 PAGE_SIZE,
1232 DMA_FROM_DEVICE);
1233 __free_page(bi->page);
1234 bi->page = NULL;
1235 bi->page_dma = 0;
1236 rx_ring->rx_stats.realloc_count++;
1237 } else if (bi->page) {
1238 rx_ring->rx_stats.page_reuse_count++;
1239 }
1240
1241 if (!bi->page) {
1242 bi->page = alloc_page(GFP_ATOMIC);
1243 if (!bi->page) {
1244 rx_ring->rx_stats.alloc_page_failed++;
1245 goto no_buffers;
1246 }
1247 bi->page_dma = dma_map_page(rx_ring->dev,
1248 bi->page,
1249 0,
1250 PAGE_SIZE,
1251 DMA_FROM_DEVICE);
1252 if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
1253 rx_ring->rx_stats.alloc_page_failed++;
1254 __free_page(bi->page);
1255 bi->page = NULL;
1256 bi->page_dma = 0;
1257 bi->page_offset = 0;
1258 goto no_buffers;
1259 }
1260 bi->page_offset = 0;
1261 }
1262
1263 /* Refresh the desc even if buffer_addrs didn't change
1264 * because each write-back erases this info.
1265 */
1266 rx_desc->read.pkt_addr =
1267 cpu_to_le64(bi->page_dma + bi->page_offset);
1268 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1269 i++;
1270 if (i == rx_ring->count)
1271 i = 0;
1272 }
1273
1274 if (rx_ring->next_to_use != i)
1275 i40e_release_rx_desc(rx_ring, i);
1276
1277 return false;
1278
1279 no_buffers:
1280 if (rx_ring->next_to_use != i)
1281 i40e_release_rx_desc(rx_ring, i);
1282
1283 /* make sure to come back via polling to try again after
1284 * allocation failure
1285 */
1286 return true;
1287 }
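/* Packet-split Rx treats each page as two half-page buffers: the code
 * above maps a whole page, and the cleanup path flips page_offset
 * between the two halves, only dropping the page once both halves are
 * in use (see the page_count() check in i40e_clean_rx_irq_ps()).
 */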
1288
1289 /**
1290 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1291 * @rx_ring: ring to place buffers on
1292 * @cleaned_count: number of buffers to replace
1293 *
1294 * Returns true if any errors on allocation
1295 **/
1296 bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1297 {
1298 u16 i = rx_ring->next_to_use;
1299 union i40e_rx_desc *rx_desc;
1300 struct i40e_rx_buffer *bi;
1301 struct sk_buff *skb;
1302
1303 /* do nothing if no valid netdev defined */
1304 if (!rx_ring->netdev || !cleaned_count)
1305 return false;
1306
1307 while (cleaned_count--) {
1308 rx_desc = I40E_RX_DESC(rx_ring, i);
1309 bi = &rx_ring->rx_bi[i];
1310 skb = bi->skb;
1311
1312 if (!skb) {
1313 skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1314 rx_ring->rx_buf_len,
1315 GFP_ATOMIC |
1316 __GFP_NOWARN);
1317 if (!skb) {
1318 rx_ring->rx_stats.alloc_buff_failed++;
1319 goto no_buffers;
1320 }
1321 /* initialize queue mapping */
1322 skb_record_rx_queue(skb, rx_ring->queue_index);
1323 bi->skb = skb;
1324 }
1325
1326 if (!bi->dma) {
1327 bi->dma = dma_map_single(rx_ring->dev,
1328 skb->data,
1329 rx_ring->rx_buf_len,
1330 DMA_FROM_DEVICE);
1331 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1332 rx_ring->rx_stats.alloc_buff_failed++;
1333 bi->dma = 0;
1334 dev_kfree_skb(bi->skb);
1335 bi->skb = NULL;
1336 goto no_buffers;
1337 }
1338 }
1339
1340 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1341 rx_desc->read.hdr_addr = 0;
1342 i++;
1343 if (i == rx_ring->count)
1344 i = 0;
1345 }
1346
1347 if (rx_ring->next_to_use != i)
1348 i40e_release_rx_desc(rx_ring, i);
1349
1350 return false;
1351
1352 no_buffers:
1353 if (rx_ring->next_to_use != i)
1354 i40e_release_rx_desc(rx_ring, i);
1355
1356 /* make sure to come back via polling to try again after
1357 * allocation failure
1358 */
1359 return true;
1360 }
1361
1362 /**
1363 * i40e_receive_skb - Send a completed packet up the stack
1364 * @rx_ring: rx ring in play
1365 * @skb: packet to send up
1366 * @vlan_tag: vlan tag for packet
1367 **/
1368 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1369 struct sk_buff *skb, u16 vlan_tag)
1370 {
1371 struct i40e_q_vector *q_vector = rx_ring->q_vector;
1372
1373 if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1374 (vlan_tag & VLAN_VID_MASK))
1375 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1376
1377 napi_gro_receive(&q_vector->napi, skb);
1378 }
1379
1380 /**
1381 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1382 * @vsi: the VSI we care about
1383 * @skb: skb currently being received and modified
1384 * @rx_status: status value of last descriptor in packet
1385 * @rx_error: error value of last descriptor in packet
1386 * @rx_ptype: ptype value of last descriptor in packet
1387 **/
1388 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1389 struct sk_buff *skb,
1390 u32 rx_status,
1391 u32 rx_error,
1392 u16 rx_ptype)
1393 {
1394 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1395 bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
1396
1397 skb->ip_summed = CHECKSUM_NONE;
1398
1399 /* Rx csum enabled and ip headers found? */
1400 if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1401 return;
1402
1403 /* did the hardware decode the packet and checksum? */
1404 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1405 return;
1406
1407 /* both known and outer_ip must be set for the below code to work */
1408 if (!(decoded.known && decoded.outer_ip))
1409 return;
1410
1411 ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1412 (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
1413 ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1414 (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
1415
1416 if (ipv4 &&
1417 (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1418 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1419 goto checksum_fail;
1420
1421 /* likely incorrect csum if alternate IP extension headers found */
1422 if (ipv6 &&
1423 rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1424 /* don't increment checksum err here, non-fatal err */
1425 return;
1426
1427 /* there was some L4 error, count error and punt packet to the stack */
1428 if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1429 goto checksum_fail;
1430
1431 /* handle packets that were not able to be checksummed due
1432 * to arrival speed, in this case the stack can compute
1433 * the csum.
1434 */
1435 if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1436 return;
1437
1438 /* The hardware supported by this driver does not validate outer
1439 * checksums for tunneled VXLAN or GENEVE frames. I don't agree
1440 * with it but the specification states that you "MAY validate", it
1441 * doesn't make it a hard requirement so if we have validated the
1442 * inner checksum report CHECKSUM_UNNECESSARY.
1443 */
1444
1445 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1446 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1447 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1448 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1449
1450 skb->ip_summed = CHECKSUM_UNNECESSARY;
1451 skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1452
1453 return;
1454
1455 checksum_fail:
1456 vsi->back->hw_csum_rx_error++;
1457 }
1458
1459 /**
1460 * i40e_ptype_to_htype - get a hash type
1461 * @ptype: the ptype value from the descriptor
1462 *
1463 * Returns a hash type to be used by skb_set_hash
1464 **/
1465 static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
1466 {
1467 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1468
1469 if (!decoded.known)
1470 return PKT_HASH_TYPE_NONE;
1471
1472 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1473 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1474 return PKT_HASH_TYPE_L4;
1475 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1476 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1477 return PKT_HASH_TYPE_L3;
1478 else
1479 return PKT_HASH_TYPE_L2;
1480 }
1481
1482 /**
1483 * i40e_rx_hash - set the hash value in the skb
1484 * @ring: descriptor ring
1485 * @rx_desc: specific descriptor
1486 **/
1487 static inline void i40e_rx_hash(struct i40e_ring *ring,
1488 union i40e_rx_desc *rx_desc,
1489 struct sk_buff *skb,
1490 u8 rx_ptype)
1491 {
1492 u32 hash;
1493 const __le64 rss_mask =
1494 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1495 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1496
1497 	if (!(ring->netdev->features & NETIF_F_RXHASH))
1498 return;
1499
1500 if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
1501 hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1502 skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
1503 }
1504 }
1505
1506 /**
1507 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1508 * @rx_ring: rx ring to clean
1509 * @budget: how many cleans we're allowed
1510 *
1511  * Returns the number of Rx packets cleaned (the full budget on allocation failure)
1512 **/
1513 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
1514 {
1515 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1516 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1517 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1518 struct i40e_vsi *vsi = rx_ring->vsi;
1519 u16 i = rx_ring->next_to_clean;
1520 union i40e_rx_desc *rx_desc;
1521 u32 rx_error, rx_status;
1522 bool failure = false;
1523 u8 rx_ptype;
1524 u64 qword;
1525 u32 copysize;
1526
1527 if (budget <= 0)
1528 return 0;
1529
1530 do {
1531 struct i40e_rx_buffer *rx_bi;
1532 struct sk_buff *skb;
1533 u16 vlan_tag;
1534 /* return some buffers to hardware, one at a time is too slow */
1535 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1536 failure = failure ||
1537 i40e_alloc_rx_buffers_ps(rx_ring,
1538 cleaned_count);
1539 cleaned_count = 0;
1540 }
1541
1542 i = rx_ring->next_to_clean;
1543 rx_desc = I40E_RX_DESC(rx_ring, i);
1544 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1545 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1546 I40E_RXD_QW1_STATUS_SHIFT;
1547
1548 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1549 break;
1550
1551 /* This memory barrier is needed to keep us from reading
1552 * any other fields out of the rx_desc until we know the
1553 * DD bit is set.
1554 */
1555 dma_rmb();
1556 /* sync header buffer for reading */
1557 dma_sync_single_range_for_cpu(rx_ring->dev,
1558 rx_ring->rx_bi[0].dma,
1559 i * rx_ring->rx_hdr_len,
1560 rx_ring->rx_hdr_len,
1561 DMA_FROM_DEVICE);
1562 if (i40e_rx_is_programming_status(qword)) {
1563 i40e_clean_programming_status(rx_ring, rx_desc);
1564 I40E_RX_INCREMENT(rx_ring, i);
1565 continue;
1566 }
1567 rx_bi = &rx_ring->rx_bi[i];
1568 skb = rx_bi->skb;
1569 if (likely(!skb)) {
1570 skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1571 rx_ring->rx_hdr_len,
1572 GFP_ATOMIC |
1573 __GFP_NOWARN);
1574 if (!skb) {
1575 rx_ring->rx_stats.alloc_buff_failed++;
1576 failure = true;
1577 break;
1578 }
1579
1580 /* initialize queue mapping */
1581 skb_record_rx_queue(skb, rx_ring->queue_index);
1582 /* we are reusing so sync this buffer for CPU use */
1583 dma_sync_single_range_for_cpu(rx_ring->dev,
1584 rx_ring->rx_bi[0].dma,
1585 i * rx_ring->rx_hdr_len,
1586 rx_ring->rx_hdr_len,
1587 DMA_FROM_DEVICE);
1588 }
1589 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1590 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1591 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1592 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1593 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1594 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1595
1596 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1597 I40E_RXD_QW1_ERROR_SHIFT;
1598 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1599 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1600
1601 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1602 I40E_RXD_QW1_PTYPE_SHIFT;
1603 /* sync half-page for reading */
1604 dma_sync_single_range_for_cpu(rx_ring->dev,
1605 rx_bi->page_dma,
1606 rx_bi->page_offset,
1607 PAGE_SIZE / 2,
1608 DMA_FROM_DEVICE);
1609 prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
1610 rx_bi->skb = NULL;
1611 cleaned_count++;
1612 copysize = 0;
1613 if (rx_hbo || rx_sph) {
1614 int len;
1615
1616 if (rx_hbo)
1617 len = I40E_RX_HDR_SIZE;
1618 else
1619 len = rx_header_len;
1620 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1621 } else if (skb->len == 0) {
1622 int len;
1623 unsigned char *va = page_address(rx_bi->page) +
1624 rx_bi->page_offset;
1625
1626 len = min(rx_packet_len, rx_ring->rx_hdr_len);
1627 memcpy(__skb_put(skb, len), va, len);
1628 copysize = len;
1629 rx_packet_len -= len;
1630 }
1631 /* Get the rest of the data if this was a header split */
1632 if (rx_packet_len) {
1633 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
1634 rx_bi->page,
1635 rx_bi->page_offset + copysize,
1636 rx_packet_len, I40E_RXBUFFER_2048);
1637
1638 /* If the page count is more than 2, then both halves
1639 * of the page are used and we need to free it. Do it
1640 * here instead of in the alloc code. Otherwise one
1641 * of the half-pages might be released between now and
1642 * then, and we wouldn't know which one to use.
1643 * Don't call get_page and free_page since those are
1644 * both expensive atomic operations that just change
1645 * the refcount in opposite directions. Just give the
1646 			 * page to the stack; it can have our refcount.
1647 */
1648 if (page_count(rx_bi->page) > 2) {
1649 dma_unmap_page(rx_ring->dev,
1650 rx_bi->page_dma,
1651 PAGE_SIZE,
1652 DMA_FROM_DEVICE);
1653 rx_bi->page = NULL;
1654 rx_bi->page_dma = 0;
1655 rx_ring->rx_stats.realloc_count++;
1656 } else {
1657 get_page(rx_bi->page);
1658 /* switch to the other half-page here; the
1659 * allocation code programs the right addr
1660 * into HW. If we haven't used this half-page,
1661 * the address won't be changed, and HW can
1662 * just use it next time through.
1663 */
1664 rx_bi->page_offset ^= PAGE_SIZE / 2;
1665 }
1666
1667 }
1668 I40E_RX_INCREMENT(rx_ring, i);
1669
1670 if (unlikely(
1671 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1672 struct i40e_rx_buffer *next_buffer;
1673
1674 next_buffer = &rx_ring->rx_bi[i];
1675 next_buffer->skb = skb;
1676 rx_ring->rx_stats.non_eop_descs++;
1677 continue;
1678 }
1679
1680 /* ERR_MASK will only have valid bits if EOP set */
1681 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1682 dev_kfree_skb_any(skb);
1683 continue;
1684 }
1685
1686 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1687
1688 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1689 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1690 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1691 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1692 rx_ring->last_rx_timestamp = jiffies;
1693 }
1694
1695 /* probably a little skewed due to removing CRC */
1696 total_rx_bytes += skb->len;
1697 total_rx_packets++;
1698
1699 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1700
1701 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1702
1703 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1704 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1705 : 0;
1706 #ifdef I40E_FCOE
1707 if (unlikely(
1708 i40e_rx_is_fcoe(rx_ptype) &&
1709 !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
1710 dev_kfree_skb_any(skb);
1711 continue;
1712 }
1713 #endif
1714 i40e_receive_skb(rx_ring, skb, vlan_tag);
1715
1716 rx_desc->wb.qword1.status_error_len = 0;
1717
1718 } while (likely(total_rx_packets < budget));
1719
1720 u64_stats_update_begin(&rx_ring->syncp);
1721 rx_ring->stats.packets += total_rx_packets;
1722 rx_ring->stats.bytes += total_rx_bytes;
1723 u64_stats_update_end(&rx_ring->syncp);
1724 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1725 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1726
1727 return failure ? budget : total_rx_packets;
1728 }
1729
1730 /**
1731 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1732 * @rx_ring: rx ring to clean
1733 * @budget: how many cleans we're allowed
1734 *
1735 * Returns number of packets cleaned
1736 **/
1737 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1738 {
1739 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1740 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1741 struct i40e_vsi *vsi = rx_ring->vsi;
1742 union i40e_rx_desc *rx_desc;
1743 u32 rx_error, rx_status;
1744 u16 rx_packet_len;
1745 bool failure = false;
1746 u8 rx_ptype;
1747 u64 qword;
1748 u16 i;
1749
1750 do {
1751 struct i40e_rx_buffer *rx_bi;
1752 struct sk_buff *skb;
1753 u16 vlan_tag;
1754 /* return some buffers to hardware, one at a time is too slow */
1755 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1756 failure = failure ||
1757 i40e_alloc_rx_buffers_1buf(rx_ring,
1758 cleaned_count);
1759 cleaned_count = 0;
1760 }
1761
1762 i = rx_ring->next_to_clean;
1763 rx_desc = I40E_RX_DESC(rx_ring, i);
1764 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1765 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1766 I40E_RXD_QW1_STATUS_SHIFT;
1767
1768 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1769 break;
1770
1771 /* This memory barrier is needed to keep us from reading
1772 * any other fields out of the rx_desc until we know the
1773 * DD bit is set.
1774 */
1775 dma_rmb();
1776
1777 if (i40e_rx_is_programming_status(qword)) {
1778 i40e_clean_programming_status(rx_ring, rx_desc);
1779 I40E_RX_INCREMENT(rx_ring, i);
1780 continue;
1781 }
1782 rx_bi = &rx_ring->rx_bi[i];
1783 skb = rx_bi->skb;
1784 prefetch(skb->data);
1785
1786 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1787 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1788
1789 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1790 I40E_RXD_QW1_ERROR_SHIFT;
1791 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1792
1793 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1794 I40E_RXD_QW1_PTYPE_SHIFT;
1795 rx_bi->skb = NULL;
1796 cleaned_count++;
1797
1798 /* Get the header and possibly the whole packet
1799 		 * If this is an skb from a previous receive, dma will be 0
1800 */
1801 skb_put(skb, rx_packet_len);
1802 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1803 DMA_FROM_DEVICE);
1804 rx_bi->dma = 0;
1805
1806 I40E_RX_INCREMENT(rx_ring, i);
1807
1808 if (unlikely(
1809 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1810 rx_ring->rx_stats.non_eop_descs++;
1811 continue;
1812 }
1813
1814 /* ERR_MASK will only have valid bits if EOP set */
1815 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1816 dev_kfree_skb_any(skb);
1817 continue;
1818 }
1819
1820 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1821 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1822 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1823 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1824 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1825 rx_ring->last_rx_timestamp = jiffies;
1826 }
1827
1828 /* probably a little skewed due to removing CRC */
1829 total_rx_bytes += skb->len;
1830 total_rx_packets++;
1831
1832 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1833
1834 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1835
1836 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1837 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1838 : 0;
1839 #ifdef I40E_FCOE
1840 if (unlikely(
1841 i40e_rx_is_fcoe(rx_ptype) &&
1842 !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
1843 dev_kfree_skb_any(skb);
1844 continue;
1845 }
1846 #endif
1847 i40e_receive_skb(rx_ring, skb, vlan_tag);
1848
1849 rx_desc->wb.qword1.status_error_len = 0;
1850 } while (likely(total_rx_packets < budget));
1851
1852 u64_stats_update_begin(&rx_ring->syncp);
1853 rx_ring->stats.packets += total_rx_packets;
1854 rx_ring->stats.bytes += total_rx_bytes;
1855 u64_stats_update_end(&rx_ring->syncp);
1856 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1857 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1858
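/* If buffer replenishment failed, report the full budget so that NAPI
 * keeps polling and the allocation is retried on the next pass.
 */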
1859 return failure ? budget : total_rx_packets;
1860 }
1861
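/**
 * i40e_buildreg_itr - build a value for writing to the PFINT_DYN_CTLN register
 * @type: ITR index to associate with this write (Rx, Tx, or none)
 * @itr: interval to program into the register's INTERVAL field
 **/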
1862 static u32 i40e_buildreg_itr(const int type, const u16 itr)
1863 {
1864 u32 val;
1865
1866 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1867 /* Don't clear PBA because that can cause lost interrupts that
1868 * came in while we were cleaning/polling
1869 */
1870 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1871 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1872
1873 return val;
1874 }
1875
1876 /* a small macro to shorten up some long lines */
1877 #define INTREG I40E_PFINT_DYN_CTLN
1878
1879 /**
1880 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1881 * @vsi: the VSI we care about
1882 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1883 *
1884 **/
1885 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1886 struct i40e_q_vector *q_vector)
1887 {
1888 struct i40e_hw *hw = &vsi->back->hw;
1889 bool rx = false, tx = false;
1890 u32 rxval, txval;
1891 int vector;
1892 int idx = q_vector->v_idx;
1893
1894 vector = (q_vector->v_idx + vsi->base_vector);
1895
1896 /* avoid dynamic calculation if in countdown mode OR if
1897 * all dynamic is disabled
1898 */
1899 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1900
1901 if (q_vector->itr_countdown > 0 ||
1902 (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
1903 !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
1904 goto enable_int;
1905 }
1906
1907 if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
1908 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1909 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1910 }
1911
1912 if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
1913 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1914 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1915 }
1916
1917 if (rx || tx) {
1918 /* get the higher of the two ITR adjustments and
1919 * use the same value for both ITR registers
1920 * when in adaptive mode (Rx and/or Tx)
1921 */
1922 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1923
1924 q_vector->tx.itr = q_vector->rx.itr = itr;
1925 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1926 tx = true;
1927 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1928 rx = true;
1929 }
1930
1931 /* only need to enable the interrupt once, but need
1932 * to possibly update both ITR values
1933 */
1934 if (rx) {
1935 /* set the INTENA_MSK_MASK so that this first write
1936 * won't actually enable the interrupt, instead just
1937 * updating the ITR (it's bit 31 PF and VF)
1938 */
1939 rxval |= BIT(31);
1940 /* don't check _DOWN because interrupt isn't being enabled */
1941 wr32(hw, INTREG(vector - 1), rxval);
1942 }
1943
1944 enable_int:
1945 if (!test_bit(__I40E_DOWN, &vsi->state))
1946 wr32(hw, INTREG(vector - 1), txval);
1947
1948 if (q_vector->itr_countdown)
1949 q_vector->itr_countdown--;
1950 else
1951 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1952 }
1953
1954 /**
1955 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
 * @napi: napi struct with our device's info in it
1957 * @budget: amount of work driver is allowed to do this pass, in packets
1958 *
1959 * This function will clean all queues associated with a q_vector.
1960 *
1961 * Returns the amount of work done
1962 **/
1963 int i40e_napi_poll(struct napi_struct *napi, int budget)
1964 {
1965 struct i40e_q_vector *q_vector =
1966 container_of(napi, struct i40e_q_vector, napi);
1967 struct i40e_vsi *vsi = q_vector->vsi;
1968 struct i40e_ring *ring;
1969 bool clean_complete = true;
1970 bool arm_wb = false;
1971 int budget_per_ring;
1972 int work_done = 0;
1973
1974 if (test_bit(__I40E_DOWN, &vsi->state)) {
1975 napi_complete(napi);
1976 return 0;
1977 }
1978
1979 /* Clear hung_detected bit */
1980 clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
1981 /* Since the actual Tx work is minimal, we can give the Tx a larger
1982 * budget and be more aggressive about cleaning up the Tx descriptors.
1983 */
1984 i40e_for_each_ring(ring, q_vector->tx) {
1985 if (!i40e_clean_tx_irq(vsi, ring, budget)) {
1986 clean_complete = false;
1987 continue;
1988 }
1989 arm_wb |= ring->arm_wb;
1990 ring->arm_wb = false;
1991 }
1992
1993 /* Handle case where we are called by netpoll with a budget of 0 */
1994 if (budget <= 0)
1995 goto tx_only;
1996
1997 /* We attempt to distribute budget to each Rx queue fairly, but don't
1998 * allow the budget to go below 1 because that would exit polling early.
1999 */
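/* For illustration: the default NAPI budget of 64 split across 4 ring
 * pairs gives each Rx ring a budget of 16 packets per poll.
 */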
2000 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
2001
2002 i40e_for_each_ring(ring, q_vector->rx) {
2003 int cleaned;
2004
2005 if (ring_is_ps_enabled(ring))
2006 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
2007 else
2008 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
2009
2010 work_done += cleaned;
2011 /* if we clean as many as budgeted, we must not be done */
2012 if (cleaned >= budget_per_ring)
2013 clean_complete = false;
2014 }
2015
2016 /* If work not completed, return budget and polling will return */
2017 if (!clean_complete) {
2018 tx_only:
2019 if (arm_wb) {
2020 q_vector->tx.ring[0].tx_stats.tx_force_wb++;
2021 i40e_enable_wb_on_itr(vsi, q_vector);
2022 }
2023 return budget;
2024 }
2025
2026 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
2027 q_vector->arm_wb_state = false;
2028
2029 /* Work is done so exit the polling mode and re-enable the interrupt */
2030 napi_complete_done(napi, work_done);
2031 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
2032 i40e_update_enable_itr(vsi, q_vector);
2033 } else { /* Legacy mode */
2034 i40e_irq_dynamic_enable_icr0(vsi->back, false);
2035 }
2036 return 0;
2037 }
2038
2039 /**
2040 * i40e_atr - Add a Flow Director ATR filter
2041 * @tx_ring: ring to add programming descriptor to
2042 * @skb: send buffer
2043 * @tx_flags: send tx flags
2044 **/
2045 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
2046 u32 tx_flags)
2047 {
2048 struct i40e_filter_program_desc *fdir_desc;
2049 struct i40e_pf *pf = tx_ring->vsi->back;
2050 union {
2051 unsigned char *network;
2052 struct iphdr *ipv4;
2053 struct ipv6hdr *ipv6;
2054 } hdr;
2055 struct tcphdr *th;
2056 unsigned int hlen;
2057 u32 flex_ptype, dtype_cmd;
2058 int l4_proto;
2059 u16 i;
2060
2061 /* make sure ATR is enabled */
2062 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
2063 return;
2064
2065 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2066 return;
2067
2068 /* if sampling is disabled do nothing */
2069 if (!tx_ring->atr_sample_rate)
2070 return;
2071
2072 /* Currently only IPv4/IPv6 with TCP is supported */
2073 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2074 return;
2075
2076 /* snag network header to get L4 type and address */
2077 hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
2078 skb_inner_network_header(skb) : skb_network_header(skb);
2079
2080 /* Note: tx_flags gets modified to reflect inner protocols in
2081 * tx_enable_csum function if encap is enabled.
2082 */
2083 if (tx_flags & I40E_TX_FLAGS_IPV4) {
2084 /* access ihl as u8 to avoid unaligned access on ia64 */
2085 hlen = (hdr.network[0] & 0x0F) << 2;
2086 l4_proto = hdr.ipv4->protocol;
2087 } else {
2088 hlen = hdr.network - skb->data;
2089 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
2090 hlen -= hdr.network - skb->data;
2091 }
2092
2093 if (l4_proto != IPPROTO_TCP)
2094 return;
2095
2096 th = (struct tcphdr *)(hdr.network + hlen);
2097
2098 /* Due to lack of space, no more new filters can be programmed */
2099 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2100 return;
2101 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2102 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
2103 /* HW ATR eviction will take care of removing filters on FIN
2104 * and RST packets.
2105 */
2106 if (th->fin || th->rst)
2107 return;
2108 }
2109
2110 tx_ring->atr_count++;
2111
2112 /* sample on all syn/fin/rst packets or once every atr sample rate */
2113 if (!th->fin &&
2114 !th->syn &&
2115 !th->rst &&
2116 (tx_ring->atr_count < tx_ring->atr_sample_rate))
2117 return;
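/* For illustration: with an atr_sample_rate of 20, execution reaches this
 * point for every SYN/FIN/RST segment and otherwise for every 20th
 * eligible packet since the last programmed filter.
 */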
2118
2119 tx_ring->atr_count = 0;
2120
2121 /* grab the next descriptor */
2122 i = tx_ring->next_to_use;
2123 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2124
2125 i++;
2126 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2127
2128 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2129 I40E_TXD_FLTR_QW0_QINDEX_MASK;
2130 flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
2131 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2132 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2133 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2134 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2135
2136 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2137
2138 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2139
2140 dtype_cmd |= (th->fin || th->rst) ?
2141 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2142 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2143 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2144 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2145
2146 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2147 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2148
2149 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2150 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2151
2152 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2153 if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
2154 dtype_cmd |=
2155 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2156 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2157 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2158 else
2159 dtype_cmd |=
2160 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2161 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2162 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2163
2164 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2165 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
2166 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2167
2168 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2169 fdir_desc->rsvd = cpu_to_le32(0);
2170 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2171 fdir_desc->fd_id = cpu_to_le32(0);
2172 }
2173
2174 /**
2175 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2176 * @skb: send buffer
2177 * @tx_ring: ring to send buffer on
2178 * @flags: the tx flags to be set
2179 *
 * Checks the skb and sets up the corresponding generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped upon error,
 * otherwise returns 0 to indicate the flags have been set properly.
2185 **/
2186 #ifdef I40E_FCOE
2187 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2188 struct i40e_ring *tx_ring,
2189 u32 *flags)
2190 #else
2191 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2192 struct i40e_ring *tx_ring,
2193 u32 *flags)
2194 #endif
2195 {
2196 __be16 protocol = skb->protocol;
2197 u32 tx_flags = 0;
2198
2199 if (protocol == htons(ETH_P_8021Q) &&
2200 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2201 /* When HW VLAN acceleration is turned off by the user the
2202 * stack sets the protocol to 8021q so that the driver
2203 * can take any steps required to support the SW only
2204 * VLAN handling. In our case the driver doesn't need
2205 * to take any further steps so just set the protocol
2206 * to the encapsulated ethertype.
2207 */
2208 skb->protocol = vlan_get_protocol(skb);
2209 goto out;
2210 }
2211
2212 /* if we have a HW VLAN tag being added, default to the HW one */
2213 if (skb_vlan_tag_present(skb)) {
2214 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2215 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2216 /* else if it is a SW VLAN, check the next protocol and store the tag */
2217 } else if (protocol == htons(ETH_P_8021Q)) {
2218 struct vlan_hdr *vhdr, _vhdr;
2219
2220 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2221 if (!vhdr)
2222 return -EINVAL;
2223
2224 protocol = vhdr->h_vlan_encapsulated_proto;
2225 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2226 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2227 }
2228
2229 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2230 goto out;
2231
2232 /* Insert 802.1p priority into VLAN header */
2233 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2234 (skb->priority != TC_PRIO_CONTROL)) {
2235 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2236 tx_flags |= (skb->priority & 0x7) <<
2237 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2238 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2239 struct vlan_ethhdr *vhdr;
2240 int rc;
2241
2242 rc = skb_cow_head(skb, 0);
2243 if (rc < 0)
2244 return rc;
2245 vhdr = (struct vlan_ethhdr *)skb->data;
2246 vhdr->h_vlan_TCI = htons(tx_flags >>
2247 I40E_TX_FLAGS_VLAN_SHIFT);
2248 } else {
2249 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2250 }
2251 }
2252
2253 out:
2254 *flags = tx_flags;
2255 return 0;
2256 }
2257
2258 /**
2259 * i40e_tso - set up the tso context descriptor
2260 * @skb: ptr to the skb we're sending
2261 * @hdr_len: ptr to the size of the packet header
2262 * @cd_type_cmd_tso_mss: Quad Word 1
2263 *
 * Returns 0 if no TSO is needed, 1 if TSO was set up, or a negative error code
2265 **/
2266 static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
2267 {
2268 u64 cd_cmd, cd_tso_len, cd_mss;
2269 union {
2270 struct iphdr *v4;
2271 struct ipv6hdr *v6;
2272 unsigned char *hdr;
2273 } ip;
2274 union {
2275 struct tcphdr *tcp;
2276 struct udphdr *udp;
2277 unsigned char *hdr;
2278 } l4;
2279 u32 paylen, l4_offset;
2280 int err;
2281
2282 if (skb->ip_summed != CHECKSUM_PARTIAL)
2283 return 0;
2284
2285 if (!skb_is_gso(skb))
2286 return 0;
2287
2288 err = skb_cow_head(skb, 0);
2289 if (err < 0)
2290 return err;
2291
2292 ip.hdr = skb_network_header(skb);
2293 l4.hdr = skb_transport_header(skb);
2294
2295 /* initialize outer IP header fields */
2296 if (ip.v4->version == 4) {
2297 ip.v4->tot_len = 0;
2298 ip.v4->check = 0;
2299 } else {
2300 ip.v6->payload_len = 0;
2301 }
2302
2303 if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
2304 SKB_GSO_GRE_CSUM |
2305 SKB_GSO_IPIP |
2306 SKB_GSO_SIT |
2307 SKB_GSO_UDP_TUNNEL |
2308 SKB_GSO_UDP_TUNNEL_CSUM)) {
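/* Under GSO partial the stack has already set the outer UDP header to
 * per-segment values, so the length and checksum are only adjusted here
 * for plain UDP_TUNNEL_CSUM segmentation.
 */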
2309 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
2310 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
2311 l4.udp->len = 0;
2312
2313 /* determine offset of outer transport header */
2314 l4_offset = l4.hdr - skb->data;
2315
2316 /* remove payload length from outer checksum */
2317 paylen = skb->len - l4_offset;
2318 csum_replace_by_diff(&l4.udp->check, htonl(paylen));
2319 }
2320
2321 /* reset pointers to inner headers */
2322 ip.hdr = skb_inner_network_header(skb);
2323 l4.hdr = skb_inner_transport_header(skb);
2324
2325 /* initialize inner IP header fields */
2326 if (ip.v4->version == 4) {
2327 ip.v4->tot_len = 0;
2328 ip.v4->check = 0;
2329 } else {
2330 ip.v6->payload_len = 0;
2331 }
2332 }
2333
2334 /* determine offset of inner transport header */
2335 l4_offset = l4.hdr - skb->data;
2336
2337 /* remove payload length from inner checksum */
2338 paylen = skb->len - l4_offset;
2339 csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
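/* The stack seeds l4.tcp->check with a pseudo-header checksum that
 * includes the full TCP length; backing it out here lets the hardware
 * insert the correct per-segment length for each segment it produces.
 */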
2340
2341 /* compute length of segmentation header */
2342 *hdr_len = (l4.tcp->doff * 4) + l4_offset;
2343
2344 /* find the field values */
2345 cd_cmd = I40E_TX_CTX_DESC_TSO;
2346 cd_tso_len = skb->len - *hdr_len;
2347 cd_mss = skb_shinfo(skb)->gso_size;
2348 *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2349 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2350 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2351 return 1;
2352 }
2353
2354 /**
2355 * i40e_tsyn - set up the tsyn context descriptor
2356 * @tx_ring: ptr to the ring to send
2357 * @skb: ptr to the skb we're sending
2358 * @tx_flags: the collected send information
2359 * @cd_type_cmd_tso_mss: Quad Word 1
2360 *
2361 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2362 **/
2363 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2364 u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2365 {
2366 struct i40e_pf *pf;
2367
2368 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2369 return 0;
2370
2371 /* Tx timestamps cannot be sampled when doing TSO */
2372 if (tx_flags & I40E_TX_FLAGS_TSO)
2373 return 0;
2374
2375 /* only timestamp the outbound packet if the user has requested it and
2376 * we are not already transmitting a packet to be timestamped
2377 */
2378 pf = i40e_netdev_to_pf(tx_ring->netdev);
2379 if (!(pf->flags & I40E_FLAG_PTP))
2380 return 0;
2381
2382 if (pf->ptp_tx &&
2383 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2384 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2385 pf->ptp_tx_skb = skb_get(skb);
2386 } else {
2387 return 0;
2388 }
2389
2390 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2391 I40E_TXD_CTX_QW1_CMD_SHIFT;
2392
2393 return 1;
2394 }
2395
2396 /**
2397 * i40e_tx_enable_csum - Enable Tx checksum offloads
2398 * @skb: send buffer
2399 * @tx_flags: pointer to Tx flags currently set
2400 * @td_cmd: Tx descriptor command bits to set
2401 * @td_offset: Tx descriptor header offsets to set
2402 * @tx_ring: Tx descriptor ring
2403 * @cd_tunneling: ptr to context desc bits
2404 **/
2405 static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2406 u32 *td_cmd, u32 *td_offset,
2407 struct i40e_ring *tx_ring,
2408 u32 *cd_tunneling)
2409 {
2410 union {
2411 struct iphdr *v4;
2412 struct ipv6hdr *v6;
2413 unsigned char *hdr;
2414 } ip;
2415 union {
2416 struct tcphdr *tcp;
2417 struct udphdr *udp;
2418 unsigned char *hdr;
2419 } l4;
2420 unsigned char *exthdr;
2421 u32 offset, cmd = 0;
2422 __be16 frag_off;
2423 u8 l4_proto = 0;
2424
2425 if (skb->ip_summed != CHECKSUM_PARTIAL)
2426 return 0;
2427
2428 ip.hdr = skb_network_header(skb);
2429 l4.hdr = skb_transport_header(skb);
2430
2431 /* compute outer L2 header size */
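/* MACLEN is expressed in 2-byte words, so e.g. a standard 14-byte
 * Ethernet header is encoded as 7.
 */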
2432 offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2433
2434 if (skb->encapsulation) {
2435 u32 tunnel = 0;
2436 /* define outer network header type */
2437 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2438 tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2439 I40E_TX_CTX_EXT_IP_IPV4 :
2440 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2441
2442 l4_proto = ip.v4->protocol;
2443 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2444 tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
2445
2446 exthdr = ip.hdr + sizeof(*ip.v6);
2447 l4_proto = ip.v6->nexthdr;
2448 if (l4.hdr != exthdr)
2449 ipv6_skip_exthdr(skb, exthdr - skb->data,
2450 &l4_proto, &frag_off);
2451 }
2452
2453 /* define outer transport */
2454 switch (l4_proto) {
2455 case IPPROTO_UDP:
2456 tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
2457 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2458 break;
2459 case IPPROTO_GRE:
2460 tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
2461 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2462 break;
2463 case IPPROTO_IPIP:
2464 case IPPROTO_IPV6:
2465 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2466 l4.hdr = skb_inner_network_header(skb);
2467 break;
2468 default:
2469 if (*tx_flags & I40E_TX_FLAGS_TSO)
2470 return -1;
2471
2472 skb_checksum_help(skb);
2473 return 0;
2474 }
2475
2476 /* compute outer L3 header size */
2477 tunnel |= ((l4.hdr - ip.hdr) / 4) <<
2478 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
2479
2480 /* switch IP header pointer from outer to inner header */
2481 ip.hdr = skb_inner_network_header(skb);
2482
2483 /* compute tunnel header size */
2484 tunnel |= ((ip.hdr - l4.hdr) / 2) <<
2485 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2486
2487 /* indicate if we need to offload outer UDP header */
2488 if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
2489 !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
2490 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
2491 tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2492
2493 /* record tunnel offload values */
2494 *cd_tunneling |= tunnel;
2495
2496 /* switch L4 header pointer from outer to inner */
2497 l4.hdr = skb_inner_transport_header(skb);
2498 l4_proto = 0;
2499
2500 /* reset type as we transition from outer to inner headers */
2501 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
2502 if (ip.v4->version == 4)
2503 *tx_flags |= I40E_TX_FLAGS_IPV4;
2504 if (ip.v6->version == 6)
2505 *tx_flags |= I40E_TX_FLAGS_IPV6;
2506 }
2507
2508 /* Enable IP checksum offloads */
2509 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2510 l4_proto = ip.v4->protocol;
2511 /* the stack computes the IP header already, the only time we
2512 * need the hardware to recompute it is in the case of TSO.
2513 */
2514 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2515 I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
2516 I40E_TX_DESC_CMD_IIPT_IPV4;
2517 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2518 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2519
2520 exthdr = ip.hdr + sizeof(*ip.v6);
2521 l4_proto = ip.v6->nexthdr;
2522 if (l4.hdr != exthdr)
2523 ipv6_skip_exthdr(skb, exthdr - skb->data,
2524 &l4_proto, &frag_off);
2525 }
2526
2527 /* compute inner L3 header size */
2528 offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2529
2530 /* Enable L4 checksum offloads */
2531 switch (l4_proto) {
2532 case IPPROTO_TCP:
2533 /* enable checksum offloads */
2534 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2535 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2536 break;
2537 case IPPROTO_SCTP:
2538 /* enable SCTP checksum offload */
2539 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2540 offset |= (sizeof(struct sctphdr) >> 2) <<
2541 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2542 break;
2543 case IPPROTO_UDP:
2544 /* enable UDP checksum offload */
2545 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2546 offset |= (sizeof(struct udphdr) >> 2) <<
2547 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2548 break;
2549 default:
2550 if (*tx_flags & I40E_TX_FLAGS_TSO)
2551 return -1;
2552 skb_checksum_help(skb);
2553 return 0;
2554 }
2555
2556 *td_cmd |= cmd;
2557 *td_offset |= offset;
2558
2559 return 1;
2560 }
2561
2562 /**
 * i40e_create_tx_ctx - Build the Tx context descriptor
2564 * @tx_ring: ring to create the descriptor on
2565 * @cd_type_cmd_tso_mss: Quad Word 1
2566 * @cd_tunneling: Quad Word 0 - bits 0-31
2567 * @cd_l2tag2: Quad Word 0 - bits 32-63
2568 **/
2569 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2570 const u64 cd_type_cmd_tso_mss,
2571 const u32 cd_tunneling, const u32 cd_l2tag2)
2572 {
2573 struct i40e_tx_context_desc *context_desc;
2574 int i = tx_ring->next_to_use;
2575
2576 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2577 !cd_tunneling && !cd_l2tag2)
2578 return;
2579
2580 /* grab the next descriptor */
2581 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2582
2583 i++;
2584 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2585
2586 /* cpu_to_le32 and assign to struct fields */
2587 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2588 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2589 context_desc->rsvd = cpu_to_le16(0);
2590 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2591 }
2592
2593 /**
2594 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2595 * @tx_ring: the ring to be checked
 * @size: the number of descriptors we want to assure are available
2597 *
2598 * Returns -EBUSY if a stop is needed, else 0
2599 **/
2600 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2601 {
2602 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2603 /* Memory barrier before checking head and tail */
2604 smp_mb();
2605
/* Check again in case another CPU has just made room available. */
2607 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2608 return -EBUSY;
2609
2610 /* A reprieve! - use start_queue because it doesn't call schedule */
2611 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2612 ++tx_ring->tx_stats.restart_queue;
2613 return 0;
2614 }
2615
2616 /**
2617 * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
2618 * @skb: send buffer
2619 *
2620 * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
2621 * and so we need to figure out the cases where we need to linearize the skb.
2622 *
2623 * For TSO we need to count the TSO header and segment payload separately.
2624 * As such we need to check cases where we have 7 fragments or more as we
2625 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
2626 * the segment payload in the first descriptor, and another 7 for the
2627 * fragments.
2628 **/
2629 bool __i40e_chk_linearize(struct sk_buff *skb)
2630 {
2631 const struct skb_frag_struct *frag, *stale;
2632 int nr_frags, sum;
2633
2634 /* no need to check if number of frags is less than 7 */
2635 nr_frags = skb_shinfo(skb)->nr_frags;
2636 if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
2637 return false;
2638
2639 /* We need to walk through the list and validate that each group
2640 * of 6 fragments totals at least gso_size. However we don't need
2641 * to perform such validation on the last 6 since the last 6 cannot
2642 * inherit any data from a descriptor after them.
2643 */
2644 nr_frags -= I40E_MAX_BUFFER_TXD - 2;
2645 frag = &skb_shinfo(skb)->frags[0];
2646
2647 /* Initialize size to the negative value of gso_size minus 1. We
 * use this as the worst case scenario in which the frag ahead
2649 * of us only provides one byte which is why we are limited to 6
2650 * descriptors for a single transmit as the header and previous
2651 * fragment are already consuming 2 descriptors.
2652 */
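/* Worked example (hypothetical sizes): with gso_size = 7000 and eight
 * 1000-byte fragments, sum starts at -6999, frags 0 through 4 bring it
 * to -1999, and adding frag 5 still leaves it negative, so the skb gets
 * linearized.
 */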
2653 sum = 1 - skb_shinfo(skb)->gso_size;
2654
2655 /* Add size of frags 0 through 4 to create our initial sum */
2656 sum += skb_frag_size(frag++);
2657 sum += skb_frag_size(frag++);
2658 sum += skb_frag_size(frag++);
2659 sum += skb_frag_size(frag++);
2660 sum += skb_frag_size(frag++);
2661
2662 /* Walk through fragments adding latest fragment, testing it, and
2663 * then removing stale fragments from the sum.
2664 */
2665 stale = &skb_shinfo(skb)->frags[0];
2666 for (;;) {
2667 sum += skb_frag_size(frag++);
2668
2669 /* if sum is negative we failed to make sufficient progress */
2670 if (sum < 0)
2671 return true;
2672
2673 /* use pre-decrement to avoid processing last fragment */
2674 if (!--nr_frags)
2675 break;
2676
2677 sum -= skb_frag_size(stale++);
2678 }
2679
2680 return false;
2681 }
2682
2683 /**
2684 * i40e_tx_map - Build the Tx descriptor
2685 * @tx_ring: ring to send buffer on
2686 * @skb: send buffer
2687 * @first: first buffer info buffer to use
2688 * @tx_flags: collected send information
2689 * @hdr_len: size of the packet header
2690 * @td_cmd: the command field in the descriptor
2691 * @td_offset: offset for checksum or crc
2692 **/
2693 #ifdef I40E_FCOE
2694 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2695 struct i40e_tx_buffer *first, u32 tx_flags,
2696 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2697 #else
2698 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2699 struct i40e_tx_buffer *first, u32 tx_flags,
2700 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2701 #endif
2702 {
2703 unsigned int data_len = skb->data_len;
2704 unsigned int size = skb_headlen(skb);
2705 struct skb_frag_struct *frag;
2706 struct i40e_tx_buffer *tx_bi;
2707 struct i40e_tx_desc *tx_desc;
2708 u16 i = tx_ring->next_to_use;
2709 u32 td_tag = 0;
2710 dma_addr_t dma;
2711 u16 gso_segs;
2712 u16 desc_count = 0;
2713 bool tail_bump = true;
2714 bool do_rs = false;
2715
2716 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2717 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2718 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2719 I40E_TX_FLAGS_VLAN_SHIFT;
2720 }
2721
2722 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2723 gso_segs = skb_shinfo(skb)->gso_segs;
2724 else
2725 gso_segs = 1;
2726
2727 /* multiply data chunks by size of headers */
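/* For example, a TSO skb of 6100 bytes with a 100-byte header and
 * gso_segs = 6 is accounted as 6000 + 6 * 100 = 6600 bytes on the wire.
 */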
2728 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2729 first->gso_segs = gso_segs;
2730 first->skb = skb;
2731 first->tx_flags = tx_flags;
2732
2733 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2734
2735 tx_desc = I40E_TX_DESC(tx_ring, i);
2736 tx_bi = first;
2737
2738 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2739 unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2740
2741 if (dma_mapping_error(tx_ring->dev, dma))
2742 goto dma_error;
2743
2744 /* record length, and DMA address */
2745 dma_unmap_len_set(tx_bi, len, size);
2746 dma_unmap_addr_set(tx_bi, dma, dma);
2747
2748 /* align size to end of page */
2749 max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1);
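/* -dma & (I40E_MAX_READ_REQ_SIZE - 1) is the distance from dma to the
 * next read request boundary, so each full chunk of this buffer ends
 * aligned to that boundary.
 */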
2750 tx_desc->buffer_addr = cpu_to_le64(dma);
2751
2752 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2753 tx_desc->cmd_type_offset_bsz =
2754 build_ctob(td_cmd, td_offset,
2755 max_data, td_tag);
2756
2757 tx_desc++;
2758 i++;
2759 desc_count++;
2760
2761 if (i == tx_ring->count) {
2762 tx_desc = I40E_TX_DESC(tx_ring, 0);
2763 i = 0;
2764 }
2765
2766 dma += max_data;
2767 size -= max_data;
2768
2769 max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2770 tx_desc->buffer_addr = cpu_to_le64(dma);
2771 }
2772
2773 if (likely(!data_len))
2774 break;
2775
2776 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2777 size, td_tag);
2778
2779 tx_desc++;
2780 i++;
2781 desc_count++;
2782
2783 if (i == tx_ring->count) {
2784 tx_desc = I40E_TX_DESC(tx_ring, 0);
2785 i = 0;
2786 }
2787
2788 size = skb_frag_size(frag);
2789 data_len -= size;
2790
2791 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2792 DMA_TO_DEVICE);
2793
2794 tx_bi = &tx_ring->tx_bi[i];
2795 }
2796
2797 /* set next_to_watch value indicating a packet is present */
2798 first->next_to_watch = tx_desc;
2799
2800 i++;
2801 if (i == tx_ring->count)
2802 i = 0;
2803
2804 tx_ring->next_to_use = i;
2805
2806 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2807 tx_ring->queue_index),
2808 first->bytecount);
2809 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2810
2811 /* Algorithm to optimize tail and RS bit setting:
2812 * if xmit_more is supported
2813 * if xmit_more is true
2814 * do not update tail and do not mark RS bit.
2815 * if xmit_more is false and last xmit_more was false
2816 * if every packet spanned less than 4 desc
2817 * then set RS bit on 4th packet and update tail
2818 * on every packet
2819 * else
2820 * update tail and set RS bit on every packet.
2821 * if xmit_more is false and last_xmit_more was true
2822 * update tail and set RS bit.
2823 *
2824 * Optimization: wmb to be issued only in case of tail update.
2825 * Also optimize the Descriptor WB path for RS bit with the same
2826 * algorithm.
2827 *
2828 * Note: If there are less than 4 packets
2829 * pending and interrupts were disabled the service task will
2830 * trigger a force WB.
2831 */
2832 if (skb->xmit_more &&
2833 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2834 tx_ring->queue_index))) {
2835 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2836 tail_bump = false;
2837 } else if (!skb->xmit_more &&
2838 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2839 tx_ring->queue_index)) &&
2840 (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2841 (tx_ring->packet_stride < WB_STRIDE) &&
2842 (desc_count < WB_STRIDE)) {
2843 tx_ring->packet_stride++;
2844 } else {
2845 tx_ring->packet_stride = 0;
2846 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2847 do_rs = true;
2848 }
2849 if (do_rs)
2850 tx_ring->packet_stride = 0;
2851
2852 tx_desc->cmd_type_offset_bsz =
2853 build_ctob(td_cmd, td_offset, size, td_tag) |
2854 cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2855 I40E_TX_DESC_CMD_EOP) <<
2856 I40E_TXD_QW1_CMD_SHIFT);
2857
2858 /* notify HW of packet */
2859 if (!tail_bump)
2860 prefetchw(tx_desc + 1);
2861
2862 if (tail_bump) {
2863 /* Force memory writes to complete before letting h/w
2864 * know there are new descriptors to fetch. (Only
2865 * applicable for weak-ordered memory model archs,
2866 * such as IA-64).
2867 */
2868 wmb();
2869 writel(i, tx_ring->tail);
2870 }
2871
2872 return;
2873
2874 dma_error:
2875 dev_info(tx_ring->dev, "TX DMA map failed\n");
2876
2877 /* clear dma mappings for failed tx_bi map */
2878 for (;;) {
2879 tx_bi = &tx_ring->tx_bi[i];
2880 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2881 if (tx_bi == first)
2882 break;
2883 if (i == 0)
2884 i = tx_ring->count;
2885 i--;
2886 }
2887
2888 tx_ring->next_to_use = i;
2889 }
2890
2891 /**
2892 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2893 * @skb: send buffer
2894 * @tx_ring: ring to send buffer on
2895 *
2896 * Returns NETDEV_TX_OK if sent, else an error code
2897 **/
2898 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2899 struct i40e_ring *tx_ring)
2900 {
2901 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2902 u32 cd_tunneling = 0, cd_l2tag2 = 0;
2903 struct i40e_tx_buffer *first;
2904 u32 td_offset = 0;
2905 u32 tx_flags = 0;
2906 __be16 protocol;
2907 u32 td_cmd = 0;
2908 u8 hdr_len = 0;
2909 int tso, count;
2910 int tsyn;
2911
2912 /* prefetch the data, we'll need it later */
2913 prefetch(skb->data);
2914
2915 count = i40e_xmit_descriptor_count(skb);
2916 if (i40e_chk_linearize(skb, count)) {
2917 if (__skb_linearize(skb))
2918 goto out_drop;
2919 count = i40e_txd_use_count(skb->len);
2920 tx_ring->tx_stats.tx_linearize++;
2921 }
2922
2923 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2924 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2925 * + 4 desc gap to avoid the cache line where head is,
2926 * + 1 desc for context descriptor,
2927 * otherwise try next time
2928 */
2929 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2930 tx_ring->tx_stats.tx_busy++;
2931 return NETDEV_TX_BUSY;
2932 }
2933
2934 /* prepare the xmit flags */
2935 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2936 goto out_drop;
2937
2938 /* obtain protocol of skb */
2939 protocol = vlan_get_protocol(skb);
2940
2941 /* record the location of the first descriptor for this packet */
2942 first = &tx_ring->tx_bi[tx_ring->next_to_use];
2943
2944 /* setup IPv4/IPv6 offloads */
2945 if (protocol == htons(ETH_P_IP))
2946 tx_flags |= I40E_TX_FLAGS_IPV4;
2947 else if (protocol == htons(ETH_P_IPV6))
2948 tx_flags |= I40E_TX_FLAGS_IPV6;
2949
2950 tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
2951
2952 if (tso < 0)
2953 goto out_drop;
2954 else if (tso)
2955 tx_flags |= I40E_TX_FLAGS_TSO;
2956
2957 /* Always offload the checksum, since it's in the data descriptor */
2958 tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2959 tx_ring, &cd_tunneling);
2960 if (tso < 0)
2961 goto out_drop;
2962
2963 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2964
2965 if (tsyn)
2966 tx_flags |= I40E_TX_FLAGS_TSYN;
2967
2968 skb_tx_timestamp(skb);
2969
2970 /* always enable CRC insertion offload */
2971 td_cmd |= I40E_TX_DESC_CMD_ICRC;
2972
2973 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2974 cd_tunneling, cd_l2tag2);
2975
2976 /* Add Flow Director ATR if it's enabled.
2977 *
2978 * NOTE: this must always be directly before the data descriptor.
2979 */
2980 i40e_atr(tx_ring, skb, tx_flags);
2981
2982 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2983 td_cmd, td_offset);
2984
2985 return NETDEV_TX_OK;
2986
2987 out_drop:
2988 dev_kfree_skb_any(skb);
2989 return NETDEV_TX_OK;
2990 }
2991
2992 /**
2993 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2994 * @skb: send buffer
2995 * @netdev: network interface device structure
2996 *
2997 * Returns NETDEV_TX_OK if sent, else an error code
2998 **/
2999 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
3000 {
3001 struct i40e_netdev_priv *np = netdev_priv(netdev);
3002 struct i40e_vsi *vsi = np->vsi;
3003 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
3004
3005 /* hardware can't handle really short frames, hardware padding works
3006 * beyond this point
3007 */
3008 if (skb_put_padto(skb, I40E_MIN_TX_LEN))
3009 return NETDEV_TX_OK;
3010
3011 return i40e_xmit_frame_ring(skb, tx_ring);
3012 }