i40e/i40evf: refactor tx timeout logic
drivers/net/ethernet/intel/i40e/i40e_txrx.c
1 /*******************************************************************************
2 *
3 * Intel Ethernet Controller XL710 Family Linux Driver
4 * Copyright(c) 2013 - 2014 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 * Contact Information:
22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24 *
25 ******************************************************************************/
26
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 u32 td_tag)
34 {
35 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
37 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
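/* Illustrative example (not part of the driver): a plain data descriptor
 * covering a 256-byte buffer, with end-of-packet and report-status set and
 * no offload offsets or VLAN tag, would be built as
 *
 *	tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_EOP |
 *						  I40E_TX_DESC_CMD_RS,
 *						  0, 256, 0);
 */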
41
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
44 /**
45 * i40e_program_fdir_filter - Program a Flow Director filter
46 * @fdir_data: Flow Director filter data (the filter parameters)
47 * @raw_packet: the pre-allocated packet buffer for FDir
48 * @pf: The PF pointer
49 * @add: True for add/update, False for remove
50 **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 struct i40e_pf *pf, bool add)
53 {
54 struct i40e_filter_program_desc *fdir_desc;
55 struct i40e_tx_buffer *tx_buf, *first;
56 struct i40e_tx_desc *tx_desc;
57 struct i40e_ring *tx_ring;
58 unsigned int fpt, dcc;
59 struct i40e_vsi *vsi;
60 struct device *dev;
61 dma_addr_t dma;
62 u32 td_cmd = 0;
63 u16 delay = 0;
64 u16 i;
65
66 /* find existing FDIR VSI */
67 vsi = NULL;
68 for (i = 0; i < pf->num_alloc_vsi; i++)
69 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 vsi = pf->vsi[i];
71 if (!vsi)
72 return -ENOENT;
73
74 tx_ring = vsi->tx_rings[0];
75 dev = tx_ring->dev;
76
77 /* we need two descriptors to add/del a filter and we can wait */
78 do {
79 if (I40E_DESC_UNUSED(tx_ring) > 1)
80 break;
81 msleep_interruptible(1);
82 delay++;
83 } while (delay < I40E_FD_CLEAN_DELAY);
84
85 if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 return -EAGAIN;
87
88 dma = dma_map_single(dev, raw_packet,
89 I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 if (dma_mapping_error(dev, dma))
91 goto dma_fail;
92
93 /* grab the next descriptor */
94 i = tx_ring->next_to_use;
95 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 first = &tx_ring->tx_bi[i];
97 memset(first, 0, sizeof(struct i40e_tx_buffer));
98
99 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100
101 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 I40E_TXD_FLTR_QW0_QINDEX_MASK;
103
104 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106
107 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109
110 /* Use LAN VSI Id if not programmed by user */
111 if (fdir_data->dest_vsi == 0)
112 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114 else
115 fpt |= ((u32)fdir_data->dest_vsi <<
116 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118
119 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120
121 if (add)
122 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124 else
125 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127
128 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 I40E_TXD_FLTR_QW1_DEST_MASK;
130
131 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133
134 if (fdir_data->cnt_index != 0) {
135 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 dcc |= ((u32)fdir_data->cnt_index <<
137 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139 }
140
141 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 fdir_desc->rsvd = cpu_to_le32(0);
143 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145
146 /* Now program a dummy descriptor */
147 i = tx_ring->next_to_use;
148 tx_desc = I40E_TX_DESC(tx_ring, i);
149 tx_buf = &tx_ring->tx_bi[i];
150
151 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152
153 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154
155 /* record length, and DMA address */
156 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 dma_unmap_addr_set(tx_buf, dma, dma);
158
159 tx_desc->buffer_addr = cpu_to_le64(dma);
160 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161
162 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 tx_buf->raw_buf = (void *)raw_packet;
164
165 tx_desc->cmd_type_offset_bsz =
166 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167
168 /* Force memory writes to complete before letting h/w
169 * know there are new descriptors to fetch.
170 */
171 wmb();
172
173 /* Mark the data descriptor to be watched */
174 first->next_to_watch = tx_desc;
175
176 writel(tx_ring->next_to_use, tx_ring->tail);
177 return 0;
178
179 dma_fail:
180 return -1;
181 }
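/* Typical call pattern (a sketch of what the add/del helpers below do):
 * fill in the filter fields, select a PCTYPE and hand the pre-built raw
 * packet to the programming routine; the buffer itself is later released
 * by i40e_clean_tx_ring():
 *
 *	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
 *	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, true);
 */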
182
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
185 /**
186 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187 * @vsi: pointer to the targeted VSI
188 * @fd_data: the flow director data required for the FDir descriptor
189 * @add: true adds a filter, false removes it
190 *
191 * Returns 0 if the filters were successfully added or removed
192 **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 struct i40e_fdir_filter *fd_data,
195 bool add)
196 {
197 struct i40e_pf *pf = vsi->back;
198 struct udphdr *udp;
199 struct iphdr *ip;
200 bool err = false;
201 u8 *raw_packet;
202 int ret;
203 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206
207 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208 if (!raw_packet)
209 return -ENOMEM;
210 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211
212 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 + sizeof(struct iphdr));
215
216 ip->daddr = fd_data->dst_ip[0];
217 udp->dest = fd_data->dst_port;
218 ip->saddr = fd_data->src_ip[0];
219 udp->source = fd_data->src_port;
220
221 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223 if (ret) {
224 dev_info(&pf->pdev->dev,
225 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 fd_data->pctype, fd_data->fd_id, ret);
227 err = true;
228 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229 if (add)
230 dev_info(&pf->pdev->dev,
231 "Filter OK for PCTYPE %d loc = %d\n",
232 fd_data->pctype, fd_data->fd_id);
233 else
234 dev_info(&pf->pdev->dev,
235 "Filter deleted for PCTYPE %d loc = %d\n",
236 fd_data->pctype, fd_data->fd_id);
237 }
238 return err ? -EOPNOTSUPP : 0;
239 }
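/* Layout of the dummy template used above (descriptive note): 14 bytes of
 * Ethernet header (EtherType 0x0800), a 20-byte IPv4 header (0x45 = version
 * 4/IHL 5, total length 0x1c = 28, TTL 0x40, protocol 0x11 = UDP) and an
 * 8-byte UDP header, i.e. I40E_UDPIP_DUMMY_PACKET_LEN = 42 bytes in all;
 * only the addresses and ports are patched in before programming.
 */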
240
241 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
242 /**
243 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
244 * @vsi: pointer to the targeted VSI
245 * @fd_data: the flow director data required for the FDir descriptor
246 * @add: true adds a filter, false removes it
247 *
248 * Returns 0 if the filters were successfully added or removed
249 **/
250 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
251 struct i40e_fdir_filter *fd_data,
252 bool add)
253 {
254 struct i40e_pf *pf = vsi->back;
255 struct tcphdr *tcp;
256 struct iphdr *ip;
257 bool err = false;
258 u8 *raw_packet;
259 int ret;
260 /* Dummy packet */
261 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
262 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
263 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
264 0x0, 0x72, 0, 0, 0, 0};
265
266 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
267 if (!raw_packet)
268 return -ENOMEM;
269 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
270
271 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
272 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
273 + sizeof(struct iphdr));
274
275 ip->daddr = fd_data->dst_ip[0];
276 tcp->dest = fd_data->dst_port;
277 ip->saddr = fd_data->src_ip[0];
278 tcp->source = fd_data->src_port;
279
280 if (add) {
281 pf->fd_tcp_rule++;
282 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
283 if (I40E_DEBUG_FD & pf->hw.debug_mask)
284 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
285 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
286 }
287 } else {
288 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
289 (pf->fd_tcp_rule - 1) : 0;
290 if (pf->fd_tcp_rule == 0) {
291 pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
292 if (I40E_DEBUG_FD & pf->hw.debug_mask)
293 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
294 }
295 }
296
297 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
299
300 if (ret) {
301 dev_info(&pf->pdev->dev,
302 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 fd_data->pctype, fd_data->fd_id, ret);
304 err = true;
305 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
306 if (add)
307 dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
308 fd_data->pctype, fd_data->fd_id);
309 else
310 dev_info(&pf->pdev->dev,
311 "Filter deleted for PCTYPE %d loc = %d\n",
312 fd_data->pctype, fd_data->fd_id);
313 }
314
315 return err ? -EOPNOTSUPP : 0;
316 }
317
318 /**
319 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320 * a specific flow spec
321 * @vsi: pointer to the targeted VSI
322 * @fd_data: the flow director data required for the FDir descriptor
323 * @add: true adds a filter, false removes it
324 *
325 * Always returns -EOPNOTSUPP
326 **/
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 struct i40e_fdir_filter *fd_data,
329 bool add)
330 {
331 return -EOPNOTSUPP;
332 }
333
334 #define I40E_IP_DUMMY_PACKET_LEN 34
335 /**
336 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337 * a specific flow spec
338 * @vsi: pointer to the targeted VSI
339 * @fd_data: the flow director data required for the FDir descriptor
340 * @add: true adds a filter, false removes it
341 *
342 * Returns 0 if the filters were successfully added or removed
343 **/
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 struct i40e_fdir_filter *fd_data,
346 bool add)
347 {
348 struct i40e_pf *pf = vsi->back;
349 struct iphdr *ip;
350 bool err = false;
351 u8 *raw_packet;
352 int ret;
353 int i;
354 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
356 0, 0, 0, 0};
357
358 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
360 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
361 if (!raw_packet)
362 return -ENOMEM;
363 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
365
366 ip->saddr = fd_data->src_ip[0];
367 ip->daddr = fd_data->dst_ip[0];
368 ip->protocol = 0;
369
370 fd_data->pctype = i;
371 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
372
373 if (ret) {
374 dev_info(&pf->pdev->dev,
375 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 fd_data->pctype, fd_data->fd_id, ret);
377 err = true;
378 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
379 if (add)
380 dev_info(&pf->pdev->dev,
381 "Filter OK for PCTYPE %d loc = %d\n",
382 fd_data->pctype, fd_data->fd_id);
383 else
384 dev_info(&pf->pdev->dev,
385 "Filter deleted for PCTYPE %d loc = %d\n",
386 fd_data->pctype, fd_data->fd_id);
387 }
388 }
389
390 return err ? -EOPNOTSUPP : 0;
391 }
392
393 /**
394 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395 * @vsi: pointer to the targeted VSI
396 * @input: flow director filter data to add or delete
397 * @add: true adds a filter, false removes it
398 *
399 **/
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 struct i40e_fdir_filter *input, bool add)
402 {
403 struct i40e_pf *pf = vsi->back;
404 int ret;
405
406 switch (input->flow_type & ~FLOW_EXT) {
407 case TCP_V4_FLOW:
408 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
409 break;
410 case UDP_V4_FLOW:
411 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
412 break;
413 case SCTP_V4_FLOW:
414 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
415 break;
416 case IPV4_FLOW:
417 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
418 break;
419 case IP_USER_FLOW:
420 switch (input->ip4_proto) {
421 case IPPROTO_TCP:
422 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
423 break;
424 case IPPROTO_UDP:
425 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
426 break;
427 case IPPROTO_SCTP:
428 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
429 break;
430 default:
431 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
432 break;
433 }
434 break;
435 default:
436 dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
437 input->flow_type);
438 ret = -EINVAL;
439 }
440
441 /* The buffer allocated here is freed by i40e_clean_tx_ring() */
442 return ret;
443 }
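/* These helpers are typically reached from ethtool's ntuple interface
 * (e.g. "ethtool -N <iface> flow-type tcp4 dst-port 80 action 2"), whose
 * flow specification has already been translated into the struct
 * i40e_fdir_filter passed in here; unsupported flow types return -EINVAL.
 */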
444
445 /**
446 * i40e_fd_handle_status - check the Programming Status for FD
447 * @rx_ring: the Rx ring for this descriptor
448 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
449 * @prog_id: the id originally used for programming
450 *
451 * This is used to verify if the FD programming or invalidation
452 * requested by SW to the HW is successful or not and take actions accordingly.
453 **/
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 union i40e_rx_desc *rx_desc, u8 prog_id)
456 {
457 struct i40e_pf *pf = rx_ring->vsi->back;
458 struct pci_dev *pdev = pf->pdev;
459 u32 fcnt_prog, fcnt_avail;
460 u32 error;
461 u64 qw;
462
463 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
466
467 if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 (I40E_DEBUG_FD & pf->hw.debug_mask))
470 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 rx_desc->wb.qword0.hi_dword.fd_id);
472
473 /* Check if the programming error is for ATR.
474 * If so, auto disable ATR and set a state for
475 * flush in progress. Next time we come here if flush is in
476 * progress do nothing, once flush is complete the state will
477 * be cleared.
478 */
479 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
480 return;
481
482 pf->fd_add_err++;
483 /* store the current atr filter count */
484 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
485
486 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
487 (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
488 pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
489 set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
490 }
491
492 /* filter programming failed most likely due to table full */
493 fcnt_prog = i40e_get_global_fd_count(pf);
494 fcnt_avail = pf->fdir_pf_filter_count;
495 /* If ATR is running fcnt_prog can quickly change,
496 * if we are very close to full, it makes sense to disable
497 * FD ATR/SB and then re-enable it when there is room.
498 */
499 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
500 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
501 !(pf->auto_disable_flags &
502 I40E_FLAG_FD_SB_ENABLED)) {
503 if (I40E_DEBUG_FD & pf->hw.debug_mask)
504 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
505 pf->auto_disable_flags |=
506 I40E_FLAG_FD_SB_ENABLED;
507 }
508 } else {
509 dev_info(&pdev->dev,
510 "FD filter programming failed due to incorrect filter parameters\n");
511 }
512 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
513 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
515 rx_desc->wb.qword0.hi_dword.fd_id);
516 }
517 }
518
519 /**
520 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
521 * @ring: the ring that owns the buffer
522 * @tx_buffer: the buffer to free
523 **/
524 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
525 struct i40e_tx_buffer *tx_buffer)
526 {
527 if (tx_buffer->skb) {
528 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
529 kfree(tx_buffer->raw_buf);
530 else
531 dev_kfree_skb_any(tx_buffer->skb);
532
533 if (dma_unmap_len(tx_buffer, len))
534 dma_unmap_single(ring->dev,
535 dma_unmap_addr(tx_buffer, dma),
536 dma_unmap_len(tx_buffer, len),
537 DMA_TO_DEVICE);
538 } else if (dma_unmap_len(tx_buffer, len)) {
539 dma_unmap_page(ring->dev,
540 dma_unmap_addr(tx_buffer, dma),
541 dma_unmap_len(tx_buffer, len),
542 DMA_TO_DEVICE);
543 }
544 tx_buffer->next_to_watch = NULL;
545 tx_buffer->skb = NULL;
546 dma_unmap_len_set(tx_buffer, len, 0);
547 /* tx_buffer must be completely set up in the transmit path */
548 }
549
550 /**
551 * i40e_clean_tx_ring - Free all Tx ring buffers
552 * @tx_ring: ring to be cleaned
553 **/
554 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
555 {
556 unsigned long bi_size;
557 u16 i;
558
559 /* ring already cleared, nothing to do */
560 if (!tx_ring->tx_bi)
561 return;
562
563 /* Free all the Tx ring sk_buffs */
564 for (i = 0; i < tx_ring->count; i++)
565 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
566
567 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
568 memset(tx_ring->tx_bi, 0, bi_size);
569
570 /* Zero out the descriptor ring */
571 memset(tx_ring->desc, 0, tx_ring->size);
572
573 tx_ring->next_to_use = 0;
574 tx_ring->next_to_clean = 0;
575
576 if (!tx_ring->netdev)
577 return;
578
579 /* cleanup Tx queue statistics */
580 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
581 tx_ring->queue_index));
582 }
583
584 /**
585 * i40e_free_tx_resources - Free Tx resources per queue
586 * @tx_ring: Tx descriptor ring for a specific queue
587 *
588 * Free all transmit software resources
589 **/
590 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
591 {
592 i40e_clean_tx_ring(tx_ring);
593 kfree(tx_ring->tx_bi);
594 tx_ring->tx_bi = NULL;
595
596 if (tx_ring->desc) {
597 dma_free_coherent(tx_ring->dev, tx_ring->size,
598 tx_ring->desc, tx_ring->dma);
599 tx_ring->desc = NULL;
600 }
601 }
602
603 /**
604 * i40e_get_tx_pending - how many tx descriptors not processed
605 * @ring: the ring of descriptors
606 *
607 * Since there is no access to the ring head register
608 * in XL710, we need to use our local copies
609 **/
610 u32 i40e_get_tx_pending(struct i40e_ring *ring)
611 {
612 u32 head, tail;
613
614 head = i40e_get_head(ring);
615 tail = readl(ring->tail);
616
617 if (head != tail)
618 return (head < tail) ?
619 tail - head : (tail + ring->count - head);
620
621 return 0;
622 }
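/* Worked example of the wrap-around math above (illustrative): on a
 * 512-entry ring with head = 500 and tail = 10 the pending count is
 * 10 + 512 - 500 = 22 descriptors, while head = 10 and tail = 500 gives
 * simply 500 - 10 = 490.
 */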
623
624 #define WB_STRIDE 0x3
625
626 /**
627 * i40e_clean_tx_irq - Reclaim resources after transmit completes
628 * @tx_ring: tx ring to clean
629 * @budget: how many cleans we're allowed
630 *
631 * Returns true if there's any budget left (i.e. the clean is finished)
632 **/
633 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
634 {
635 u16 i = tx_ring->next_to_clean;
636 struct i40e_tx_buffer *tx_buf;
637 struct i40e_tx_desc *tx_head;
638 struct i40e_tx_desc *tx_desc;
639 unsigned int total_packets = 0;
640 unsigned int total_bytes = 0;
641
642 tx_buf = &tx_ring->tx_bi[i];
643 tx_desc = I40E_TX_DESC(tx_ring, i);
644 i -= tx_ring->count;
645
646 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
647
648 do {
649 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
650
651 /* if next_to_watch is not set then there is no work pending */
652 if (!eop_desc)
653 break;
654
655 /* prevent any other reads prior to eop_desc */
656 read_barrier_depends();
657
658 /* we have caught up to head, no work left to do */
659 if (tx_head == tx_desc)
660 break;
661
662 /* clear next_to_watch to prevent false hangs */
663 tx_buf->next_to_watch = NULL;
664
665 /* update the statistics for this packet */
666 total_bytes += tx_buf->bytecount;
667 total_packets += tx_buf->gso_segs;
668
669 /* free the skb */
670 dev_consume_skb_any(tx_buf->skb);
671
672 /* unmap skb header data */
673 dma_unmap_single(tx_ring->dev,
674 dma_unmap_addr(tx_buf, dma),
675 dma_unmap_len(tx_buf, len),
676 DMA_TO_DEVICE);
677
678 /* clear tx_buffer data */
679 tx_buf->skb = NULL;
680 dma_unmap_len_set(tx_buf, len, 0);
681
682 /* unmap remaining buffers */
683 while (tx_desc != eop_desc) {
684
685 tx_buf++;
686 tx_desc++;
687 i++;
688 if (unlikely(!i)) {
689 i -= tx_ring->count;
690 tx_buf = tx_ring->tx_bi;
691 tx_desc = I40E_TX_DESC(tx_ring, 0);
692 }
693
694 /* unmap any remaining paged data */
695 if (dma_unmap_len(tx_buf, len)) {
696 dma_unmap_page(tx_ring->dev,
697 dma_unmap_addr(tx_buf, dma),
698 dma_unmap_len(tx_buf, len),
699 DMA_TO_DEVICE);
700 dma_unmap_len_set(tx_buf, len, 0);
701 }
702 }
703
704 /* move us one more past the eop_desc for start of next pkt */
705 tx_buf++;
706 tx_desc++;
707 i++;
708 if (unlikely(!i)) {
709 i -= tx_ring->count;
710 tx_buf = tx_ring->tx_bi;
711 tx_desc = I40E_TX_DESC(tx_ring, 0);
712 }
713
714 prefetch(tx_desc);
715
716 /* update budget accounting */
717 budget--;
718 } while (likely(budget));
719
720 i += tx_ring->count;
721 tx_ring->next_to_clean = i;
722 u64_stats_update_begin(&tx_ring->syncp);
723 tx_ring->stats.bytes += total_bytes;
724 tx_ring->stats.packets += total_packets;
725 u64_stats_update_end(&tx_ring->syncp);
726 tx_ring->q_vector->tx.total_bytes += total_bytes;
727 tx_ring->q_vector->tx.total_packets += total_packets;
728
729 /* check to see if there are any non-cache aligned descriptors
730 * waiting to be written back, and kick the hardware to force
731 * them to be written back in case of napi polling
732 */
733 if (budget &&
734 !((i & WB_STRIDE) == WB_STRIDE) &&
735 !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
736 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
737 tx_ring->arm_wb = true;
738 else
739 tx_ring->arm_wb = false;
740
741 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
742 tx_ring->queue_index),
743 total_packets, total_bytes);
744
745 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
746 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
747 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
748 /* Make sure that anybody stopping the queue after this
749 * sees the new next_to_clean.
750 */
751 smp_mb();
752 if (__netif_subqueue_stopped(tx_ring->netdev,
753 tx_ring->queue_index) &&
754 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
755 netif_wake_subqueue(tx_ring->netdev,
756 tx_ring->queue_index);
757 ++tx_ring->tx_stats.restart_queue;
758 }
759 }
760
761 return !!budget;
762 }
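/* Note on the WB_STRIDE test above: with WB_STRIDE = 0x3, the condition
 * !((i & WB_STRIDE) == WB_STRIDE) holds whenever next_to_clean is not
 * congruent to 3 modulo 4, so (with budget left and the ring active) a
 * writeback is armed when cleaning stops at descriptor 5, but not at
 * descriptor 7.
 */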
763
764 /**
765 * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
766 * @vsi: the VSI we care about
767 * @q_vector: the vector on which to force writeback
768 *
769 **/
770 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
771 {
772 u16 flags = q_vector->tx.ring[0].flags;
773
774 if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
775 u32 val;
776
777 if (q_vector->arm_wb_state)
778 return;
779
780 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
781
782 wr32(&vsi->back->hw,
783 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
784 vsi->base_vector - 1),
785 val);
786 q_vector->arm_wb_state = true;
787 } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
788 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
789 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
790 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
791 I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
792 /* allow 00 to be written to the index */
793
794 wr32(&vsi->back->hw,
795 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
796 vsi->base_vector - 1), val);
797 } else {
798 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
799 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
800 I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
801 I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
802 /* allow 00 to be written to the index */
803
804 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
805 }
806 }
807
808 /**
809 * i40e_set_new_dynamic_itr - Find new ITR level
810 * @rc: structure containing ring performance data
811 *
812 * Stores a new ITR value based on packets and byte counts during
813 * the last interrupt. The advantage of per interrupt computation
814 * is faster updates and more accurate ITR for the current traffic
815 * pattern. Constants in this function were computed based on
816 * theoretical maximum wire speed and thresholds were set based on
817 * testing data as well as attempting to minimize response time
818 * while increasing bulk throughput.
819 **/
820 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
821 {
822 enum i40e_latency_range new_latency_range = rc->latency_range;
823 u32 new_itr = rc->itr;
824 int bytes_per_int;
825
826 if (rc->total_packets == 0 || !rc->itr)
827 return;
828
829 /* simple throttle rate management
830 * 0-10MB/s lowest (100000 ints/s)
831 * 10-20MB/s low (20000 ints/s)
832 * 20-1249MB/s bulk (8000 ints/s)
833 */
834 bytes_per_int = rc->total_bytes / rc->itr;
835 switch (new_latency_range) {
836 case I40E_LOWEST_LATENCY:
837 if (bytes_per_int > 10)
838 new_latency_range = I40E_LOW_LATENCY;
839 break;
840 case I40E_LOW_LATENCY:
841 if (bytes_per_int > 20)
842 new_latency_range = I40E_BULK_LATENCY;
843 else if (bytes_per_int <= 10)
844 new_latency_range = I40E_LOWEST_LATENCY;
845 break;
846 case I40E_BULK_LATENCY:
847 if (bytes_per_int <= 20)
848 new_latency_range = I40E_LOW_LATENCY;
849 break;
850 default:
851 if (bytes_per_int <= 20)
852 new_latency_range = I40E_LOW_LATENCY;
853 break;
854 }
855 rc->latency_range = new_latency_range;
856
857 switch (new_latency_range) {
858 case I40E_LOWEST_LATENCY:
859 new_itr = I40E_ITR_100K;
860 break;
861 case I40E_LOW_LATENCY:
862 new_itr = I40E_ITR_20K;
863 break;
864 case I40E_BULK_LATENCY:
865 new_itr = I40E_ITR_8K;
866 break;
867 default:
868 break;
869 }
870
871 if (new_itr != rc->itr)
872 rc->itr = new_itr;
873
874 rc->total_bytes = 0;
875 rc->total_packets = 0;
876 }
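/* Example of the adjustment above (illustrative): a ring sitting in
 * I40E_LOW_LATENCY that sees bytes_per_int climb above 20 is promoted to
 * I40E_BULK_LATENCY and programmed with I40E_ITR_8K (8000 ints/s); if
 * bytes_per_int later drops to 10 or below it steps back through
 * I40E_LOW_LATENCY to I40E_LOWEST_LATENCY (I40E_ITR_100K) over successive
 * interrupts.
 */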
877
878 /**
879 * i40e_clean_programming_status - clean the programming status descriptor
880 * @rx_ring: the rx ring that has this descriptor
881 * @rx_desc: the rx descriptor written back by HW
882 *
883 * Flow director should handle FD_FILTER_STATUS to check its filter programming
884 * status being successful or not and take actions accordingly. FCoE should
885 * handle its context/filter programming/invalidation status and take actions.
886 *
887 **/
888 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
889 union i40e_rx_desc *rx_desc)
890 {
891 u64 qw;
892 u8 id;
893
894 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
895 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
896 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
897
898 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
899 i40e_fd_handle_status(rx_ring, rx_desc, id);
900 #ifdef I40E_FCOE
901 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
902 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
903 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
904 #endif
905 }
906
907 /**
908 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
909 * @tx_ring: the tx ring to set up
910 *
911 * Return 0 on success, negative on error
912 **/
913 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
914 {
915 struct device *dev = tx_ring->dev;
916 int bi_size;
917
918 if (!dev)
919 return -ENOMEM;
920
921 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
922 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
923 if (!tx_ring->tx_bi)
924 goto err;
925
926 /* round up to nearest 4K */
927 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
928 /* add u32 for head writeback, align after this takes care of
929 * guaranteeing this is at least one cache line in size
930 */
931 tx_ring->size += sizeof(u32);
932 tx_ring->size = ALIGN(tx_ring->size, 4096);
933 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
934 &tx_ring->dma, GFP_KERNEL);
935 if (!tx_ring->desc) {
936 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
937 tx_ring->size);
938 goto err;
939 }
940
941 tx_ring->next_to_use = 0;
942 tx_ring->next_to_clean = 0;
943 return 0;
944
945 err:
946 kfree(tx_ring->tx_bi);
947 tx_ring->tx_bi = NULL;
948 return -ENOMEM;
949 }
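/* Sizing example (assuming the default 512-descriptor Tx ring): 512 * 16
 * bytes of descriptors plus the 4-byte head writeback word is 8196 bytes,
 * which ALIGN(..., 4096) rounds up to a 12288-byte coherent DMA allocation.
 */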
950
951 /**
952 * i40e_clean_rx_ring - Free Rx buffers
953 * @rx_ring: ring to be cleaned
954 **/
955 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
956 {
957 struct device *dev = rx_ring->dev;
958 struct i40e_rx_buffer *rx_bi;
959 unsigned long bi_size;
960 u16 i;
961
962 /* ring already cleared, nothing to do */
963 if (!rx_ring->rx_bi)
964 return;
965
966 if (ring_is_ps_enabled(rx_ring)) {
967 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
968
969 rx_bi = &rx_ring->rx_bi[0];
970 if (rx_bi->hdr_buf) {
971 dma_free_coherent(dev,
972 bufsz,
973 rx_bi->hdr_buf,
974 rx_bi->dma);
975 for (i = 0; i < rx_ring->count; i++) {
976 rx_bi = &rx_ring->rx_bi[i];
977 rx_bi->dma = 0;
978 rx_bi->hdr_buf = NULL;
979 }
980 }
981 }
982 /* Free all the Rx ring sk_buffs */
983 for (i = 0; i < rx_ring->count; i++) {
984 rx_bi = &rx_ring->rx_bi[i];
985 if (rx_bi->dma) {
986 dma_unmap_single(dev,
987 rx_bi->dma,
988 rx_ring->rx_buf_len,
989 DMA_FROM_DEVICE);
990 rx_bi->dma = 0;
991 }
992 if (rx_bi->skb) {
993 dev_kfree_skb(rx_bi->skb);
994 rx_bi->skb = NULL;
995 }
996 if (rx_bi->page) {
997 if (rx_bi->page_dma) {
998 dma_unmap_page(dev,
999 rx_bi->page_dma,
1000 PAGE_SIZE / 2,
1001 DMA_FROM_DEVICE);
1002 rx_bi->page_dma = 0;
1003 }
1004 __free_page(rx_bi->page);
1005 rx_bi->page = NULL;
1006 rx_bi->page_offset = 0;
1007 }
1008 }
1009
1010 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1011 memset(rx_ring->rx_bi, 0, bi_size);
1012
1013 /* Zero out the descriptor ring */
1014 memset(rx_ring->desc, 0, rx_ring->size);
1015
1016 rx_ring->next_to_clean = 0;
1017 rx_ring->next_to_use = 0;
1018 }
1019
1020 /**
1021 * i40e_free_rx_resources - Free Rx resources
1022 * @rx_ring: ring to clean the resources from
1023 *
1024 * Free all receive software resources
1025 **/
1026 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1027 {
1028 i40e_clean_rx_ring(rx_ring);
1029 kfree(rx_ring->rx_bi);
1030 rx_ring->rx_bi = NULL;
1031
1032 if (rx_ring->desc) {
1033 dma_free_coherent(rx_ring->dev, rx_ring->size,
1034 rx_ring->desc, rx_ring->dma);
1035 rx_ring->desc = NULL;
1036 }
1037 }
1038
1039 /**
1040 * i40e_alloc_rx_headers - allocate rx header buffers
1041 * @rx_ring: ring to alloc buffers
1042 *
1043 * Allocate rx header buffers for the entire ring. As these are static,
1044 * this is only called when setting up a new ring.
1045 **/
1046 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1047 {
1048 struct device *dev = rx_ring->dev;
1049 struct i40e_rx_buffer *rx_bi;
1050 dma_addr_t dma;
1051 void *buffer;
1052 int buf_size;
1053 int i;
1054
1055 if (rx_ring->rx_bi[0].hdr_buf)
1056 return;
1057 /* Make sure the buffers don't cross cache line boundaries. */
1058 buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1059 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1060 &dma, GFP_KERNEL);
1061 if (!buffer)
1062 return;
1063 for (i = 0; i < rx_ring->count; i++) {
1064 rx_bi = &rx_ring->rx_bi[i];
1065 rx_bi->dma = dma + (i * buf_size);
1066 rx_bi->hdr_buf = buffer + (i * buf_size);
1067 }
1068 }
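/* Example (illustrative): with rx_hdr_len = 256 and a 512-entry ring, a
 * single 256 * 512 = 128 KB coherent allocation is carved into 256-byte
 * slots, so rx_ring->rx_bi[i].hdr_buf = buffer + i * 256 and
 * rx_ring->rx_bi[i].dma = dma + i * 256.
 */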
1069
1070 /**
1071 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1072 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1073 *
1074 * Returns 0 on success, negative on failure
1075 **/
1076 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1077 {
1078 struct device *dev = rx_ring->dev;
1079 int bi_size;
1080
1081 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1082 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1083 if (!rx_ring->rx_bi)
1084 goto err;
1085
1086 u64_stats_init(&rx_ring->syncp);
1087
1088 /* Round up to nearest 4K */
1089 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1090 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1091 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1092 rx_ring->size = ALIGN(rx_ring->size, 4096);
1093 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1094 &rx_ring->dma, GFP_KERNEL);
1095
1096 if (!rx_ring->desc) {
1097 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1098 rx_ring->size);
1099 goto err;
1100 }
1101
1102 rx_ring->next_to_clean = 0;
1103 rx_ring->next_to_use = 0;
1104
1105 return 0;
1106 err:
1107 kfree(rx_ring->rx_bi);
1108 rx_ring->rx_bi = NULL;
1109 return -ENOMEM;
1110 }
1111
1112 /**
1113 * i40e_release_rx_desc - Store the new tail value and notify hardware
1114 * @rx_ring: ring to bump
1115 * @val: new tail index (next_to_use)
1116 **/
1117 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1118 {
1119 rx_ring->next_to_use = val;
1120 /* Force memory writes to complete before letting h/w
1121 * know there are new descriptors to fetch. (Only
1122 * applicable for weak-ordered memory model archs,
1123 * such as IA-64).
1124 */
1125 wmb();
1126 writel(val, rx_ring->tail);
1127 }
1128
1129 /**
1130 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1131 * @rx_ring: ring to place buffers on
1132 * @cleaned_count: number of buffers to replace
1133 **/
1134 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1135 {
1136 u16 i = rx_ring->next_to_use;
1137 union i40e_rx_desc *rx_desc;
1138 struct i40e_rx_buffer *bi;
1139
1140 /* do nothing if no valid netdev defined */
1141 if (!rx_ring->netdev || !cleaned_count)
1142 return;
1143
1144 while (cleaned_count--) {
1145 rx_desc = I40E_RX_DESC(rx_ring, i);
1146 bi = &rx_ring->rx_bi[i];
1147
1148 if (bi->skb) /* desc is in use */
1149 goto no_buffers;
1150 if (!bi->page) {
1151 bi->page = alloc_page(GFP_ATOMIC);
1152 if (!bi->page) {
1153 rx_ring->rx_stats.alloc_page_failed++;
1154 goto no_buffers;
1155 }
1156 }
1157
1158 if (!bi->page_dma) {
1159 /* use a half page if we're re-using */
1160 bi->page_offset ^= PAGE_SIZE / 2;
1161 bi->page_dma = dma_map_page(rx_ring->dev,
1162 bi->page,
1163 bi->page_offset,
1164 PAGE_SIZE / 2,
1165 DMA_FROM_DEVICE);
1166 if (dma_mapping_error(rx_ring->dev,
1167 bi->page_dma)) {
1168 rx_ring->rx_stats.alloc_page_failed++;
1169 bi->page_dma = 0;
1170 goto no_buffers;
1171 }
1172 }
1173
1174 dma_sync_single_range_for_device(rx_ring->dev,
1175 bi->dma,
1176 0,
1177 rx_ring->rx_hdr_len,
1178 DMA_FROM_DEVICE);
1179 /* Refresh the desc even if buffer_addrs didn't change
1180 * because each write-back erases this info.
1181 */
1182 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1183 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1184 i++;
1185 if (i == rx_ring->count)
1186 i = 0;
1187 }
1188
1189 no_buffers:
1190 if (rx_ring->next_to_use != i)
1191 i40e_release_rx_desc(rx_ring, i);
1192 }
1193
1194 /**
1195 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1196 * @rx_ring: ring to place buffers on
1197 * @cleaned_count: number of buffers to replace
1198 **/
1199 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1200 {
1201 u16 i = rx_ring->next_to_use;
1202 union i40e_rx_desc *rx_desc;
1203 struct i40e_rx_buffer *bi;
1204 struct sk_buff *skb;
1205
1206 /* do nothing if no valid netdev defined */
1207 if (!rx_ring->netdev || !cleaned_count)
1208 return;
1209
1210 while (cleaned_count--) {
1211 rx_desc = I40E_RX_DESC(rx_ring, i);
1212 bi = &rx_ring->rx_bi[i];
1213 skb = bi->skb;
1214
1215 if (!skb) {
1216 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1217 rx_ring->rx_buf_len);
1218 if (!skb) {
1219 rx_ring->rx_stats.alloc_buff_failed++;
1220 goto no_buffers;
1221 }
1222 /* initialize queue mapping */
1223 skb_record_rx_queue(skb, rx_ring->queue_index);
1224 bi->skb = skb;
1225 }
1226
1227 if (!bi->dma) {
1228 bi->dma = dma_map_single(rx_ring->dev,
1229 skb->data,
1230 rx_ring->rx_buf_len,
1231 DMA_FROM_DEVICE);
1232 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1233 rx_ring->rx_stats.alloc_buff_failed++;
1234 bi->dma = 0;
1235 goto no_buffers;
1236 }
1237 }
1238
1239 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1240 rx_desc->read.hdr_addr = 0;
1241 i++;
1242 if (i == rx_ring->count)
1243 i = 0;
1244 }
1245
1246 no_buffers:
1247 if (rx_ring->next_to_use != i)
1248 i40e_release_rx_desc(rx_ring, i);
1249 }
1250
1251 /**
1252 * i40e_receive_skb - Send a completed packet up the stack
1253 * @rx_ring: rx ring in play
1254 * @skb: packet to send up
1255 * @vlan_tag: vlan tag for packet
1256 **/
1257 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1258 struct sk_buff *skb, u16 vlan_tag)
1259 {
1260 struct i40e_q_vector *q_vector = rx_ring->q_vector;
1261 struct i40e_vsi *vsi = rx_ring->vsi;
1262 u64 flags = vsi->back->flags;
1263
1264 if (vlan_tag & VLAN_VID_MASK)
1265 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1266
1267 if (flags & I40E_FLAG_IN_NETPOLL)
1268 netif_rx(skb);
1269 else
1270 napi_gro_receive(&q_vector->napi, skb);
1271 }
1272
1273 /**
1274 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1275 * @vsi: the VSI we care about
1276 * @skb: skb currently being received and modified
1277 * @rx_status: status value of last descriptor in packet
1278 * @rx_error: error value of last descriptor in packet
1279 * @rx_ptype: ptype value of last descriptor in packet
1280 **/
1281 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1282 struct sk_buff *skb,
1283 u32 rx_status,
1284 u32 rx_error,
1285 u16 rx_ptype)
1286 {
1287 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1288 bool ipv4 = false, ipv6 = false;
1289 bool ipv4_tunnel, ipv6_tunnel;
1290 __wsum rx_udp_csum;
1291 struct iphdr *iph;
1292 __sum16 csum;
1293
1294 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1295 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1296 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1297 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1298
1299 skb->ip_summed = CHECKSUM_NONE;
1300
1301 /* Rx csum enabled and ip headers found? */
1302 if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1303 return;
1304
1305 /* did the hardware decode the packet and checksum? */
1306 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1307 return;
1308
1309 /* both known and outer_ip must be set for the below code to work */
1310 if (!(decoded.known && decoded.outer_ip))
1311 return;
1312
1313 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1314 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1315 ipv4 = true;
1316 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1317 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1318 ipv6 = true;
1319
1320 if (ipv4 &&
1321 (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1322 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1323 goto checksum_fail;
1324
1325 /* likely incorrect csum if alternate IP extension headers found */
1326 if (ipv6 &&
1327 rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1328 /* don't increment checksum err here, non-fatal err */
1329 return;
1330
1331 /* there was some L4 error, count error and punt packet to the stack */
1332 if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1333 goto checksum_fail;
1334
1335 /* handle packets that were not able to be checksummed due
1336 * to arrival speed, in this case the stack can compute
1337 * the csum.
1338 */
1339 if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1340 return;
1341
1342 /* If VXLAN traffic has an outer UDPv4 checksum we need to check
1343 * it in the driver, hardware does not do it for us.
1344 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1345 * so the total length of IPv4 header is IHL*4 bytes
1346 * The UDP_0 bit *may* be set if the *inner* header is UDP
1347 */
1348 if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
1349 (ipv4_tunnel)) {
1350 skb->transport_header = skb->mac_header +
1351 sizeof(struct ethhdr) +
1352 (ip_hdr(skb)->ihl * 4);
1353
1354 /* Add 4 bytes for VLAN tagged packets */
1355 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1356 skb->protocol == htons(ETH_P_8021AD))
1357 ? VLAN_HLEN : 0;
1358
1359 if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1360 (udp_hdr(skb)->check != 0)) {
1361 rx_udp_csum = udp_csum(skb);
1362 iph = ip_hdr(skb);
1363 csum = csum_tcpudp_magic(
1364 iph->saddr, iph->daddr,
1365 (skb->len - skb_transport_offset(skb)),
1366 IPPROTO_UDP, rx_udp_csum);
1367
1368 if (udp_hdr(skb)->check != csum)
1369 goto checksum_fail;
1370
1371 } /* else it's GRE and so no outer UDP header */
1372 }
1373
1374 skb->ip_summed = CHECKSUM_UNNECESSARY;
1375 skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1376
1377 return;
1378
1379 checksum_fail:
1380 vsi->back->hw_csum_rx_error++;
1381 }
1382
1383 /**
1384 * i40e_rx_hash - returns the hash value from the Rx descriptor
1385 * @ring: descriptor ring
1386 * @rx_desc: specific descriptor
1387 **/
1388 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1389 union i40e_rx_desc *rx_desc)
1390 {
1391 const __le64 rss_mask =
1392 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1393 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1394
1395 if ((ring->netdev->features & NETIF_F_RXHASH) &&
1396 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1397 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1398 else
1399 return 0;
1400 }
1401
1402 /**
1403 * i40e_ptype_to_hash - get a hash type
1404 * @ptype: the ptype value from the descriptor
1405 *
1406 * Returns a hash type to be used by skb_set_hash
1407 **/
1408 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1409 {
1410 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1411
1412 if (!decoded.known)
1413 return PKT_HASH_TYPE_NONE;
1414
1415 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1416 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1417 return PKT_HASH_TYPE_L4;
1418 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1419 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1420 return PKT_HASH_TYPE_L3;
1421 else
1422 return PKT_HASH_TYPE_L2;
1423 }
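/* Example mapping (illustrative): a decoded ptype for TCP over IPv4
 * (payload layer PAY4) yields PKT_HASH_TYPE_L4, a fragmented IPv4 frame
 * (payload layer PAY3) yields PKT_HASH_TYPE_L3, and anything else falls
 * through to PKT_HASH_TYPE_L2 before being handed to skb_set_hash() in
 * the Rx clean-up paths below.
 */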
1424
1425 /**
1426 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1427 * @rx_ring: rx ring to clean
1428 * @budget: how many cleans we're allowed
1429 *
1430 * Returns number of packets cleaned
1431 **/
1432 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1433 {
1434 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1435 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1436 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1437 const int current_node = numa_node_id();
1438 struct i40e_vsi *vsi = rx_ring->vsi;
1439 u16 i = rx_ring->next_to_clean;
1440 union i40e_rx_desc *rx_desc;
1441 u32 rx_error, rx_status;
1442 u8 rx_ptype;
1443 u64 qword;
1444
1445 if (budget <= 0)
1446 return 0;
1447
1448 do {
1449 struct i40e_rx_buffer *rx_bi;
1450 struct sk_buff *skb;
1451 u16 vlan_tag;
1452 /* return some buffers to hardware, one at a time is too slow */
1453 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1454 i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1455 cleaned_count = 0;
1456 }
1457
1458 i = rx_ring->next_to_clean;
1459 rx_desc = I40E_RX_DESC(rx_ring, i);
1460 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1461 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1462 I40E_RXD_QW1_STATUS_SHIFT;
1463
1464 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1465 break;
1466
1467 /* This memory barrier is needed to keep us from reading
1468 * any other fields out of the rx_desc until we know the
1469 * DD bit is set.
1470 */
1471 dma_rmb();
1472 if (i40e_rx_is_programming_status(qword)) {
1473 i40e_clean_programming_status(rx_ring, rx_desc);
1474 I40E_RX_INCREMENT(rx_ring, i);
1475 continue;
1476 }
1477 rx_bi = &rx_ring->rx_bi[i];
1478 skb = rx_bi->skb;
1479 if (likely(!skb)) {
1480 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1481 rx_ring->rx_hdr_len);
1482 if (!skb) {
1483 rx_ring->rx_stats.alloc_buff_failed++;
1484 break;
1485 }
1486
1487 /* initialize queue mapping */
1488 skb_record_rx_queue(skb, rx_ring->queue_index);
1489 /* we are reusing so sync this buffer for CPU use */
1490 dma_sync_single_range_for_cpu(rx_ring->dev,
1491 rx_bi->dma,
1492 0,
1493 rx_ring->rx_hdr_len,
1494 DMA_FROM_DEVICE);
1495 }
1496 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1497 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1498 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1499 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1500 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1501 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1502
1503 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1504 I40E_RXD_QW1_ERROR_SHIFT;
1505 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1506 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1507
1508 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1509 I40E_RXD_QW1_PTYPE_SHIFT;
1510 prefetch(rx_bi->page);
1511 rx_bi->skb = NULL;
1512 cleaned_count++;
1513 if (rx_hbo || rx_sph) {
1514 int len;
1515 if (rx_hbo)
1516 len = I40E_RX_HDR_SIZE;
1517 else
1518 len = rx_header_len;
1519 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1520 } else if (skb->len == 0) {
1521 int len;
1522
1523 len = (rx_packet_len > skb_headlen(skb) ?
1524 skb_headlen(skb) : rx_packet_len);
1525 memcpy(__skb_put(skb, len),
1526 rx_bi->page + rx_bi->page_offset,
1527 len);
1528 rx_bi->page_offset += len;
1529 rx_packet_len -= len;
1530 }
1531
1532 /* Get the rest of the data if this was a header split */
1533 if (rx_packet_len) {
1534 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1535 rx_bi->page,
1536 rx_bi->page_offset,
1537 rx_packet_len);
1538
1539 skb->len += rx_packet_len;
1540 skb->data_len += rx_packet_len;
1541 skb->truesize += rx_packet_len;
1542
1543 if ((page_count(rx_bi->page) == 1) &&
1544 (page_to_nid(rx_bi->page) == current_node))
1545 get_page(rx_bi->page);
1546 else
1547 rx_bi->page = NULL;
1548
1549 dma_unmap_page(rx_ring->dev,
1550 rx_bi->page_dma,
1551 PAGE_SIZE / 2,
1552 DMA_FROM_DEVICE);
1553 rx_bi->page_dma = 0;
1554 }
1555 I40E_RX_INCREMENT(rx_ring, i);
1556
1557 if (unlikely(
1558 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1559 struct i40e_rx_buffer *next_buffer;
1560
1561 next_buffer = &rx_ring->rx_bi[i];
1562 next_buffer->skb = skb;
1563 rx_ring->rx_stats.non_eop_descs++;
1564 continue;
1565 }
1566
1567 /* ERR_MASK will only have valid bits if EOP set */
1568 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1569 dev_kfree_skb_any(skb);
1570 continue;
1571 }
1572
1573 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1574 i40e_ptype_to_hash(rx_ptype));
1575 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1576 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1577 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1578 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1579 rx_ring->last_rx_timestamp = jiffies;
1580 }
1581
1582 /* probably a little skewed due to removing CRC */
1583 total_rx_bytes += skb->len;
1584 total_rx_packets++;
1585
1586 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1587
1588 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1589
1590 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1591 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1592 : 0;
1593 #ifdef I40E_FCOE
1594 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1595 dev_kfree_skb_any(skb);
1596 continue;
1597 }
1598 #endif
1599 skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1600 i40e_receive_skb(rx_ring, skb, vlan_tag);
1601
1602 rx_desc->wb.qword1.status_error_len = 0;
1603
1604 } while (likely(total_rx_packets < budget));
1605
1606 u64_stats_update_begin(&rx_ring->syncp);
1607 rx_ring->stats.packets += total_rx_packets;
1608 rx_ring->stats.bytes += total_rx_bytes;
1609 u64_stats_update_end(&rx_ring->syncp);
1610 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1611 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1612
1613 return total_rx_packets;
1614 }
1615
1616 /**
1617 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1618 * @rx_ring: rx ring to clean
1619 * @budget: how many cleans we're allowed
1620 *
1621 * Returns number of packets cleaned
1622 **/
1623 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1624 {
1625 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1626 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1627 struct i40e_vsi *vsi = rx_ring->vsi;
1628 union i40e_rx_desc *rx_desc;
1629 u32 rx_error, rx_status;
1630 u16 rx_packet_len;
1631 u8 rx_ptype;
1632 u64 qword;
1633 u16 i;
1634
1635 do {
1636 struct i40e_rx_buffer *rx_bi;
1637 struct sk_buff *skb;
1638 u16 vlan_tag;
1639 /* return some buffers to hardware, one at a time is too slow */
1640 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1641 i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1642 cleaned_count = 0;
1643 }
1644
1645 i = rx_ring->next_to_clean;
1646 rx_desc = I40E_RX_DESC(rx_ring, i);
1647 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1648 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1649 I40E_RXD_QW1_STATUS_SHIFT;
1650
1651 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1652 break;
1653
1654 /* This memory barrier is needed to keep us from reading
1655 * any other fields out of the rx_desc until we know the
1656 * DD bit is set.
1657 */
1658 dma_rmb();
1659
1660 if (i40e_rx_is_programming_status(qword)) {
1661 i40e_clean_programming_status(rx_ring, rx_desc);
1662 I40E_RX_INCREMENT(rx_ring, i);
1663 continue;
1664 }
1665 rx_bi = &rx_ring->rx_bi[i];
1666 skb = rx_bi->skb;
1667 prefetch(skb->data);
1668
1669 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1670 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1671
1672 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1673 I40E_RXD_QW1_ERROR_SHIFT;
1674 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1675
1676 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1677 I40E_RXD_QW1_PTYPE_SHIFT;
1678 rx_bi->skb = NULL;
1679 cleaned_count++;
1680
1681 /* Get the header and possibly the whole packet
1682 * If this is an skb from previous receive dma will be 0
1683 */
1684 skb_put(skb, rx_packet_len);
1685 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1686 DMA_FROM_DEVICE);
1687 rx_bi->dma = 0;
1688
1689 I40E_RX_INCREMENT(rx_ring, i);
1690
1691 if (unlikely(
1692 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1693 rx_ring->rx_stats.non_eop_descs++;
1694 continue;
1695 }
1696
1697 /* ERR_MASK will only have valid bits if EOP set */
1698 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1699 dev_kfree_skb_any(skb);
1700 /* TODO: shouldn't we increment a counter indicating the
1701 * drop?
1702 */
1703 continue;
1704 }
1705
1706 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1707 i40e_ptype_to_hash(rx_ptype));
1708 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1709 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1710 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1711 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1712 rx_ring->last_rx_timestamp = jiffies;
1713 }
1714
1715 /* probably a little skewed due to removing CRC */
1716 total_rx_bytes += skb->len;
1717 total_rx_packets++;
1718
1719 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1720
1721 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1722
1723 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1724 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1725 : 0;
1726 #ifdef I40E_FCOE
1727 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1728 dev_kfree_skb_any(skb);
1729 continue;
1730 }
1731 #endif
1732 i40e_receive_skb(rx_ring, skb, vlan_tag);
1733
1734 rx_desc->wb.qword1.status_error_len = 0;
1735 } while (likely(total_rx_packets < budget));
1736
1737 u64_stats_update_begin(&rx_ring->syncp);
1738 rx_ring->stats.packets += total_rx_packets;
1739 rx_ring->stats.bytes += total_rx_bytes;
1740 u64_stats_update_end(&rx_ring->syncp);
1741 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1742 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1743
1744 return total_rx_packets;
1745 }
1746
1747 /**
1748 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1749 * @vsi: the VSI we care about
1750 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1751 *
1752 **/
1753 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1754 struct i40e_q_vector *q_vector)
1755 {
1756 struct i40e_hw *hw = &vsi->back->hw;
1757 u16 old_itr;
1758 int vector;
1759 u32 val;
1760
1761 vector = (q_vector->v_idx + vsi->base_vector);
1762 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1763 old_itr = q_vector->rx.itr;
1764 i40e_set_new_dynamic_itr(&q_vector->rx);
1765 if (old_itr != q_vector->rx.itr) {
1766 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1767 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1768 (I40E_RX_ITR <<
1769 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1770 (q_vector->rx.itr <<
1771 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1772 } else {
1773 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1774 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1775 (I40E_ITR_NONE <<
1776 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1777 }
1778 if (!test_bit(__I40E_DOWN, &vsi->state))
1779 wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
1780 } else {
1781 i40e_irq_dynamic_enable(vsi,
1782 q_vector->v_idx + vsi->base_vector);
1783 }
1784 if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1785 old_itr = q_vector->tx.itr;
1786 i40e_set_new_dynamic_itr(&q_vector->tx);
1787 if (old_itr != q_vector->tx.itr) {
1788 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1789 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1790 (I40E_TX_ITR <<
1791 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1792 (q_vector->tx.itr <<
1793 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1794 } else {
1795 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1796 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1797 (I40E_ITR_NONE <<
1798 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1799 }
1800 if (!test_bit(__I40E_DOWN, &vsi->state))
1801 wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
1802 vsi->base_vector - 1), val);
1803 } else {
1804 i40e_irq_dynamic_enable(vsi,
1805 q_vector->v_idx + vsi->base_vector);
1806 }
1807 }
1808
1809 /**
1810 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1811 * @napi: napi struct with our devices info in it
1812 * @budget: amount of work driver is allowed to do this pass, in packets
1813 *
1814 * This function will clean all queues associated with a q_vector.
1815 *
1816 * Returns the amount of work done
1817 **/
1818 int i40e_napi_poll(struct napi_struct *napi, int budget)
1819 {
1820 struct i40e_q_vector *q_vector =
1821 container_of(napi, struct i40e_q_vector, napi);
1822 struct i40e_vsi *vsi = q_vector->vsi;
1823 struct i40e_ring *ring;
1824 bool clean_complete = true;
1825 bool arm_wb = false;
1826 int budget_per_ring;
1827 int cleaned;
1828
1829 if (test_bit(__I40E_DOWN, &vsi->state)) {
1830 napi_complete(napi);
1831 return 0;
1832 }
1833
1834 /* Since the actual Tx work is minimal, we can give the Tx a larger
1835 * budget and be more aggressive about cleaning up the Tx descriptors.
1836 */
1837 i40e_for_each_ring(ring, q_vector->tx) {
1838 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1839 arm_wb |= ring->arm_wb;
1840 }
1841
1842 /* We attempt to distribute budget to each Rx queue fairly, but don't
1843 * allow the budget to go below 1 because that would exit polling early.
1844 */
1845 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1846
1847 i40e_for_each_ring(ring, q_vector->rx) {
1848 if (ring_is_ps_enabled(ring))
1849 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1850 else
1851 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1852 /* if we didn't clean as many as budgeted, we must be done */
1853 clean_complete &= (budget_per_ring != cleaned);
1854 }
1855
1856 	/* If work is not completed, return budget and polling will continue */
1857 if (!clean_complete) {
1858 if (arm_wb)
1859 i40e_force_wb(vsi, q_vector);
1860 return budget;
1861 }
1862
1863 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
1864 q_vector->arm_wb_state = false;
1865
1866 /* Work is done so exit the polling mode and re-enable the interrupt */
1867 napi_complete(napi);
1868 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1869 i40e_update_enable_itr(vsi, q_vector);
1870 } else { /* Legacy mode */
1871 struct i40e_hw *hw = &vsi->back->hw;
1872 /* We re-enable the queue 0 cause, but
1873 * don't worry about dynamic_enable
1874 * because we left it on for the other
1875 * possible interrupts during napi
1876 */
1877 u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
1878 I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1879
1880 wr32(hw, I40E_QINT_RQCTL(0), qval);
1881 qval = rd32(hw, I40E_QINT_TQCTL(0)) |
1882 I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1883 wr32(hw, I40E_QINT_TQCTL(0), qval);
1884 i40e_irq_dynamic_enable_icr0(vsi->back);
1885 }
1886 return 0;
1887 }
1888
1889 /**
1890 * i40e_atr - Add a Flow Director ATR filter
1891 * @tx_ring: ring to add programming descriptor to
1892 * @skb: send buffer
1893 * @tx_flags: send tx flags
1894 * @protocol: wire protocol
1895 **/
1896 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1897 u32 tx_flags, __be16 protocol)
1898 {
1899 struct i40e_filter_program_desc *fdir_desc;
1900 struct i40e_pf *pf = tx_ring->vsi->back;
1901 union {
1902 unsigned char *network;
1903 struct iphdr *ipv4;
1904 struct ipv6hdr *ipv6;
1905 } hdr;
1906 struct tcphdr *th;
1907 unsigned int hlen;
1908 u32 flex_ptype, dtype_cmd;
1909 u16 i;
1910
1911 /* make sure ATR is enabled */
1912 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1913 return;
1914
1915 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1916 return;
1917
1918 /* if sampling is disabled do nothing */
1919 if (!tx_ring->atr_sample_rate)
1920 return;
1921
1922 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
1923 return;
1924
1925 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
1926 /* snag network header to get L4 type and address */
1927 hdr.network = skb_network_header(skb);
1928
1929 		/* Currently only IPv4/IPv6 with TCP is supported.
1930 		 * Access ihl as u8 to avoid unaligned access on ia64.
1931 */
1932 if (tx_flags & I40E_TX_FLAGS_IPV4)
1933 hlen = (hdr.network[0] & 0x0F) << 2;
1934 else if (protocol == htons(ETH_P_IPV6))
1935 hlen = sizeof(struct ipv6hdr);
1936 else
1937 return;
1938 } else {
1939 hdr.network = skb_inner_network_header(skb);
1940 hlen = skb_inner_network_header_len(skb);
1941 }
1942
1943 /* Currently only IPv4/IPv6 with TCP is supported
1944 * Note: tx_flags gets modified to reflect inner protocols in
1945 * tx_enable_csum function if encap is enabled.
1946 */
1947 if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
1948 (hdr.ipv4->protocol != IPPROTO_TCP))
1949 return;
1950 else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
1951 (hdr.ipv6->nexthdr != IPPROTO_TCP))
1952 return;
1953
1954 th = (struct tcphdr *)(hdr.network + hlen);
1955
1956 /* Due to lack of space, no more new filters can be programmed */
1957 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1958 return;
1959 if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
1960 /* HW ATR eviction will take care of removing filters on FIN
1961 * and RST packets.
1962 */
1963 if (th->fin || th->rst)
1964 return;
1965 }
1966
1967 tx_ring->atr_count++;
1968
1969 /* sample on all syn/fin/rst packets or once every atr sample rate */
1970 if (!th->fin &&
1971 !th->syn &&
1972 !th->rst &&
1973 (tx_ring->atr_count < tx_ring->atr_sample_rate))
1974 return;
1975
1976 tx_ring->atr_count = 0;
1977
1978 /* grab the next descriptor */
1979 i = tx_ring->next_to_use;
1980 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1981
1982 i++;
1983 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1984
1985 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1986 I40E_TXD_FLTR_QW0_QINDEX_MASK;
1987 flex_ptype |= (protocol == htons(ETH_P_IP)) ?
1988 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
1989 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
1990 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
1991 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
1992
1993 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
1994
1995 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
1996
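	/* A FIN or RST means the flow is ending, so tear the ATR filter
	 * down; any other sampled packet adds or refreshes the entry.
	 */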
1997 dtype_cmd |= (th->fin || th->rst) ?
1998 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
1999 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2000 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2001 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2002
2003 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2004 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2005
2006 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2007 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2008
2009 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2010 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
2011 dtype_cmd |=
2012 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2013 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2014 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2015 else
2016 dtype_cmd |=
2017 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2018 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2019 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2020
2021 if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
2022 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2023
2024 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2025 fdir_desc->rsvd = cpu_to_le32(0);
2026 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2027 fdir_desc->fd_id = cpu_to_le32(0);
2028 }
2029
2030 /**
2031 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2032 * @skb: send buffer
2033 * @tx_ring: ring to send buffer on
2034 * @flags: the tx flags to be set
2035 *
2036  * Checks the skb and sets up correspondingly several generic transmit flags
2037  * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2038  *
2039  * Returns an error code to indicate the frame should be dropped upon error,
2040  * otherwise returns 0 to indicate the flags have been set properly.
2041 **/
2042 #ifdef I40E_FCOE
2043 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2044 struct i40e_ring *tx_ring,
2045 u32 *flags)
2046 #else
2047 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2048 struct i40e_ring *tx_ring,
2049 u32 *flags)
2050 #endif
2051 {
2052 __be16 protocol = skb->protocol;
2053 u32 tx_flags = 0;
2054
2055 if (protocol == htons(ETH_P_8021Q) &&
2056 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2057 /* When HW VLAN acceleration is turned off by the user the
2058 * stack sets the protocol to 8021q so that the driver
2059 * can take any steps required to support the SW only
2060 * VLAN handling. In our case the driver doesn't need
2061 * to take any further steps so just set the protocol
2062 * to the encapsulated ethertype.
2063 */
2064 skb->protocol = vlan_get_protocol(skb);
2065 goto out;
2066 }
2067
2068 /* if we have a HW VLAN tag being added, default to the HW one */
2069 if (skb_vlan_tag_present(skb)) {
2070 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2071 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2072 /* else if it is a SW VLAN, check the next protocol and store the tag */
2073 } else if (protocol == htons(ETH_P_8021Q)) {
2074 struct vlan_hdr *vhdr, _vhdr;
2075 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2076 if (!vhdr)
2077 return -EINVAL;
2078
2079 protocol = vhdr->h_vlan_encapsulated_proto;
2080 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2081 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2082 }
2083
2084 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2085 goto out;
2086
2087 /* Insert 802.1p priority into VLAN header */
2088 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2089 (skb->priority != TC_PRIO_CONTROL)) {
2090 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2091 tx_flags |= (skb->priority & 0x7) <<
2092 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2093 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2094 struct vlan_ethhdr *vhdr;
2095 int rc;
2096
2097 rc = skb_cow_head(skb, 0);
2098 if (rc < 0)
2099 return rc;
2100 vhdr = (struct vlan_ethhdr *)skb->data;
2101 vhdr->h_vlan_TCI = htons(tx_flags >>
2102 I40E_TX_FLAGS_VLAN_SHIFT);
2103 } else {
2104 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2105 }
2106 }
2107
2108 out:
2109 *flags = tx_flags;
2110 return 0;
2111 }
2112
2113 /**
2114 * i40e_tso - set up the tso context descriptor
2115 * @tx_ring: ptr to the ring to send
2116 * @skb: ptr to the skb we're sending
2117 * @hdr_len: ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to Quad Word 1 of the Tx descriptor
2118  * @cd_tunneling: ptr to context descriptor bits
2119 *
2120  * Returns 0 if no TSO can happen, 1 if TSO is in progress, or a negative error code
2121 **/
2122 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2123 u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
2124 u32 *cd_tunneling)
2125 {
2126 u32 cd_cmd, cd_tso_len, cd_mss;
2127 struct ipv6hdr *ipv6h;
2128 struct tcphdr *tcph;
2129 struct iphdr *iph;
2130 u32 l4len;
2131 int err;
2132
2133 if (!skb_is_gso(skb))
2134 return 0;
2135
2136 err = skb_cow_head(skb, 0);
2137 if (err < 0)
2138 return err;
2139
2140 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2141 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2142
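	/* For TSO the length fields are zeroed and the TCP checksum is
	 * seeded with the pseudo-header checksum, so the hardware can fill
	 * in the final length and checksum for each segment it produces.
	 */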
2143 if (iph->version == 4) {
2144 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2145 iph->tot_len = 0;
2146 iph->check = 0;
2147 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2148 0, IPPROTO_TCP, 0);
2149 } else if (ipv6h->version == 6) {
2150 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2151 ipv6h->payload_len = 0;
2152 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2153 0, IPPROTO_TCP, 0);
2154 }
2155
2156 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2157 *hdr_len = (skb->encapsulation
2158 ? (skb_inner_transport_header(skb) - skb->data)
2159 : skb_transport_offset(skb)) + l4len;
2160
2161 /* find the field values */
2162 cd_cmd = I40E_TX_CTX_DESC_TSO;
2163 cd_tso_len = skb->len - *hdr_len;
2164 cd_mss = skb_shinfo(skb)->gso_size;
2165 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2166 ((u64)cd_tso_len <<
2167 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2168 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2169 return 1;
2170 }
2171
2172 /**
2173 * i40e_tsyn - set up the tsyn context descriptor
2174 * @tx_ring: ptr to the ring to send
2175 * @skb: ptr to the skb we're sending
2176 * @tx_flags: the collected send information
2177 *
2178 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2179 **/
2180 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2181 u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2182 {
2183 struct i40e_pf *pf;
2184
2185 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2186 return 0;
2187
2188 /* Tx timestamps cannot be sampled when doing TSO */
2189 if (tx_flags & I40E_TX_FLAGS_TSO)
2190 return 0;
2191
2192 /* only timestamp the outbound packet if the user has requested it and
2193 * we are not already transmitting a packet to be timestamped
2194 */
2195 pf = i40e_netdev_to_pf(tx_ring->netdev);
2196 if (!(pf->flags & I40E_FLAG_PTP))
2197 return 0;
2198
2199 if (pf->ptp_tx &&
2200 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2201 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2202 pf->ptp_tx_skb = skb_get(skb);
2203 } else {
2204 return 0;
2205 }
2206
2207 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2208 I40E_TXD_CTX_QW1_CMD_SHIFT;
2209
2210 return 1;
2211 }
2212
2213 /**
2214 * i40e_tx_enable_csum - Enable Tx checksum offloads
2215 * @skb: send buffer
2216 * @tx_flags: pointer to Tx flags currently set
2217 * @td_cmd: Tx descriptor command bits to set
2218 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
2219  * @cd_tunneling: ptr to context desc bits
2220 **/
2221 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2222 u32 *td_cmd, u32 *td_offset,
2223 struct i40e_ring *tx_ring,
2224 u32 *cd_tunneling)
2225 {
2226 struct ipv6hdr *this_ipv6_hdr;
2227 unsigned int this_tcp_hdrlen;
2228 struct iphdr *this_ip_hdr;
2229 u32 network_hdr_len;
2230 u8 l4_hdr = 0;
2231 struct udphdr *oudph;
2232 struct iphdr *oiph;
2233 u32 l4_tunnel = 0;
2234
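	/* For encapsulated frames the outer IP/UDP headers are described via
	 * the context descriptor (cd_tunneling), and the regular checksum
	 * offload fields below are pointed at the inner headers instead.
	 */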
2235 if (skb->encapsulation) {
2236 switch (ip_hdr(skb)->protocol) {
2237 case IPPROTO_UDP:
2238 oudph = udp_hdr(skb);
2239 oiph = ip_hdr(skb);
2240 l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2241 *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
2242 break;
2243 default:
2244 return;
2245 }
2246 network_hdr_len = skb_inner_network_header_len(skb);
2247 this_ip_hdr = inner_ip_hdr(skb);
2248 this_ipv6_hdr = inner_ipv6_hdr(skb);
2249 this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2250
2251 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2252 if (*tx_flags & I40E_TX_FLAGS_TSO) {
2253 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2254 ip_hdr(skb)->check = 0;
2255 } else {
2256 *cd_tunneling |=
2257 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2258 }
2259 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2260 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2261 if (*tx_flags & I40E_TX_FLAGS_TSO)
2262 ip_hdr(skb)->check = 0;
2263 }
2264
2265 /* Now set the ctx descriptor fields */
2266 *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2267 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
2268 l4_tunnel |
2269 ((skb_inner_network_offset(skb) -
2270 skb_transport_offset(skb)) >> 1) <<
2271 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2272 if (this_ip_hdr->version == 6) {
2273 *tx_flags &= ~I40E_TX_FLAGS_IPV4;
2274 *tx_flags |= I40E_TX_FLAGS_IPV6;
2275 }
2276 if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
2277 (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) &&
2278 (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
2279 oudph->check = ~csum_tcpudp_magic(oiph->saddr,
2280 oiph->daddr,
2281 (skb->len - skb_transport_offset(skb)),
2282 IPPROTO_UDP, 0);
2283 *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2284 }
2285 } else {
2286 network_hdr_len = skb_network_header_len(skb);
2287 this_ip_hdr = ip_hdr(skb);
2288 this_ipv6_hdr = ipv6_hdr(skb);
2289 this_tcp_hdrlen = tcp_hdrlen(skb);
2290 }
2291
2292 /* Enable IP checksum offloads */
2293 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2294 l4_hdr = this_ip_hdr->protocol;
2295 /* the stack computes the IP header already, the only time we
2296 * need the hardware to recompute it is in the case of TSO.
2297 */
2298 if (*tx_flags & I40E_TX_FLAGS_TSO) {
2299 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2300 this_ip_hdr->check = 0;
2301 } else {
2302 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2303 }
2304 /* Now set the td_offset for IP header length */
2305 *td_offset = (network_hdr_len >> 2) <<
2306 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2307 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2308 l4_hdr = this_ipv6_hdr->nexthdr;
2309 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2310 /* Now set the td_offset for IP header length */
2311 *td_offset = (network_hdr_len >> 2) <<
2312 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2313 }
2314 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2315 *td_offset |= (skb_network_offset(skb) >> 1) <<
2316 I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2317
2318 /* Enable L4 checksum offloads */
2319 switch (l4_hdr) {
2320 case IPPROTO_TCP:
2321 /* enable checksum offloads */
2322 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2323 *td_offset |= (this_tcp_hdrlen >> 2) <<
2324 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2325 break;
2326 case IPPROTO_SCTP:
2327 /* enable SCTP checksum offload */
2328 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2329 *td_offset |= (sizeof(struct sctphdr) >> 2) <<
2330 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2331 break;
2332 case IPPROTO_UDP:
2333 /* enable UDP checksum offload */
2334 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2335 *td_offset |= (sizeof(struct udphdr) >> 2) <<
2336 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2337 break;
2338 default:
2339 break;
2340 }
2341 }
2342
2343 /**
2344  * i40e_create_tx_ctx - Build the Tx context descriptor
2345 * @tx_ring: ring to create the descriptor on
2346 * @cd_type_cmd_tso_mss: Quad Word 1
2347 * @cd_tunneling: Quad Word 0 - bits 0-31
2348 * @cd_l2tag2: Quad Word 0 - bits 32-63
2349 **/
2350 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2351 const u64 cd_type_cmd_tso_mss,
2352 const u32 cd_tunneling, const u32 cd_l2tag2)
2353 {
2354 struct i40e_tx_context_desc *context_desc;
2355 int i = tx_ring->next_to_use;
2356
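	/* if nothing beyond the default DTYPE is set, there is nothing to
	 * program, so skip writing a context descriptor for this packet
	 */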
2357 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2358 !cd_tunneling && !cd_l2tag2)
2359 return;
2360
2361 /* grab the next descriptor */
2362 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2363
2364 i++;
2365 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2366
2367 /* cpu_to_le32 and assign to struct fields */
2368 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2369 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2370 context_desc->rsvd = cpu_to_le16(0);
2371 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2372 }
2373
2374 /**
2375 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2376 * @tx_ring: the ring to be checked
2377 * @size: the size buffer we want to assure is available
2378 *
2379 * Returns -EBUSY if a stop is needed, else 0
2380 **/
2381 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2382 {
2383 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2384 /* Memory barrier before checking head and tail */
2385 smp_mb();
2386
2387 	/* Check again in case another CPU has just made room available. */
2388 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2389 return -EBUSY;
2390
2391 /* A reprieve! - use start_queue because it doesn't call schedule */
2392 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2393 ++tx_ring->tx_stats.restart_queue;
2394 return 0;
2395 }
2396
2397 /**
2398 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2399 * @tx_ring: the ring to be checked
2400 * @size: the size buffer we want to assure is available
2401 *
2402 * Returns 0 if stop is not needed
2403 **/
2404 #ifdef I40E_FCOE
2405 inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2406 #else
2407 static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2408 #endif
2409 {
2410 if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2411 return 0;
2412 return __i40e_maybe_stop_tx(tx_ring, size);
2413 }
2414
2415 /**
2416 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2417 * @skb: send buffer
2418 * @tx_flags: collected send information
2419 *
2420 * Note: Our HW can't scatter-gather more than 8 fragments to build
2421 * a packet on the wire and so we need to figure out the cases where we
2422 * need to linearize the skb.
2423 **/
2424 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2425 {
2426 struct skb_frag_struct *frag;
2427 bool linearize = false;
2428 unsigned int size = 0;
2429 u16 num_frags;
2430 u16 gso_segs;
2431
2432 num_frags = skb_shinfo(skb)->nr_frags;
2433 gso_segs = skb_shinfo(skb)->gso_segs;
2434
2435 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2436 u16 j = 0;
2437
2438 if (num_frags < (I40E_MAX_BUFFER_TXD))
2439 goto linearize_chk_done;
2440 /* try the simple math, if we have too many frags per segment */
2441 if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2442 I40E_MAX_BUFFER_TXD) {
2443 linearize = true;
2444 goto linearize_chk_done;
2445 }
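		/* Walk the frag list and count how many buffers each
		 * gso_size worth of payload would consume; if any single
		 * segment would span I40E_MAX_BUFFER_TXD fragments, fall
		 * back to linearizing the skb.
		 */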
2446 frag = &skb_shinfo(skb)->frags[0];
2447 /* we might still have more fragments per segment */
2448 do {
2449 size += skb_frag_size(frag);
2450 frag++; j++;
2451 if ((size >= skb_shinfo(skb)->gso_size) &&
2452 (j < I40E_MAX_BUFFER_TXD)) {
2453 size = (size % skb_shinfo(skb)->gso_size);
2454 j = (size) ? 1 : 0;
2455 }
2456 if (j == I40E_MAX_BUFFER_TXD) {
2457 linearize = true;
2458 break;
2459 }
2460 num_frags--;
2461 } while (num_frags);
2462 } else {
2463 if (num_frags >= I40E_MAX_BUFFER_TXD)
2464 linearize = true;
2465 }
2466
2467 linearize_chk_done:
2468 return linearize;
2469 }
2470
2471 /**
2472 * i40e_tx_map - Build the Tx descriptor
2473 * @tx_ring: ring to send buffer on
2474 * @skb: send buffer
2475 * @first: first buffer info buffer to use
2476 * @tx_flags: collected send information
2477 * @hdr_len: size of the packet header
2478 * @td_cmd: the command field in the descriptor
2479 * @td_offset: offset for checksum or crc
2480 **/
2481 #ifdef I40E_FCOE
2482 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2483 struct i40e_tx_buffer *first, u32 tx_flags,
2484 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2485 #else
2486 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2487 struct i40e_tx_buffer *first, u32 tx_flags,
2488 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2489 #endif
2490 {
2491 unsigned int data_len = skb->data_len;
2492 unsigned int size = skb_headlen(skb);
2493 struct skb_frag_struct *frag;
2494 struct i40e_tx_buffer *tx_bi;
2495 struct i40e_tx_desc *tx_desc;
2496 u16 i = tx_ring->next_to_use;
2497 u32 td_tag = 0;
2498 dma_addr_t dma;
2499 u16 gso_segs;
2500
2501 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2502 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2503 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2504 I40E_TX_FLAGS_VLAN_SHIFT;
2505 }
2506
2507 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2508 gso_segs = skb_shinfo(skb)->gso_segs;
2509 else
2510 gso_segs = 1;
2511
2512 /* multiply data chunks by size of headers */
2513 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2514 first->gso_segs = gso_segs;
2515 first->skb = skb;
2516 first->tx_flags = tx_flags;
2517
2518 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2519
2520 tx_desc = I40E_TX_DESC(tx_ring, i);
2521 tx_bi = first;
2522
2523 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2524 if (dma_mapping_error(tx_ring->dev, dma))
2525 goto dma_error;
2526
2527 /* record length, and DMA address */
2528 dma_unmap_len_set(tx_bi, len, size);
2529 dma_unmap_addr_set(tx_bi, dma, dma);
2530
2531 tx_desc->buffer_addr = cpu_to_le64(dma);
2532
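		/* a buffer larger than the per-descriptor limit has to be
		 * split across several data descriptors
		 */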
2533 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2534 tx_desc->cmd_type_offset_bsz =
2535 build_ctob(td_cmd, td_offset,
2536 I40E_MAX_DATA_PER_TXD, td_tag);
2537
2538 tx_desc++;
2539 i++;
2540 if (i == tx_ring->count) {
2541 tx_desc = I40E_TX_DESC(tx_ring, 0);
2542 i = 0;
2543 }
2544
2545 dma += I40E_MAX_DATA_PER_TXD;
2546 size -= I40E_MAX_DATA_PER_TXD;
2547
2548 tx_desc->buffer_addr = cpu_to_le64(dma);
2549 }
2550
2551 if (likely(!data_len))
2552 break;
2553
2554 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2555 size, td_tag);
2556
2557 tx_desc++;
2558 i++;
2559 if (i == tx_ring->count) {
2560 tx_desc = I40E_TX_DESC(tx_ring, 0);
2561 i = 0;
2562 }
2563
2564 size = skb_frag_size(frag);
2565 data_len -= size;
2566
2567 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2568 DMA_TO_DEVICE);
2569
2570 tx_bi = &tx_ring->tx_bi[i];
2571 }
2572
2573 /* Place RS bit on last descriptor of any packet that spans across the
2574 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
2575 */
2576 if (((i & WB_STRIDE) != WB_STRIDE) &&
2577 (first <= &tx_ring->tx_bi[i]) &&
2578 (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2579 tx_desc->cmd_type_offset_bsz =
2580 build_ctob(td_cmd, td_offset, size, td_tag) |
2581 cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2582 I40E_TXD_QW1_CMD_SHIFT);
2583 } else {
2584 tx_desc->cmd_type_offset_bsz =
2585 build_ctob(td_cmd, td_offset, size, td_tag) |
2586 cpu_to_le64((u64)I40E_TXD_CMD <<
2587 I40E_TXD_QW1_CMD_SHIFT);
2588 }
2589
2590 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2591 tx_ring->queue_index),
2592 first->bytecount);
2593
2594 /* Force memory writes to complete before letting h/w
2595 * know there are new descriptors to fetch. (Only
2596 * applicable for weak-ordered memory model archs,
2597 * such as IA-64).
2598 */
2599 wmb();
2600
2601 /* set next_to_watch value indicating a packet is present */
2602 first->next_to_watch = tx_desc;
2603
2604 i++;
2605 if (i == tx_ring->count)
2606 i = 0;
2607
2608 tx_ring->next_to_use = i;
2609
2610 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2611 /* notify HW of packet */
2612 if (!skb->xmit_more ||
2613 netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2614 tx_ring->queue_index)))
2615 writel(i, tx_ring->tail);
2616 else
2617 prefetchw(tx_desc + 1);
2618
2619 return;
2620
2621 dma_error:
2622 dev_info(tx_ring->dev, "TX DMA map failed\n");
2623
2624 /* clear dma mappings for failed tx_bi map */
2625 for (;;) {
2626 tx_bi = &tx_ring->tx_bi[i];
2627 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2628 if (tx_bi == first)
2629 break;
2630 if (i == 0)
2631 i = tx_ring->count;
2632 i--;
2633 }
2634
2635 tx_ring->next_to_use = i;
2636 }
2637
2638 /**
2639 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2640 * @skb: send buffer
2641 * @tx_ring: ring to send buffer on
2642 *
2643  * Returns the number of data descriptors needed for this skb. Returns 0 to indicate
2644  * there are not enough descriptors available in this ring since we need at least
2645 * one descriptor.
2646 **/
2647 #ifdef I40E_FCOE
2648 inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2649 struct i40e_ring *tx_ring)
2650 #else
2651 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2652 struct i40e_ring *tx_ring)
2653 #endif
2654 {
2655 unsigned int f;
2656 int count = 0;
2657
2658 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2659 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2660 * + 4 desc gap to avoid the cache line where head is,
2661 * + 1 desc for context descriptor,
2662 * otherwise try next time
2663 */
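	/* For example (hypothetical sizes, assuming each piece fits within
	 * I40E_MAX_DATA_PER_TXD): a frame with a short linear head and two
	 * page fragments needs 1 + 2 = 3 data descriptors here, and the
	 * check below additionally reserves the 4 + 1 slots of headroom.
	 */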
2664 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2665 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2666
2667 count += TXD_USE_COUNT(skb_headlen(skb));
2668 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2669 tx_ring->tx_stats.tx_busy++;
2670 return 0;
2671 }
2672 return count;
2673 }
2674
2675 /**
2676 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2677 * @skb: send buffer
2678 * @tx_ring: ring to send buffer on
2679 *
2680 * Returns NETDEV_TX_OK if sent, else an error code
2681 **/
2682 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2683 struct i40e_ring *tx_ring)
2684 {
2685 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2686 u32 cd_tunneling = 0, cd_l2tag2 = 0;
2687 struct i40e_tx_buffer *first;
2688 u32 td_offset = 0;
2689 u32 tx_flags = 0;
2690 __be16 protocol;
2691 u32 td_cmd = 0;
2692 u8 hdr_len = 0;
2693 int tsyn;
2694 	int tso;

2695 	if (!i40e_xmit_descriptor_count(skb, tx_ring))
2696 return NETDEV_TX_BUSY;
2697
2698 /* prepare the xmit flags */
2699 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2700 goto out_drop;
2701
2702 /* obtain protocol of skb */
2703 protocol = vlan_get_protocol(skb);
2704
2705 /* record the location of the first descriptor for this packet */
2706 first = &tx_ring->tx_bi[tx_ring->next_to_use];
2707
2708 /* setup IPv4/IPv6 offloads */
2709 if (protocol == htons(ETH_P_IP))
2710 tx_flags |= I40E_TX_FLAGS_IPV4;
2711 else if (protocol == htons(ETH_P_IPV6))
2712 tx_flags |= I40E_TX_FLAGS_IPV6;
2713
2714 tso = i40e_tso(tx_ring, skb, &hdr_len,
2715 &cd_type_cmd_tso_mss, &cd_tunneling);
2716
2717 if (tso < 0)
2718 goto out_drop;
2719 else if (tso)
2720 tx_flags |= I40E_TX_FLAGS_TSO;
2721
2722 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2723
2724 if (tsyn)
2725 tx_flags |= I40E_TX_FLAGS_TSYN;
2726
2727 if (i40e_chk_linearize(skb, tx_flags))
2728 if (skb_linearize(skb))
2729 goto out_drop;
2730
2731 skb_tx_timestamp(skb);
2732
2733 /* always enable CRC insertion offload */
2734 td_cmd |= I40E_TX_DESC_CMD_ICRC;
2735
2736 /* Always offload the checksum, since it's in the data descriptor */
2737 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2738 tx_flags |= I40E_TX_FLAGS_CSUM;
2739
2740 i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2741 tx_ring, &cd_tunneling);
2742 }
2743
2744 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2745 cd_tunneling, cd_l2tag2);
2746
2747 /* Add Flow Director ATR if it's enabled.
2748 *
2749 * NOTE: this must always be directly before the data descriptor.
2750 */
2751 i40e_atr(tx_ring, skb, tx_flags, protocol);
2752
2753 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2754 td_cmd, td_offset);
2755
2756 return NETDEV_TX_OK;
2757
2758 out_drop:
2759 dev_kfree_skb_any(skb);
2760 return NETDEV_TX_OK;
2761 }
2762
2763 /**
2764 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2765 * @skb: send buffer
2766 * @netdev: network interface device structure
2767 *
2768 * Returns NETDEV_TX_OK if sent, else an error code
2769 **/
2770 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2771 {
2772 struct i40e_netdev_priv *np = netdev_priv(netdev);
2773 struct i40e_vsi *vsi = np->vsi;
2774 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2775
2776 /* hardware can't handle really short frames, hardware padding works
2777 * beyond this point
2778 */
2779 if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2780 return NETDEV_TX_OK;
2781
2782 return i40e_xmit_frame_ring(skb, tx_ring);
2783 }