/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/
#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include "i40e.h"
#include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}

#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
#define I40E_FD_CLEAN_DELAY 10
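/* For intuition: build_ctob() packs all four Tx descriptor fields into a
 * single little-endian quad word.  An illustrative call such as
 * build_ctob(I40E_TXD_CMD, 0, 512, 0) ORs the DATA descriptor type with
 * the EOP|RS command bits shifted to I40E_TXD_QW1_CMD_SHIFT and a
 * 512-byte buffer size shifted to I40E_TXD_QW1_TX_BUF_SZ_SHIFT, while
 * the offset and L2 tag fields stay zero.
 */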
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: The PF pointer
 * @add: True for add/update, False for remove
 **/
int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
			     struct i40e_pf *pf, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	unsigned int fpt, dcc;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 delay = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = NULL;
	for (i = 0; i < pf->num_alloc_vsi; i++)
		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
			vsi = pf->vsi[i];
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	/* we need two descriptors to add/del a filter and we can wait */
	do {
		if (I40E_DESC_UNUSED(tx_ring) > 1)
			break;
		msleep_interruptible(1);
		delay++;
	} while (delay < I40E_FD_CLEAN_DELAY);

	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
		return -EAGAIN;

	dma = dma_map_single(dev, raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
	first = &tx_ring->tx_bi[i];
	memset(first, 0, sizeof(struct i40e_tx_buffer));

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
	      I40E_TXD_FLTR_QW0_QINDEX_MASK;

	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;

	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;

	/* Use LAN VSI Id if not programmed by user */
	if (fdir_data->dest_vsi == 0)
		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
	else
		fpt |= ((u32)fdir_data->dest_vsi <<
			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;

	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;

	if (add)
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
	else
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;

	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
	       I40E_TXD_FLTR_QW1_DEST_MASK;

	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;

	if (fdir_data->cnt_index != 0) {
		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
		dcc |= ((u32)fdir_data->cnt_index <<
			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
		       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
	}

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);

	/* Now program a dummy descriptor */
	i = tx_ring->next_to_use;
	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_buf = &tx_ring->tx_bi[i];

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));

	/* record length, and DMA address */
	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
	dma_unmap_addr_set(tx_buf, dma, dma);

	tx_desc->buffer_addr = cpu_to_le64(dma);
	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;

	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
	tx_buf->raw_buf = (void *)raw_packet;

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);

	/* set the timestamp */
	tx_buf->time_stamp = jiffies;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
	 */
	wmb();

	/* Mark the data descriptor to be watched */
	first->next_to_watch = tx_desc;

	writel(tx_ring->next_to_use, tx_ring->tail);
	return 0;

dma_fail:
	return -1;
}
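/* Note: programming one sideband filter consumes two descriptors on the
 * FDIR ring: the filter program descriptor built above and the dummy
 * data descriptor that carries the raw packet the hardware parses for
 * match fields.  That is why the wait loop above insists on at least
 * two unused descriptors before proceeding.
 */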
#define IP_HEADER_OFFSET 14
#define I40E_UDPIP_DUMMY_PACKET_LEN 42
/**
 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct udphdr *udp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	udp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	udp->source = fd_data->src_port;

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	return err ? -EOPNOTSUPP : 0;
}
#define I40E_TCPIP_DUMMY_PACKET_LEN 54
/**
 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct tcphdr *tcp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
		0x0, 0x72, 0, 0, 0, 0};

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	tcp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	tcp->source = fd_data->src_port;

	if (add) {
		pf->fd_tcp_rule++;
		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
		}
	} else {
		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
				  (pf->fd_tcp_rule - 1) : 0;
		if (pf->fd_tcp_rule == 0) {
			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
			dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
		}
	}

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	return err ? -EOPNOTSUPP : 0;
}
/**
 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Always returns -EOPNOTSUPP
 **/
static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
				    struct i40e_fdir_filter *fd_data,
				    bool add)
{
	return -EOPNOTSUPP;
}

#define I40E_IP_DUMMY_PACKET_LEN 34
/**
 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
				  struct i40e_fdir_filter *fd_data,
				  bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	int i;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0};

	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
		if (!raw_packet)
			return -ENOMEM;
		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);

		ip->saddr = fd_data->src_ip[0];
		ip->daddr = fd_data->dst_ip[0];
		ip->protocol = 0;

		fd_data->pctype = i;
		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);

		if (ret) {
			dev_info(&pf->pdev->dev,
				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
				 fd_data->pctype, fd_data->fd_id, ret);
			err = true;
		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			if (add)
				dev_info(&pf->pdev->dev,
					 "Filter OK for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
			else
				dev_info(&pf->pdev->dev,
					 "Filter deleted for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
		}
	}

	return err ? -EOPNOTSUPP : 0;
}
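/* Note: unlike the TCP/UDP helpers above, the IPv4 helper loops over
 * every PCTYPE from I40E_FILTER_PCTYPE_NONF_IPV4_OTHER through
 * I40E_FILTER_PCTYPE_FRAG_IPV4, programming one filter per flow
 * personality so both fragmented and unfragmented traffic match.
 */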
/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: the filter to add or delete
 * @add: true adds a filter, false removes it
 *
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;
	int ret;

	switch (input->flow_type & ~FLOW_EXT) {
	case TCP_V4_FLOW:
		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
		break;
	case UDP_V4_FLOW:
		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
		break;
	case SCTP_V4_FLOW:
		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
		break;
	case IPV4_FLOW:
		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
		break;
	case IP_USER_FLOW:
		switch (input->ip4_proto) {
		case IPPROTO_TCP:
			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
			break;
		case IPPROTO_UDP:
			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
			break;
		case IPPROTO_SCTP:
			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
			break;
		default:
			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
			break;
		}
		break;
	default:
		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
			 input->flow_type);
		ret = -EINVAL;
	}

	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
	return ret;
}
/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
				  union i40e_rx_desc *rx_desc, u8 prog_id)
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
	u32 fcnt_prog, fcnt_avail;
	u32 error;
	u64 qw;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

	if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
		    (I40E_DEBUG_FD & pf->hw.debug_mask))
			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);

		/* store the current atr filter count */
		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);

		/* filter programming failed most likely due to table full */
		fcnt_prog = i40e_get_cur_guaranteed_fd_count(pf);
		fcnt_avail = pf->fdir_pf_filter_count;
		/* If ATR is running fcnt_prog can quickly change,
		 * if we are very close to full, it makes sense to disable
		 * FD ATR/SB and then re-enable it when there is room.
		 */
		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
			    !(pf->auto_disable_flags &
			      I40E_FLAG_FD_SB_ENABLED)) {
				dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
				pf->auto_disable_flags |=
						       I40E_FLAG_FD_SB_ENABLED;
			}
		} else {
			dev_info(&pdev->dev,
				 "FD filter programming failed due to incorrect filter parameters\n");
		}
	} else if (error ==
		   (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
		if (I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);
	}
}
/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
			kfree(tx_buffer->raw_buf);
		else
			dev_kfree_skb_any(tx_buffer->skb);

		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}
	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}
/**
 * i40e_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
						  tx_ring->queue_index));
}
/**
 * i40e_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40e_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40e_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}
/**
 * i40e_get_head - Retrieve head from head writeback
 * @tx_ring: tx ring to fetch head of
 *
 * Returns value of Tx ring head based on value stored
 * in head write-back location
 **/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;

	return le32_to_cpu(*(volatile __le32 *)head);
}
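/* For intuition: the head write-back word lives one slot past the last
 * descriptor, e.g. at desc[512] on an illustrative 512-entry ring,
 * which is why i40e_setup_tx_descriptors() below adds sizeof(u32) to
 * the ring allocation before rounding the size up to 4K.
 */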
/**
 * i40e_get_tx_pending - how many tx descriptors not processed
 * @tx_ring: the ring of descriptors
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
static u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
	u32 head, tail;

	head = i40e_get_head(ring);
	tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}
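/* For intuition, with illustrative numbers: if the head write-back
 * location reports head = 500 while the software tail is 10 on a
 * 512-descriptor ring, the indexes have wrapped, so the pending count
 * is tail + count - head = 10 + 512 - 500 = 22 descriptors.
 */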
/**
 * i40e_check_tx_hang - Is there a hang in the Tx queue
 * @tx_ring: the ring of descriptors
 **/
static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
{
	u32 tx_done = tx_ring->stats.packets;
	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
	u32 tx_pending = i40e_get_tx_pending(tx_ring);
	struct i40e_pf *pf = tx_ring->vsi->back;
	bool ret = false;

	clear_check_for_tx_hang(tx_ring);

	/* Check for a hung queue, but be thorough. This verifies
	 * that a transmit has been completed since the previous
	 * check AND there is at least one packet pending. The
	 * ARMED bit is set to indicate a potential hang. The
	 * bit is cleared if a pause frame is received to remove
	 * false hang detection due to PFC or 802.3x frames. By
	 * requiring this to fail twice we avoid races with
	 * PFC clearing the ARMED bit and conditions where we
	 * run the check_tx_hang logic with a transmit completion
	 * pending but without time to complete it yet.
	 */
	if ((tx_done_old == tx_done) && tx_pending) {
		/* make sure it is true for two checks in a row */
		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
				       &tx_ring->state);
	} else if (tx_done_old == tx_done &&
		   (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
				 tx_pending, tx_ring->queue_index);
		pf->tx_sluggish_count++;
	} else {
		/* update completed stats and disarm the hang check */
		tx_ring->tx_stats.tx_done_old = tx_done;
		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
	}

	return ret;
}
#define WB_STRIDE 0x3
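/* WB_STRIDE masks the low two bits of next_to_clean; four 16-byte Tx
 * descriptors fill one 64-byte cache line, so (i & WB_STRIDE) ==
 * WB_STRIDE effectively means cleaning stopped at a cache-line
 * boundary.  i40e_clean_tx_irq() below appears to arm a forced
 * write-back only when it stops mid-line, where completed descriptors
 * could otherwise linger without being written back.
 */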
/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring:  tx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_packets = 0;
	unsigned int total_bytes = 0;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		dev_consume_skb_any(tx_buf->skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	/* check to see if there are any non-cache aligned descriptors
	 * waiting to be written back, and kick the hardware to force
	 * them to be written back in case of napi polling
	 */
	if (budget &&
	    !((i & WB_STRIDE) == WB_STRIDE) &&
	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
		tx_ring->arm_wb = true;
	else
		tx_ring->arm_wb = false;

	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
		/* schedule immediate reset if we believe we hung */
		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
			 "  VSI                  <%d>\n"
			 "  Tx Queue             <%d>\n"
			 "  next_to_use          <%x>\n"
			 "  next_to_clean        <%x>\n",
			 tx_ring->vsi->seid,
			 tx_ring->queue_index,
			 tx_ring->next_to_use, i);
		dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
			 "  time_stamp           <%lx>\n"
			 "  jiffies              <%lx>\n",
			 tx_ring->tx_bi[i].time_stamp, jiffies);

		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);

		dev_info(tx_ring->dev,
			 "tx hang detected on queue %d, reset requested\n",
			 tx_ring->queue_index);

		/* do not fire the reset immediately, wait for the stack to
		 * decide we are truly stuck, also prevents every queue from
		 * simultaneously requesting a reset
		 */

		/* the adapter is about to reset, no point in enabling polling */
		budget = 1;
	}

	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
						      tx_ring->queue_index),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}
/**
 * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 *
 **/
static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
		  /* allow 00 to be written to the index */

	wr32(&vsi->back->hw,
	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
	     val);
}
/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt.  The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern.  Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	u32 new_itr = rc->itr;
	int bytes_per_int;

	if (rc->total_packets == 0 || !rc->itr)
		return;

	/* simple throttlerate management
	 *   0-10MB/s   lowest (100000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (8000 ints/s)
	 */
	bytes_per_int = rc->total_bytes / rc->itr;
	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
		if (bytes_per_int <= 20)
			rc->latency_range = I40E_LOW_LATENCY;
		break;
	}

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_100K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	if (new_itr != rc->itr) {
		/* do an exponential smoothing */
		new_itr = (10 * new_itr * rc->itr) /
			  ((9 * new_itr) + rc->itr);
		rc->itr = new_itr & I40E_MAX_ITR;
	}

	rc->total_bytes = 0;
	rc->total_packets = 0;
}
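/* Worked example of the smoothing above, with illustrative numbers:
 * given a current rc->itr of 100 and a computed target of 40, the
 * update is (10 * 40 * 100) / ((9 * 40) + 100) = 40000 / 460 ~= 86,
 * so the ITR moves only part of the way toward the target on each
 * interrupt rather than jumping straight to the new range's value.
 */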
/**
 * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
 * @q_vector: the vector to adjust
 **/
static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
{
	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
	struct i40e_hw *hw = &q_vector->vsi->back->hw;
	u32 reg_addr;
	u16 old_itr;

	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
	old_itr = q_vector->rx.itr;
	i40e_set_new_dynamic_itr(&q_vector->rx);
	if (old_itr != q_vector->rx.itr)
		wr32(hw, reg_addr, q_vector->rx.itr);

	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
	old_itr = q_vector->tx.itr;
	i40e_set_new_dynamic_itr(&q_vector->tx);
	if (old_itr != q_vector->tx.itr)
		wr32(hw, reg_addr, q_vector->tx.itr);
}
/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @rx_desc: the rx descriptor written back by HW
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 *
 **/
static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
					  union i40e_rx_desc *rx_desc)
{
	u64 qw;
	u8 id;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
	     I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;

	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		i40e_fd_handle_status(rx_ring, rx_desc, id);
#ifdef I40E_FCOE
	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
#endif
}
/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}
/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	if (ring_is_ps_enabled(rx_ring)) {
		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;

		rx_bi = &rx_ring->rx_bi[0];
		if (rx_bi->hdr_buf) {
			dma_free_coherent(dev,
					  bufsz,
					  rx_bi->hdr_buf,
					  rx_bi->dma);
			for (i = 0; i < rx_ring->count; i++) {
				rx_bi = &rx_ring->rx_bi[i];
				rx_bi->dma = 0;
				rx_bi->hdr_buf = NULL;
			}
		}
	}
	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		if (rx_bi->dma) {
			dma_unmap_single(dev,
					 rx_bi->dma,
					 rx_ring->rx_buf_len,
					 DMA_FROM_DEVICE);
			rx_bi->dma = 0;
		}
		if (rx_bi->skb) {
			dev_kfree_skb(rx_bi->skb);
			rx_bi->skb = NULL;
		}
		if (rx_bi->page) {
			if (rx_bi->page_dma) {
				dma_unmap_page(dev,
					       rx_bi->page_dma,
					       PAGE_SIZE / 2,
					       DMA_FROM_DEVICE);
				rx_bi->page_dma = 0;
			}
			__free_page(rx_bi->page);
			rx_bi->page = NULL;
			rx_bi->page_offset = 0;
		}
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}
/**
 * i40e_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40e_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}
/**
 * i40e_alloc_rx_headers - allocate rx header buffers
 * @rx_ring: ring to alloc buffers
 *
 * Allocate rx header buffers for the entire ring. As these are static,
 * this is only called when setting up a new ring.
 **/
void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
{
	int i;
	struct i40e_rx_buffer *rx_bi;
	struct device *dev = rx_ring->dev;
	dma_addr_t dma;
	void *buffer;
	int buf_size;

	if (rx_ring->rx_bi[0].hdr_buf)
		return;
	/* Make sure the buffers don't cross cache line boundaries. */
	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
				    &dma, GFP_KERNEL);
	if (!buffer)
		return;
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		rx_bi->dma = dma + (i * buf_size);
		rx_bi->hdr_buf = buffer + (i * buf_size);
	}
}
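/* For intuition: one coherent block serves the whole ring.  With an
 * illustrative rx_hdr_len of 96, ALIGN(96, 256) = 256, so descriptor i
 * gets the 256-byte slice at buffer + (i * 256), keeping every header
 * buffer cache-aligned and non-overlapping.
 */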
/**
 * i40e_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}
/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}
/**
 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];

		if (bi->skb) /* desc is in use */
			goto no_buffers;
		if (!bi->page) {
			bi->page = alloc_page(GFP_ATOMIC);
			if (!bi->page) {
				rx_ring->rx_stats.alloc_page_failed++;
				goto no_buffers;
			}
		}

		if (!bi->page_dma) {
			/* use a half page if we're re-using */
			bi->page_offset ^= PAGE_SIZE / 2;
			bi->page_dma = dma_map_page(rx_ring->dev,
						    bi->page,
						    bi->page_offset,
						    PAGE_SIZE / 2,
						    DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev,
					      bi->page_dma)) {
				rx_ring->rx_stats.alloc_page_failed++;
				bi->page_dma = 0;
				goto no_buffers;
			}
		}

		dma_sync_single_range_for_device(rx_ring->dev,
						 bi->dma,
						 0,
						 rx_ring->rx_hdr_len,
						 DMA_FROM_DEVICE);
		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);
}
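/* Note: in packet-split mode each page is used in two halves.  The XOR
 * of page_offset with PAGE_SIZE / 2 above flips between the low and
 * high half on every remap, so a page whose other half is still held
 * by the stack can be reused for a fresh buffer immediately.
 */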
/**
 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	struct sk_buff *skb;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];
		skb = bi->skb;

		if (!skb) {
			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
							rx_ring->rx_buf_len);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				goto no_buffers;
			}
			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			bi->skb = skb;
		}

		if (!bi->dma) {
			bi->dma = dma_map_single(rx_ring->dev,
						 skb->data,
						 rx_ring->rx_buf_len,
						 DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
				rx_ring->rx_stats.alloc_buff_failed++;
				bi->dma = 0;
				goto no_buffers;
			}
		}

		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
		rx_desc->read.hdr_addr = 0;
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);
}
/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring:  rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;
	struct i40e_vsi *vsi = rx_ring->vsi;
	u64 flags = vsi->back->flags;

	if (vlan_tag & VLAN_VID_MASK)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	if (flags & I40E_FLAG_IN_NETPOLL)
		netif_rx(skb);
	else
		napi_gro_receive(&q_vector->napi, skb);
}
/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_status: status value of last descriptor in packet
 * @rx_error: error value of last descriptor in packet
 * @rx_ptype: ptype value of last descriptor in packet
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    u32 rx_status,
				    u32 rx_error,
				    u16 rx_ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
	bool ipv4 = false, ipv6 = false;
	bool ipv4_tunnel, ipv6_tunnel;
	__wsum rx_udp_csum;
	struct iphdr *iph;
	__sum16 csum;

	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

	skb->ip_summed = CHECKSUM_NONE;

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* did the hardware decode the packet and checksum? */
	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* both known and outer_ip must be set for the below code to work */
	if (!(decoded.known && decoded.outer_ip))
		return;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
		ipv4 = true;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		ipv6 = true;

	if (ipv4 &&
	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
		goto checksum_fail;

	/* likely incorrect csum if alternate IP extension headers found */
	if (ipv6 &&
	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		/* don't increment checksum err here, non-fatal err */
		return;

	/* there was some L4 error, count error and punt packet to the stack */
	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
		goto checksum_fail;

	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
		return;

	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
	 * it in the driver, hardware does not do it for us.
	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
	 * so the total length of IPv4 header is IHL*4 bytes
	 * The UDP_0 bit *may* bet set if the *inner* header is UDP
	 */
	if (ipv4_tunnel) {
		skb->transport_header = skb->mac_header +
					sizeof(struct ethhdr) +
					(ip_hdr(skb)->ihl * 4);

		/* Add 4 bytes for VLAN tagged packets */
		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
					  skb->protocol == htons(ETH_P_8021AD))
					  ? VLAN_HLEN : 0;

		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
		    (udp_hdr(skb)->check != 0)) {
			rx_udp_csum = udp_csum(skb);
			iph = ip_hdr(skb);
			csum = csum_tcpudp_magic(
					iph->saddr, iph->daddr,
					(skb->len - skb_transport_offset(skb)),
					IPPROTO_UDP, rx_udp_csum);

			if (udp_hdr(skb)->check != csum)
				goto checksum_fail;

		} /* else its GRE and so no outer UDP header */
	}

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum_level = ipv4_tunnel || ipv6_tunnel;

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}
/**
 * i40e_rx_hash - returns the hash value from the Rx descriptor
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline u32 i40e_rx_hash(struct i40e_ring *ring,
			       union i40e_rx_desc *rx_desc)
{
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if ((ring->netdev->features & NETIF_F_RXHASH) &&
	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
	else
		return 0;
}
/**
 * i40e_ptype_to_hash - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return PKT_HASH_TYPE_NONE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
		return PKT_HASH_TYPE_L4;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
		return PKT_HASH_TYPE_L3;
	else
		return PKT_HASH_TYPE_L2;
}
/**
 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
 * @rx_ring:  rx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	const int current_node = numa_node_id();
	struct i40e_vsi *vsi = rx_ring->vsi;
	u16 i = rx_ring->next_to_clean;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	u8 rx_ptype;
	u64 qword;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();
		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring, rx_desc);
			I40E_RX_INCREMENT(rx_ring, i);
			continue;
		}
		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		if (likely(!skb)) {
			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
							rx_ring->rx_hdr_len);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				break;
			}

			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			/* we are reusing so sync this buffer for CPU use */
			dma_sync_single_range_for_cpu(rx_ring->dev,
						      rx_bi->dma,
						      0,
						      rx_ring->rx_hdr_len,
						      DMA_FROM_DEVICE);
		}
		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		prefetch(rx_bi->page);
		rx_bi->skb = NULL;
		cleaned_count++;
		if (rx_hbo || rx_sph) {
			int len;
			if (rx_hbo)
				len = I40E_RX_HDR_SIZE;
			else
				len = rx_header_len;
			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
		} else if (skb->len == 0) {
			int len;

			len = (rx_packet_len > skb_headlen(skb) ?
				skb_headlen(skb) : rx_packet_len);
			memcpy(__skb_put(skb, len),
			       rx_bi->page + rx_bi->page_offset,
			       len);
			rx_bi->page_offset += len;
			rx_packet_len -= len;
		}

		/* Get the rest of the data if this was a header split */
		if (rx_packet_len) {
			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   rx_bi->page,
					   rx_bi->page_offset,
					   rx_packet_len);

			skb->len += rx_packet_len;
			skb->data_len += rx_packet_len;
			skb->truesize += rx_packet_len;

			if ((page_count(rx_bi->page) == 1) &&
			    (page_to_nid(rx_bi->page) == current_node))
				get_page(rx_bi->page);
			else
				rx_bi->page = NULL;

			dma_unmap_page(rx_ring->dev,
				       rx_bi->page_dma,
				       PAGE_SIZE / 2,
				       DMA_FROM_DEVICE);
			rx_bi->page_dma = 0;
		}
		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			struct i40e_rx_buffer *next_buffer;

			next_buffer = &rx_ring->rx_bi[i];
			next_buffer->skb = skb;
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			/* TODO: shouldn't we increment a counter indicating the
			 * drop?
			 */
			continue;
		}

		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
			     i40e_ptype_to_hash(rx_ptype));
		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
			rx_ring->last_rx_timestamp = jiffies;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
#ifdef I40E_FCOE
		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
			dev_kfree_skb_any(skb);
			continue;
		}
#endif
		skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_ring->netdev->last_rx = jiffies;
		rx_desc->wb.qword1.status_error_len = 0;

	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return total_rx_packets;
}
/**
 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
 * @rx_ring:  rx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns number of packets cleaned
 **/
static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	struct i40e_vsi *vsi = rx_ring->vsi;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	u16 rx_packet_len;
	u8 rx_ptype;
	u64 qword;
	u16 i;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();

		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring, rx_desc);
			I40E_RX_INCREMENT(rx_ring, i);
			continue;
		}
		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		prefetch(skb->data);

		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		rx_bi->skb = NULL;
		cleaned_count++;

		/* Get the header and possibly the whole packet
		 * If this is an skb from previous receive dma will be 0
		 */
		skb_put(skb, rx_packet_len);
		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
				 DMA_FROM_DEVICE);
		rx_bi->dma = 0;

		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			/* TODO: shouldn't we increment a counter indicating the
			 * drop?
			 */
			continue;
		}

		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
			     i40e_ptype_to_hash(rx_ptype));
		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
			rx_ring->last_rx_timestamp = jiffies;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
#ifdef I40E_FCOE
		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
			dev_kfree_skb_any(skb);
			continue;
		}
#endif
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_ring->netdev->last_rx = jiffies;
		rx_desc->wb.qword1.status_error_len = 0;
	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return total_rx_packets;
}
/**
 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
 * @napi: napi struct with our devices info in it
 * @budget: amount of work driver is allowed to do this pass, in packets
 *
 * This function will clean all queues associated with a q_vector.
 *
 * Returns the amount of work done
 **/
int i40e_napi_poll(struct napi_struct *napi, int budget)
{
	struct i40e_q_vector *q_vector =
			       container_of(napi, struct i40e_q_vector, napi);
	struct i40e_vsi *vsi = q_vector->vsi;
	struct i40e_ring *ring;
	bool clean_complete = true;
	bool arm_wb = false;
	int budget_per_ring;
	int cleaned;

	if (test_bit(__I40E_DOWN, &vsi->state)) {
		napi_complete(napi);
		return 0;
	}

	/* Since the actual Tx work is minimal, we can give the Tx a larger
	 * budget and be more aggressive about cleaning up the Tx descriptors.
	 */
	i40e_for_each_ring(ring, q_vector->tx) {
		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
		arm_wb |= ring->arm_wb;
	}

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);

	i40e_for_each_ring(ring, q_vector->rx) {
		if (ring_is_ps_enabled(ring))
			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
		else
			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
		/* if we didn't clean as many as budgeted, we must be done */
		clean_complete &= (budget_per_ring != cleaned);
	}

	/* If work not completed, return budget and polling will return */
	if (!clean_complete) {
		if (arm_wb)
			i40e_force_wb(vsi, q_vector);
		return budget;
	}

	/* Work is done so exit the polling mode and re-enable the interrupt */
	napi_complete(napi);
	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
		i40e_update_dynamic_itr(q_vector);

	if (!test_bit(__I40E_DOWN, &vsi->state)) {
		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
			i40e_irq_dynamic_enable(vsi,
					q_vector->v_idx + vsi->base_vector);
		} else {
			struct i40e_hw *hw = &vsi->back->hw;
			/* We re-enable the queue 0 cause, but
			 * don't worry about dynamic_enable
			 * because we left it on for the other
			 * possible interrupts during napi
			 */
			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
			wr32(hw, I40E_QINT_RQCTL(0), qval);

			qval = rd32(hw, I40E_QINT_TQCTL(0));
			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
			wr32(hw, I40E_QINT_TQCTL(0), qval);

			i40e_irq_dynamic_enable_icr0(vsi->back);
		}
	}

	return 0;
}
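/* For intuition, with illustrative numbers: a NAPI budget of 64 split
 * across q_vector->num_ringpairs = 4 gives budget_per_ring =
 * max(64 / 4, 1) = 16, and the max() keeps a vector with many rings
 * from ever polling a ring with a budget of zero.
 */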
/**
 * i40e_atr - Add a Flow Director ATR filter
 * @tx_ring:  ring to add programming descriptor to
 * @skb:      send buffer
 * @flags:    send flags
 * @protocol: wire protocol
 **/
static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 flags, __be16 protocol)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_pf *pf = tx_ring->vsi->back;
	union {
		unsigned char *network;
		struct iphdr *ipv4;
		struct ipv6hdr *ipv6;
	} hdr;
	struct tcphdr *th;
	unsigned int hlen;
	u32 flex_ptype, dtype_cmd;
	u16 i;

	/* make sure ATR is enabled */
	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
		return;

	/* if sampling is disabled do nothing */
	if (!tx_ring->atr_sample_rate)
		return;

	/* snag network header to get L4 type and address */
	hdr.network = skb_network_header(skb);

	/* Currently only IPv4/IPv6 with TCP is supported */
	if (protocol == htons(ETH_P_IP)) {
		if (hdr.ipv4->protocol != IPPROTO_TCP)
			return;

		/* access ihl as a u8 to avoid unaligned access on ia64 */
		hlen = (hdr.network[0] & 0x0F) << 2;
	} else if (protocol == htons(ETH_P_IPV6)) {
		if (hdr.ipv6->nexthdr != IPPROTO_TCP)
			return;

		hlen = sizeof(struct ipv6hdr);
	} else {
		return;
	}

	th = (struct tcphdr *)(hdr.network + hlen);

	/* Due to lack of space, no more new filters can be programmed */
	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
		return;

	tx_ring->atr_count++;

	/* sample on all syn/fin/rst packets or once every atr sample rate */
	if (!th->fin &&
	    !th->syn &&
	    !th->rst &&
	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
		return;

	tx_ring->atr_count = 0;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);

	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;

	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;

	dtype_cmd |= (th->fin || th->rst) ?
		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);

	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
		     I40E_TXD_FLTR_QW1_DEST_SHIFT;

	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;

	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
	dtype_cmd |=
		((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
		I40E_TXD_FLTR_QW1_CNTINDEX_MASK;

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
	fdir_desc->fd_id = cpu_to_le32(0);
}
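/* Note: ATR decides add vs. remove from the live TCP flags inspected
 * above: FIN or RST tears the hardware filter down, while other
 * sampled packets (re)add it, so the filter tracks the life of the
 * flow without any sideband bookkeeping by software.
 */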
/**
 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
 * @skb:      send buffer
 * @tx_ring:  ring to send buffer on
 * @flags:    the tx flags to be set
 *
 * Checks the skb and set up correspondingly several generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns error code indicate the frame should be dropped upon error and the
 * otherwise returns 0 to indicate the flags has been set properly.
 **/
#ifdef I40E_FCOE
int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
			       struct i40e_ring *tx_ring,
			       u32 *flags)
#else
static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
				      struct i40e_ring *tx_ring,
				      u32 *flags)
#endif
{
	__be16 protocol = skb->protocol;
	u32  tx_flags = 0;

	/* if we have a HW VLAN tag being added, default to the HW one */
	if (skb_vlan_tag_present(skb)) {
		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
	/* else if it is a SW VLAN, check the next protocol and store the tag */
	} else if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, _vhdr;
		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
		if (!vhdr)
			return -EINVAL;

		protocol = vhdr->h_vlan_encapsulated_proto;
		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
	}

	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
		goto out;

	/* Insert 802.1p priority into VLAN header */
	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
	    (skb->priority != TC_PRIO_CONTROL)) {
		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
		tx_flags |= (skb->priority & 0x7) <<
			    I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
			struct vlan_ethhdr *vhdr;
			int rc;

			rc = skb_cow_head(skb, 0);
			if (rc < 0)
				return rc;
			vhdr = (struct vlan_ethhdr *)skb->data;
			vhdr->h_vlan_TCI = htons(tx_flags >>
						 I40E_TX_FLAGS_VLAN_SHIFT);
		} else {
			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
		}
	}

out:
	*flags = tx_flags;
	return 0;
}
2082 * i40e_tso - set up the tso context descriptor
2083 * @tx_ring: ptr to the ring to send
2084 * @skb: ptr to the skb we're sending
2085 * @tx_flags: the collected send information
2086 * @protocol: the send protocol
2087 * @hdr_len: ptr to the size of the packet header
2088 * @cd_tunneling: ptr to context descriptor bits
2090 * Returns 0 if no TSO can happen, 1 if tso is going, or error
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
		    u32 tx_flags, __be16 protocol, u8 *hdr_len,
		    u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
{
	u32 cd_cmd, cd_tso_len, cd_mss;
	struct ipv6hdr *ipv6h;
	struct tcphdr *tcph;
	struct iphdr *iph;
	u32 l4len;
	int err;

	if (!skb_is_gso(skb))
		return 0;

	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	if (iph->version == 4) {
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 0, IPPROTO_TCP, 0);
	} else if (ipv6h->version == 6) {
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		ipv6h->payload_len = 0;
		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					       0, IPPROTO_TCP, 0);
	}

	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
	*hdr_len = (skb->encapsulation
		    ? (skb_inner_transport_header(skb) - skb->data)
		    : skb_transport_offset(skb)) + l4len;

	/* find the field values */
	cd_cmd = I40E_TX_CTX_DESC_TSO;
	cd_tso_len = skb->len - *hdr_len;
	cd_mss = skb_shinfo(skb)->gso_size;
	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
				((u64)cd_tso_len <<
				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	return 1;
}
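
/* Worked example of the Quad Word 1 packing above: a GSO skb with
 * skb->len = 9014, a 14 + 20 + 32 = 66 byte MAC/IP/TCP header and
 * gso_size = 1448 yields *hdr_len = 66, cd_tso_len = 9014 - 66 = 8948 and
 * cd_mss = 1448, each value shifted into its CMD/TSO_LEN/MSS field of the
 * context descriptor.
 */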
/**
 * i40e_tsyn - set up the tsyn context descriptor
 * @tx_ring:  ptr to the ring to send
 * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: Quad Word 1 of the context descriptor
 *
 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
 **/
static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
{
	struct i40e_pf *pf;

	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
		return 0;

	/* Tx timestamps cannot be sampled when doing TSO */
	if (tx_flags & I40E_TX_FLAGS_TSO)
		return 0;

	/* only timestamp the outbound packet if the user has requested it and
	 * we are not already transmitting a packet to be timestamped
	 */
	pf = i40e_netdev_to_pf(tx_ring->netdev);
	if (!(pf->flags & I40E_FLAG_PTP))
		return 0;

	if (pf->ptp_tx &&
	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		pf->ptp_tx_skb = skb_get(skb);
	} else {
		return 0;
	}

	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
				I40E_TXD_CTX_QW1_CMD_SHIFT;

	return 1;
}
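
/* Only one outbound packet can be timestamped at a time:
 * test_and_set_bit_lock() on __I40E_PTP_TX_IN_PROGRESS serializes access
 * to pf->ptp_tx_skb, which holds a reference to the skb whose timestamp
 * the PTP code will read back later; further requests simply transmit
 * without a timestamp until the bit is released.
 */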
/**
 * i40e_tx_enable_csum - Enable Tx checksum offloads
 * @skb:       send buffer
 * @tx_flags:  Tx flags currently set
 * @td_cmd:    Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring:   Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
 **/
static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
				u32 *td_cmd, u32 *td_offset,
				struct i40e_ring *tx_ring,
				u32 *cd_tunneling)
{
	struct ipv6hdr *this_ipv6_hdr;
	unsigned int this_tcp_hdrlen;
	struct iphdr *this_ip_hdr;
	u32 network_hdr_len;
	u8 l4_hdr = 0;

	if (skb->encapsulation) {
		network_hdr_len = skb_inner_network_header_len(skb);
		this_ip_hdr = inner_ip_hdr(skb);
		this_ipv6_hdr = inner_ipv6_hdr(skb);
		this_tcp_hdrlen = inner_tcp_hdrlen(skb);

		if (tx_flags & I40E_TX_FLAGS_IPV4) {
			if (tx_flags & I40E_TX_FLAGS_TSO) {
				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
				ip_hdr(skb)->check = 0;
			} else {
				*cd_tunneling |=
					I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
			}
		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
			if (tx_flags & I40E_TX_FLAGS_TSO)
				ip_hdr(skb)->check = 0;
		}

		/* Now set the ctx descriptor fields */
		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
				 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
				 I40E_TXD_CTX_UDP_TUNNELING |
				 ((skb_inner_network_offset(skb) -
				   skb_transport_offset(skb)) >> 1) <<
				 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
		if (this_ip_hdr->version == 6) {
			tx_flags &= ~I40E_TX_FLAGS_IPV4;
			tx_flags |= I40E_TX_FLAGS_IPV6;
		}
	} else {
		network_hdr_len = skb_network_header_len(skb);
		this_ip_hdr = ip_hdr(skb);
		this_ipv6_hdr = ipv6_hdr(skb);
		this_tcp_hdrlen = tcp_hdrlen(skb);
	}

	/* Enable IP checksum offloads */
	if (tx_flags & I40E_TX_FLAGS_IPV4) {
		l4_hdr = this_ip_hdr->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		if (tx_flags & I40E_TX_FLAGS_TSO) {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
			this_ip_hdr->check = 0;
		} else {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		}
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			     I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
		l4_hdr = this_ipv6_hdr->nexthdr;
		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			     I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	}
	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
	*td_offset |= (skb_network_offset(skb) >> 1) <<
		      I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

	/* Enable L4 checksum offloads */
	switch (l4_hdr) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
		*td_offset |= (this_tcp_hdrlen >> 2) <<
			      I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
			      I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
		*td_offset |= (sizeof(struct udphdr) >> 2) <<
			      I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	default:
		break;
	}
}
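
/* Worked example of the offsets computed above: an untagged IPv4/TCP frame
 * has a 14 byte MAC header (14 >> 1 = 7 words for MACLEN), a 20 byte IP
 * header (20 >> 2 = 5 dwords for IPLEN) and a 20 byte TCP header
 * (20 >> 2 = 5 dwords for L4LEN), so td_offset carries 7/5/5 at the
 * respective I40E_TX_DESC_LENGTH_* shifts.
 */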
/**
 * i40e_create_tx_ctx - Build the Tx context descriptor
 * @tx_ring:  ring to create the descriptor on
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: Quad Word 0 - bits 0-31
 * @cd_l2tag2: Quad Word 0 - bits 32-63
 **/
static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
			       const u64 cd_type_cmd_tso_mss,
			       const u32 cd_tunneling, const u32 cd_l2tag2)
{
	struct i40e_tx_context_desc *context_desc;
	int i = tx_ring->next_to_use;

	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
	    !cd_tunneling && !cd_l2tag2)
		return;

	/* grab the next descriptor */
	context_desc = I40E_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* cpu_to_le32 and assign to struct fields */
	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
	context_desc->rsvd = cpu_to_le16(0);
	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
}
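
/* Because of the early return above, a context descriptor only consumes a
 * ring slot when some context field is actually in use (TSO, Tx
 * timestamping, tunneling or L2TAG2); plain frames go out with data
 * descriptors alone.
 */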
/**
 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 **/
static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available. */
	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}
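
/* The stop-then-recheck sequence above closes a race with the Tx cleanup
 * path: stop the queue first, let the barrier publish the stopped state
 * before re-reading the free-descriptor count, and if cleanup freed space
 * in the meantime restart the queue (counted in restart_queue) rather
 * than returning -EBUSY.
 */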
/**
 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns 0 if stop is not needed
 **/
#ifdef I40E_FCOE
int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#else
static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#endif
{
	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
		return 0;
	return __i40e_maybe_stop_tx(tx_ring, size);
}
/**
 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb:      send buffer
 * @tx_flags: collected send information
 * @hdr_len:  size of the packet header
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 **/
static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
			       const u8 hdr_len)
{
	struct skb_frag_struct *frag;
	bool linearize = false;
	unsigned int size = 0;
	u16 num_frags;
	u16 gso_segs;

	num_frags = skb_shinfo(skb)->nr_frags;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
		u16 j = 0;

		if (num_frags < (I40E_MAX_BUFFER_TXD))
			goto linearize_chk_done;
		/* try the simple math, if we have too many frags per segment */
		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
		    I40E_MAX_BUFFER_TXD) {
			linearize = true;
			goto linearize_chk_done;
		}
		frag = &skb_shinfo(skb)->frags[0];

		/* we might still have more fragments per segment */
		do {
			size += skb_frag_size(frag);
			frag++; j++;
			if (j == I40E_MAX_BUFFER_TXD) {
				if (size < skb_shinfo(skb)->gso_size) {
					linearize = true;
					break;
				}
				j = 1;
				size -= skb_shinfo(skb)->gso_size;
				if (size)
					j++;
				size += hdr_len;
			}
			num_frags--;
		} while (num_frags);
	} else {
		if (num_frags >= I40E_MAX_BUFFER_TXD)
			linearize = true;
	}

linearize_chk_done:
	return linearize;
}
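
/* Quick sanity check of the math above: a TSO skb with num_frags = 16 and
 * gso_segs = 2 gives DIV_ROUND_UP(16 + 2, 2) = 9, which exceeds
 * I40E_MAX_BUFFER_TXD (8, per the comment above), so the skb is linearized
 * without walking the frag list; a non-TSO skb is linearized as soon as it
 * carries 8 or more fragments.
 */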
/**
 * i40e_tx_map - Build the Tx descriptor
 * @tx_ring:   ring to send buffer on
 * @skb:       send buffer
 * @first:     first buffer info buffer to use
 * @tx_flags:  collected send information
 * @hdr_len:   size of the packet header
 * @td_cmd:    the command field in the descriptor
 * @td_offset: offset for checksum or crc
 **/
#ifdef I40E_FCOE
void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
		 struct i40e_tx_buffer *first, u32 tx_flags,
		 const u8 hdr_len, u32 td_cmd, u32 td_offset)
#else
static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
			struct i40e_tx_buffer *first, u32 tx_flags,
			const u8 hdr_len, u32 td_cmd, u32 td_offset)
#endif
{
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	struct skb_frag_struct *frag;
	struct i40e_tx_buffer *tx_bi;
	struct i40e_tx_desc *tx_desc;
	u16 i = tx_ring->next_to_use;
	u32 td_tag = 0;
	dma_addr_t dma;
	u16 gso_segs;

	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
			 I40E_TX_FLAGS_VLAN_SHIFT;
	}

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
		gso_segs = skb_shinfo(skb)->gso_segs;
	else
		gso_segs = 1;

	/* multiply data chunks by size of headers */
	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
	first->gso_segs = gso_segs;
	first->skb = skb;
	first->tx_flags = tx_flags;

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_bi = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_bi, len, size);
		dma_unmap_addr_set(tx_bi, dma, dma);

		tx_desc->buffer_addr = cpu_to_le64(dma);

		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				build_ctob(td_cmd, td_offset,
					   I40E_MAX_DATA_PER_TXD, td_tag);

			tx_desc++;
			i++;
			if (i == tx_ring->count) {
				tx_desc = I40E_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += I40E_MAX_DATA_PER_TXD;
			size -= I40E_MAX_DATA_PER_TXD;

			tx_desc->buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
							  size, td_tag);

		tx_desc++;
		i++;
		if (i == tx_ring->count) {
			tx_desc = I40E_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_bi = &tx_ring->tx_bi[i];
	}

	/* Place RS bit on last descriptor of any packet that spans across the
	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
	 */
	if (((i & WB_STRIDE) != WB_STRIDE) &&
	    (first <= &tx_ring->tx_bi[i]) &&
	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
				    I40E_TXD_QW1_CMD_SHIFT);
	} else {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TXD_CMD <<
				    I40E_TXD_QW1_CMD_SHIFT);
	}

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),
			     first->bytecount);

	/* set the timestamp */
	first->time_stamp = jiffies;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
	/* notify HW of packet */
	if (!skb->xmit_more ||
	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
						   tx_ring->queue_index)))
		writel(i, tx_ring->tail);

	return;

dma_error:
	dev_info(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_bi map */
	for (;;) {
		tx_bi = &tx_ring->tx_bi[i];
		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
		if (tx_bi == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}
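
/* Two batching details in the map above: RS (request write-back) is only
 * set for packets that span or end a WB_STRIDE group of descriptors (per
 * the comment in the function), keeping descriptor write-backs off the
 * common path, and the tail doorbell is only rung when the stack has no
 * further skbs queued (!skb->xmit_more) or the queue just stopped, so
 * back-to-back transmits can share a single MMIO write.
 */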
/**
 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns number of data descriptors needed for this skb.  Returns 0 to
 * indicate there are not enough descriptors available in this ring since
 * we need at least one new descriptor.
 **/
#ifdef I40E_FCOE
int i40e_xmit_descriptor_count(struct sk_buff *skb,
			       struct i40e_ring *tx_ring)
#else
static int i40e_xmit_descriptor_count(struct sk_buff *skb,
				      struct i40e_ring *tx_ring)
#endif
{
	unsigned int f;
	int count = 0;

	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
	 *       + 4 desc gap to avoid the cache line where head is,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	count += TXD_USE_COUNT(skb_headlen(skb));
	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
		tx_ring->tx_stats.tx_busy++;
		return 0;
	}
	return count;
}
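
/* Example of the count computed above, assuming 4KB page frags that each
 * fit within I40E_MAX_DATA_PER_TXD: a 64KB TSO skb built from a header in
 * the linear area plus sixteen 4KB frags needs 1 + 16 = 17 data
 * descriptors, so the ring must have 17 + 4 + 1 = 22 slots free (the gap
 * of 4 plus one context descriptor) before the frame is accepted.
 */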
/**
 * i40e_xmit_frame_ring - Sends buffer on Tx ring
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
					struct i40e_ring *tx_ring)
{
	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
	u32 cd_tunneling = 0, cd_l2tag2 = 0;
	struct i40e_tx_buffer *first;
	u32 td_offset = 0;
	u32 tx_flags = 0;
	__be16 protocol;
	u32 td_cmd = 0;
	u8 hdr_len = 0;
	int tsyn;
	int tso;

	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
		return NETDEV_TX_BUSY;

	/* prepare the xmit flags */
	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
		goto out_drop;

	/* obtain protocol of skb */
	protocol = vlan_get_protocol(skb);

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_bi[tx_ring->next_to_use];

	/* setup IPv4/IPv6 offloads */
	if (protocol == htons(ETH_P_IP))
		tx_flags |= I40E_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		tx_flags |= I40E_TX_FLAGS_IPV6;

	tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
		       &cd_type_cmd_tso_mss, &cd_tunneling);

	if (tso < 0)
		goto out_drop;
	else if (tso)
		tx_flags |= I40E_TX_FLAGS_TSO;

	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);

	if (tsyn)
		tx_flags |= I40E_TX_FLAGS_TSYN;

	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
		if (skb_linearize(skb))
			goto out_drop;

	skb_tx_timestamp(skb);

	/* always enable CRC insertion offload */
	td_cmd |= I40E_TX_DESC_CMD_ICRC;

	/* Always offload the checksum, since it's in the data descriptor */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		tx_flags |= I40E_TX_FLAGS_CSUM;

		i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
				    tx_ring, &cd_tunneling);
	}

	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
			   cd_tunneling, cd_l2tag2);

	/* Add Flow Director ATR if it's enabled.
	 *
	 * NOTE: this must always be directly before the data descriptor.
	 */
	i40e_atr(tx_ring, skb, tx_flags, protocol);

	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
		    td_cmd, td_offset);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
/**
 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
 * @skb:    send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
		return NETDEV_TX_OK;

	return i40e_xmit_frame_ring(skb, tx_ring);
}