/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/
#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include "i40e.h"
#include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
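/* Illustrative sketch of the second quadword packed above (field positions
 * per the I40E_TXD_QW1_* shift definitions: cmd at bit 4, offset at 16,
 * buffer size at 34, L2 tag at 48).  For example,
 * build_ctob(I40E_TXD_CMD, 0, 512, 0) produces a little-endian data
 * descriptor word marked EOP+RS covering a 512-byte buffer.
 */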
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
#define I40E_FD_CLEAN_DELAY 10
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: The PF pointer
 * @add: True for add/update, False for remove
 **/
int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
			     struct i40e_pf *pf, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	unsigned int fpt, dcc;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 delay = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = NULL;
	for (i = 0; i < pf->num_alloc_vsi; i++)
		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
			vsi = pf->vsi[i];
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	/* we need two descriptors to add/del a filter and we can wait */
	do {
		if (I40E_DESC_UNUSED(tx_ring) > 1)
			break;
		msleep_interruptible(1);
		delay++;
	} while (delay < I40E_FD_CLEAN_DELAY);

	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
		return -EAGAIN;

	dma = dma_map_single(dev, raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
	first = &tx_ring->tx_bi[i];
	memset(first, 0, sizeof(struct i40e_tx_buffer));

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
	      I40E_TXD_FLTR_QW0_QINDEX_MASK;

	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;

	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;

	/* Use LAN VSI Id if not programmed by user */
	if (fdir_data->dest_vsi == 0)
		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
	else
		fpt |= ((u32)fdir_data->dest_vsi <<
			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;

	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;

	if (add)
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
	else
		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;

	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
	       I40E_TXD_FLTR_QW1_DEST_MASK;

	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;

	if (fdir_data->cnt_index != 0) {
		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
		dcc |= ((u32)fdir_data->cnt_index <<
			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
		       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
	}

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);

	/* Now program a dummy descriptor */
	i = tx_ring->next_to_use;
	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_buf = &tx_ring->tx_bi[i];

	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;

	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));

	/* record length, and DMA address */
	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
	dma_unmap_addr_set(tx_buf, dma, dma);

	tx_desc->buffer_addr = cpu_to_le64(dma);
	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;

	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
	tx_buf->raw_buf = (void *)raw_packet;

	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
	 */
	wmb();

	/* Mark the data descriptor to be watched */
	first->next_to_watch = tx_desc;

	writel(tx_ring->next_to_use, tx_ring->tail);
	return 0;

dma_fail:
	return -1;
}
#define IP_HEADER_OFFSET 14
#define I40E_UDPIP_DUMMY_PACKET_LEN 42
/**
 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct udphdr *udp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
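	/* Decoding the template above (illustrative): bytes 12-13 are the
	 * Ethertype 0x0800 (IPv4), 0x45 is version 4 + IHL 5, 0x1c is an IP
	 * total length of 28 (20 IP + 8 UDP), and 0x40,0x11 are TTL 64 and
	 * protocol 17 (UDP); addresses and ports are patched in below.
	 */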
	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	udp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	udp->source = fd_data->src_port;

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}
	return err ? -EOPNOTSUPP : 0;
}
#define I40E_TCPIP_DUMMY_PACKET_LEN 54
/**
 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
				   struct i40e_fdir_filter *fd_data,
				   bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct tcphdr *tcp;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
		0x0, 0x72, 0, 0, 0, 0};
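	/* Same layout as the UDP template above: Ethertype 0x0800,
	 * version/IHL 0x45, IP total length 0x28 (40 bytes) and protocol
	 * 0x06 (TCP); the TCP ports are patched in below before the filter
	 * is programmed.
	 */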
	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);

	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
	      + sizeof(struct iphdr));

	ip->daddr = fd_data->dst_ip[0];
	tcp->dest = fd_data->dst_port;
	ip->saddr = fd_data->src_ip[0];
	tcp->source = fd_data->src_port;

	if (add) {
		pf->fd_tcp_rule++;
		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
			if (I40E_DEBUG_FD & pf->hw.debug_mask)
				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
		}
	} else {
		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
				  (pf->fd_tcp_rule - 1) : 0;
		if (pf->fd_tcp_rule == 0) {
			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
			if (I40E_DEBUG_FD & pf->hw.debug_mask)
				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
		}
	}

	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		err = true;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	return err ? -EOPNOTSUPP : 0;
}
/**
 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Always returns -EOPNOTSUPP
 **/
static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
				    struct i40e_fdir_filter *fd_data,
				    bool add)
{
	return -EOPNOTSUPP;
}

#define I40E_IP_DUMMY_PACKET_LEN 34
/**
 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
				  struct i40e_fdir_filter *fd_data,
				  bool add)
{
	struct i40e_pf *pf = vsi->back;
	struct iphdr *ip;
	bool err = false;
	u8 *raw_packet;
	int ret;
	int i;
	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0};

	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
		if (!raw_packet)
			return -ENOMEM;
		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);

		ip->saddr = fd_data->src_ip[0];
		ip->daddr = fd_data->dst_ip[0];
		ip->protocol = 0;

		fd_data->pctype = i;
		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
		if (ret) {
			dev_info(&pf->pdev->dev,
				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
				 fd_data->pctype, fd_data->fd_id, ret);
			err = true;
		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			if (add)
				dev_info(&pf->pdev->dev,
					 "Filter OK for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
			else
				dev_info(&pf->pdev->dev,
					 "Filter deleted for PCTYPE %d loc = %d\n",
					 fd_data->pctype, fd_data->fd_id);
		}
	}

	return err ? -EOPNOTSUPP : 0;
}
/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: the flow director filter to add or remove
 * @add: true adds a filter, false removes it
 *
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;
	int ret;

	switch (input->flow_type & ~FLOW_EXT) {
	case TCP_V4_FLOW:
		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
		break;
	case UDP_V4_FLOW:
		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
		break;
	case SCTP_V4_FLOW:
		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
		break;
	case IPV4_FLOW:
		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
		break;
	case IP_USER_FLOW:
		switch (input->ip4_proto) {
		case IPPROTO_TCP:
			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
			break;
		case IPPROTO_UDP:
			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
			break;
		case IPPROTO_SCTP:
			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
			break;
		default:
			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
			break;
		}
		break;
	default:
		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
			 input->flow_type);
		ret = -EINVAL;
	}

	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
	return ret;
}
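/* Note: i40e_add_del_fdir() is the driver's entry point for the ethtool
 * ntuple (RX flow classification) path; each helper above builds a
 * protocol-appropriate dummy packet and hands it to
 * i40e_program_fdir_filter().
 */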
/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
				  union i40e_rx_desc *rx_desc, u8 prog_id)
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
	u32 fcnt_prog, fcnt_avail;
	u32 error;
	u64 qw;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
		    (I40E_DEBUG_FD & pf->hw.debug_mask))
			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);

		/* Check if the programming error is for ATR.
		 * If so, auto disable ATR and set a state for
		 * flush in progress. Next time we come here if flush is in
		 * progress do nothing, once flush is complete the state will
		 * be cleared.
		 */
		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
			return;

		pf->fd_add_err++;
		/* store the current atr filter count */
		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);

		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
		}

		/* filter programming failed most likely due to table full */
		fcnt_prog = i40e_get_global_fd_count(pf);
		fcnt_avail = pf->fdir_pf_filter_count;
		/* If ATR is running fcnt_prog can quickly change,
		 * if we are very close to full, it makes sense to disable
		 * FD ATR/SB and then re-enable it when there is room.
		 */
		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
			    !(pf->auto_disable_flags &
			      I40E_FLAG_FD_SB_ENABLED)) {
				if (I40E_DEBUG_FD & pf->hw.debug_mask)
					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
				pf->auto_disable_flags |=
							I40E_FLAG_FD_SB_ENABLED;
			}
		} else {
			dev_info(&pdev->dev,
				 "FD filter programming failed due to incorrect filter parameters\n");
		}
	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
		if (I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
				 rx_desc->wb.qword0.hi_dword.fd_id);
	}
}
/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring:      the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
			kfree(tx_buffer->raw_buf);
		else
			dev_kfree_skb_any(tx_buffer->skb);

		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}
	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}
/**
 * i40e_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
						  tx_ring->queue_index));
}
/**
 * i40e_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40e_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40e_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}
/**
 * i40e_get_tx_pending - how many tx descriptors not processed
 * @ring: the ring of descriptors
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
	u32 head, tail;

	head = i40e_get_head(ring);
	tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}
#define WB_STRIDE 0x3
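/* WB_STRIDE is used as a mask below: i40e_clean_tx_irq() arms a forced
 * write-back when the final cleaned index does not satisfy
 * (i & WB_STRIDE) == WB_STRIDE, i.e. when descriptors short of a full
 * four-descriptor stride may still be waiting for write-back.
 */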
/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring:  tx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns true if there's any budget left (i.e. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_packets = 0;
	unsigned int total_bytes = 0;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		dev_consume_skb_any(tx_buf->skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	/* check to see if there are any non-cache aligned descriptors
	 * waiting to be written back, and kick the hardware to force
	 * them to be written back in case of napi polling
	 */
	if (budget &&
	    !((i & WB_STRIDE) == WB_STRIDE) &&
	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
		tx_ring->arm_wb = true;
	else
		tx_ring->arm_wb = false;

	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
						      tx_ring->queue_index),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}
/**
 * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 *
 **/
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	u16 flags = q_vector->tx.ring[0].flags;

	if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
		u32 val;

		if (q_vector->arm_wb_state)
			return;

		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
					 vsi->base_vector - 1),
		     val);
		q_vector->arm_wb_state = true;
	} else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
					 vsi->base_vector - 1), val);
	} else {
		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
	}
}
/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt.  The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern.  Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	u32 new_itr = rc->itr;
	int bytes_per_int;

	if (rc->total_packets == 0 || !rc->itr)
		return;

	/* simple throttlerate management
	 *   0-10MB/s   lowest (100000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (8000 ints/s)
	 */
	bytes_per_int = rc->total_bytes / rc->itr;
	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	default:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}
	rc->latency_range = new_latency_range;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_100K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	if (new_itr != rc->itr)
		rc->itr = new_itr;

	rc->total_bytes = 0;
	rc->total_packets = 0;
}
/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @rx_desc: the rx descriptor written back by HW
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 *
 **/
static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
					  union i40e_rx_desc *rx_desc)
{
	u64 qw;
	u8 id;

	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
	     I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;

	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		i40e_fd_handle_status(rx_ring, rx_desc, id);
#ifdef I40E_FCOE
	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
#endif
}
/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_bi);
	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}
/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	if (ring_is_ps_enabled(rx_ring)) {
		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;

		rx_bi = &rx_ring->rx_bi[0];
		if (rx_bi->hdr_buf) {
			dma_free_coherent(dev,
					  bufsz,
					  rx_bi->hdr_buf,
					  rx_bi->dma);
			for (i = 0; i < rx_ring->count; i++) {
				rx_bi = &rx_ring->rx_bi[i];
				rx_bi->dma = 0;
				rx_bi->hdr_buf = NULL;
			}
		}
	}
	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		if (rx_bi->dma) {
			dma_unmap_single(dev,
					 rx_bi->dma,
					 rx_ring->rx_buf_len,
					 DMA_FROM_DEVICE);
			rx_bi->dma = 0;
		}
		if (rx_bi->skb) {
			dev_kfree_skb(rx_bi->skb);
			rx_bi->skb = NULL;
		}
		if (rx_bi->page) {
			if (rx_bi->page_dma) {
				dma_unmap_page(dev,
					       rx_bi->page_dma,
					       PAGE_SIZE / 2,
					       DMA_FROM_DEVICE);
				rx_bi->page_dma = 0;
			}
			__free_page(rx_bi->page);
			rx_bi->page = NULL;
			rx_bi->page_offset = 0;
		}
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}
/**
 * i40e_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40e_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}
/**
 * i40e_alloc_rx_headers - allocate rx header buffers
 * @rx_ring: ring to alloc buffers
 *
 * Allocate rx header buffers for the entire ring. As these are static,
 * this is only called when setting up a new ring.
 **/
void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	dma_addr_t dma;
	void *buffer;
	int buf_size;
	int i;

	if (rx_ring->rx_bi[0].hdr_buf)
		return;
	/* Make sure the buffers don't cross cache line boundaries. */
	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
				    &dma, GFP_KERNEL);
	if (!buffer)
		return;
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		rx_bi->dma = dma + (i * buf_size);
		rx_bi->hdr_buf = buffer + (i * buf_size);
	}
}
/**
 * i40e_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}
/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}
/**
 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];

		if (bi->skb) /* desc is in use */
			goto no_buffers;
		if (!bi->page) {
			bi->page = alloc_page(GFP_ATOMIC);
			if (!bi->page) {
				rx_ring->rx_stats.alloc_page_failed++;
				goto no_buffers;
			}
		}

		if (!bi->page_dma) {
			/* use a half page if we're re-using */
			bi->page_offset ^= PAGE_SIZE / 2;
			bi->page_dma = dma_map_page(rx_ring->dev,
						    bi->page,
						    bi->page_offset,
						    PAGE_SIZE / 2,
						    DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev,
					      bi->page_dma)) {
				rx_ring->rx_stats.alloc_page_failed++;
				bi->page_dma = 0;
				goto no_buffers;
			}
		}

		dma_sync_single_range_for_device(rx_ring->dev,
						 bi->dma,
						 0,
						 rx_ring->rx_hdr_len,
						 DMA_FROM_DEVICE);
		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);
}
/**
 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	struct sk_buff *skb;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];
		skb = bi->skb;

		if (!skb) {
			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
							rx_ring->rx_buf_len);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				goto no_buffers;
			}
			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			bi->skb = skb;
		}

		if (!bi->dma) {
			bi->dma = dma_map_single(rx_ring->dev,
						 skb->data,
						 rx_ring->rx_buf_len,
						 DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
				rx_ring->rx_stats.alloc_buff_failed++;
				bi->dma = 0;
				goto no_buffers;
			}
		}

		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
		rx_desc->read.hdr_addr = 0;
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);
}
/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring:  rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;
	struct i40e_vsi *vsi = rx_ring->vsi;
	u64 flags = vsi->back->flags;

	if (vlan_tag & VLAN_VID_MASK)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	if (flags & I40E_FLAG_IN_NETPOLL)
		netif_rx(skb);
	else
		napi_gro_receive(&q_vector->napi, skb);
}
/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_status: status value of last descriptor in packet
 * @rx_error: error value of last descriptor in packet
 * @rx_ptype: ptype value of last descriptor in packet
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    u32 rx_status,
				    u32 rx_error,
				    u16 rx_ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
	bool ipv4 = false, ipv6 = false;
	bool ipv4_tunnel, ipv6_tunnel;
	__wsum rx_udp_csum;
	struct iphdr *iph;
	__sum16 csum;

	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

	skb->ip_summed = CHECKSUM_NONE;

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* did the hardware decode the packet and checksum? */
	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* both known and outer_ip must be set for the below code to work */
	if (!(decoded.known && decoded.outer_ip))
		return;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
		ipv4 = true;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		ipv6 = true;

	if (ipv4 &&
	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
		goto checksum_fail;

	/* likely incorrect csum if alternate IP extension headers found */
	if (ipv6 &&
	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		/* don't increment checksum err here, non-fatal err */
		return;

	/* there was some L4 error, count error and punt packet to the stack */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
		goto checksum_fail;

	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
		return;

	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
	 * it in the driver, hardware does not do it for us.
	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
	 * so the total length of IPv4 header is IHL*4 bytes
	 * The UDP_0 bit *may* be set if the *inner* header is UDP
	 */
	if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
	    (ipv4_tunnel)) {
		skb->transport_header = skb->mac_header +
					sizeof(struct ethhdr) +
					(ip_hdr(skb)->ihl * 4);

		/* Add 4 bytes for VLAN tagged packets */
		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
					  skb->protocol == htons(ETH_P_8021AD))
					 ? VLAN_HLEN : 0;

		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
		    (udp_hdr(skb)->check != 0)) {
			rx_udp_csum = udp_csum(skb);
			iph = ip_hdr(skb);
			csum = csum_tcpudp_magic(
					iph->saddr, iph->daddr,
					(skb->len - skb_transport_offset(skb)),
					IPPROTO_UDP, rx_udp_csum);

			if (udp_hdr(skb)->check != csum)
				goto checksum_fail;

		} /* else its GRE and so no outer UDP header */
	}

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum_level = ipv4_tunnel || ipv6_tunnel;

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}
/**
 * i40e_rx_hash - returns the hash value from the Rx descriptor
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline u32 i40e_rx_hash(struct i40e_ring *ring,
			       union i40e_rx_desc *rx_desc)
{
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if ((ring->netdev->features & NETIF_F_RXHASH) &&
	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
	else
		return 0;
}
/**
 * i40e_ptype_to_hash - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return PKT_HASH_TYPE_NONE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
		return PKT_HASH_TYPE_L4;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
		return PKT_HASH_TYPE_L3;
	else
		return PKT_HASH_TYPE_L2;
}
/**
 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
 * @rx_ring:  rx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns true if there's any budget left (i.e. the clean is finished)
 **/
static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	const int current_node = numa_node_id();
	struct i40e_vsi *vsi = rx_ring->vsi;
	u16 i = rx_ring->next_to_clean;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	u8 rx_ptype;
	u64 qword;

	if (budget <= 0)
		return 0;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();
		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring, rx_desc);
			I40E_RX_INCREMENT(rx_ring, i);
			continue;
		}
		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		if (likely(!skb)) {
			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
							rx_ring->rx_hdr_len);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				break;
			}

			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			/* we are reusing so sync this buffer for CPU use */
			dma_sync_single_range_for_cpu(rx_ring->dev,
						      rx_bi->dma,
						      0,
						      rx_ring->rx_hdr_len,
						      DMA_FROM_DEVICE);
		}
		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		prefetch(rx_bi->page);
		rx_bi->skb = NULL;
		cleaned_count++;
		if (rx_hbo || rx_sph) {
			int len;

			if (rx_hbo)
				len = I40E_RX_HDR_SIZE;
			else
				len = rx_header_len;
			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
		} else if (skb->len == 0) {
			int len;

			len = (rx_packet_len > skb_headlen(skb) ?
				skb_headlen(skb) : rx_packet_len);
			memcpy(__skb_put(skb, len),
			       rx_bi->page + rx_bi->page_offset,
			       len);
			rx_bi->page_offset += len;
			rx_packet_len -= len;
		}

		/* Get the rest of the data if this was a header split */
		if (rx_packet_len) {
			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   rx_bi->page,
					   rx_bi->page_offset,
					   rx_packet_len);

			skb->len += rx_packet_len;
			skb->data_len += rx_packet_len;
			skb->truesize += rx_packet_len;

			if ((page_count(rx_bi->page) == 1) &&
			    (page_to_nid(rx_bi->page) == current_node))
				get_page(rx_bi->page);
			else
				rx_bi->page = NULL;

			dma_unmap_page(rx_ring->dev,
				       rx_bi->page_dma,
				       PAGE_SIZE / 2,
				       DMA_FROM_DEVICE);
			rx_bi->page_dma = 0;
		}
		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			struct i40e_rx_buffer *next_buffer;

			next_buffer = &rx_ring->rx_bi[i];
			next_buffer->skb = skb;
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
			     i40e_ptype_to_hash(rx_ptype));
		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
					 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
					 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
			rx_ring->last_rx_timestamp = jiffies;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
#ifdef I40E_FCOE
		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
			dev_kfree_skb_any(skb);
			continue;
		}
#endif
		skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_desc->wb.qword1.status_error_len = 0;

	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return total_rx_packets;
}
/**
 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
 * @rx_ring:  rx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns number of packets cleaned
 **/
static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	struct i40e_vsi *vsi = rx_ring->vsi;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	u16 rx_packet_len;
	u8 rx_ptype;
	u64 qword;
	u16 i;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();

		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring, rx_desc);
			I40E_RX_INCREMENT(rx_ring, i);
			continue;
		}

		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		prefetch(skb->data);

		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		rx_bi->skb = NULL;
		cleaned_count++;

		/* Get the header and possibly the whole packet
		 * If this is an skb from previous receive dma will be 0
		 */
		skb_put(skb, rx_packet_len);
		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
				 DMA_FROM_DEVICE);
		rx_bi->dma = 0;

		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			/* TODO: shouldn't we increment a counter indicating the
			 * drop?
			 */
			continue;
		}

		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
			     i40e_ptype_to_hash(rx_ptype));
		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
					 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
					 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
			rx_ring->last_rx_timestamp = jiffies;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
#ifdef I40E_FCOE
		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
			dev_kfree_skb_any(skb);
			continue;
		}
#endif
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_desc->wb.qword1.status_error_len = 0;
	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return total_rx_packets;
}
/**
 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
 * @vsi: the VSI we care about
 * @q_vector: q_vector for which itr is being updated and interrupt enabled
 *
 **/
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
					  struct i40e_q_vector *q_vector)
{
	struct i40e_hw *hw = &vsi->back->hw;
	u16 old_itr;
	int vector;
	u32 val;

	vector = (q_vector->v_idx + vsi->base_vector);
	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
		old_itr = q_vector->rx.itr;
		i40e_set_new_dynamic_itr(&q_vector->rx);
		if (old_itr != q_vector->rx.itr) {
			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
			(I40E_RX_ITR <<
				I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
			(q_vector->rx.itr <<
				I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
		} else {
			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
			(I40E_ITR_NONE <<
				I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
		}
		if (!test_bit(__I40E_DOWN, &vsi->state))
			wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
	} else {
		i40e_irq_dynamic_enable(vsi,
					q_vector->v_idx + vsi->base_vector);
	}
	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
		old_itr = q_vector->tx.itr;
		i40e_set_new_dynamic_itr(&q_vector->tx);
		if (old_itr != q_vector->tx.itr) {
			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
				I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
				(I40E_TX_ITR <<
				   I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
				(q_vector->tx.itr <<
				   I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
		} else {
			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
				I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
				(I40E_ITR_NONE <<
				   I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
		}
		if (!test_bit(__I40E_DOWN, &vsi->state))
			wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
			     vsi->base_vector - 1), val);
	} else {
		i40e_irq_dynamic_enable(vsi,
					q_vector->v_idx + vsi->base_vector);
	}
}
1811 * @napi: napi struct with our devices info in it
1812 * @budget: amount of work driver is allowed to do this pass, in packets
1814 * This function will clean all queues associated with a q_vector.
1816 * Returns the amount of work done
1818 int i40e_napi_poll(struct napi_struct
*napi
, int budget
)
1820 struct i40e_q_vector
*q_vector
=
1821 container_of(napi
, struct i40e_q_vector
, napi
);
1822 struct i40e_vsi
*vsi
= q_vector
->vsi
;
1823 struct i40e_ring
*ring
;
1824 bool clean_complete
= true;
1825 bool arm_wb
= false;
1826 int budget_per_ring
;
1829 if (test_bit(__I40E_DOWN
, &vsi
->state
)) {
1830 napi_complete(napi
);
1834 /* Since the actual Tx work is minimal, we can give the Tx a larger
1835 * budget and be more aggressive about cleaning up the Tx descriptors.
1837 i40e_for_each_ring(ring
, q_vector
->tx
) {
1838 clean_complete
&= i40e_clean_tx_irq(ring
, vsi
->work_limit
);
1839 arm_wb
|= ring
->arm_wb
;
1842 /* We attempt to distribute budget to each Rx queue fairly, but don't
1843 * allow the budget to go below 1 because that would exit polling early.
1845 budget_per_ring
= max(budget
/q_vector
->num_ringpairs
, 1);
	i40e_for_each_ring(ring, q_vector->rx) {
		if (ring_is_ps_enabled(ring))
			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
		else
			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
		/* if we didn't clean as many as budgeted, we must be done */
		clean_complete &= (budget_per_ring != cleaned);
	}

	/* If work not completed, return budget and polling will return */
	if (!clean_complete) {
		if (arm_wb)
			i40e_force_wb(vsi, q_vector);
		return budget;
	}

	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
		q_vector->arm_wb_state = false;

	/* Work is done so exit the polling mode and re-enable the interrupt */
	napi_complete(napi);
	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
		i40e_update_enable_itr(vsi, q_vector);
	} else { /* Legacy mode */
		struct i40e_hw *hw = &vsi->back->hw;
		/* We re-enable the queue 0 cause, but
		 * don't worry about dynamic_enable
		 * because we left it on for the other
		 * possible interrupts during napi
		 */
		u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
			   I40E_QINT_RQCTL_CAUSE_ENA_MASK;

		wr32(hw, I40E_QINT_RQCTL(0), qval);
		qval = rd32(hw, I40E_QINT_TQCTL(0)) |
		       I40E_QINT_TQCTL_CAUSE_ENA_MASK;
		wr32(hw, I40E_QINT_TQCTL(0), qval);
		i40e_irq_dynamic_enable_icr0(vsi->back);
	}
	return 0;
}
/**
 * i40e_atr - Add a Flow Director ATR filter
 * @tx_ring:  ring to add programming descriptor to
 * @skb:      send buffer
 * @tx_flags: send tx flags
 * @protocol: wire protocol
 **/
static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 tx_flags, __be16 protocol)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_pf *pf = tx_ring->vsi->back;
	union {
		unsigned char *network;
		struct iphdr *ipv4;
		struct ipv6hdr *ipv6;
	} hdr;
	struct tcphdr *th;
	unsigned int hlen;
	u32 flex_ptype, dtype_cmd;
	u16 i;

	/* make sure ATR is enabled */
	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
		return;

	if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
		return;

	/* if sampling is disabled do nothing */
	if (!tx_ring->atr_sample_rate)
		return;

	if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
		return;

	if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
		/* snag network header to get L4 type and address */
		hdr.network = skb_network_header(skb);

		/* Currently only IPv4/IPv6 with TCP is supported
		 * access ihl as u8 to avoid unaligned access on ia64
		 */
		if (tx_flags & I40E_TX_FLAGS_IPV4)
			hlen = (hdr.network[0] & 0x0F) << 2;
		else if (protocol == htons(ETH_P_IPV6))
			hlen = sizeof(struct ipv6hdr);
		else
			return;
	} else {
		hdr.network = skb_inner_network_header(skb);
		hlen = skb_inner_network_header_len(skb);
	}

	/* Currently only IPv4/IPv6 with TCP is supported
	 * Note: tx_flags gets modified to reflect inner protocols in
	 * tx_enable_csum function if encap is enabled.
	 */
	if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
	    (hdr.ipv4->protocol != IPPROTO_TCP))
		return;
	else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
		 (hdr.ipv6->nexthdr != IPPROTO_TCP))
		return;

	th = (struct tcphdr *)(hdr.network + hlen);

	/* Due to lack of space, no more new filters can be programmed */
	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
		return;
	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
		/* HW ATR eviction will take care of removing filters on FIN
		 * and RST packets.
		 */
		if (th->fin || th->rst)
			return;
	}

	tx_ring->atr_count++;

	/* sample on all syn/fin/rst packets or once every atr sample rate */
	if (!th->fin &&
	    !th->syn &&
	    !th->rst &&
	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
		return;

	tx_ring->atr_count = 0;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);

	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;

	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;

	dtype_cmd |= (th->fin || th->rst) ?
		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);

	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
		     I40E_TXD_FLTR_QW1_DEST_SHIFT;

	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;

	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
	if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
		dtype_cmd |=
			((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
	else
		dtype_cmd |=
			((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;

	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
		dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;

	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
	fdir_desc->rsvd = cpu_to_le32(0);
	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
	fdir_desc->fd_id = cpu_to_le32(0);
}
/**
 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
 * @skb:      send buffer
 * @tx_ring:  ring to send buffer on
 * @flags:    the tx flags to be set
 *
 * Checks the skb and sets up the generic transmit flags related to VLAN
 * tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped on error,
 * and otherwise returns 0 to indicate the flags have been set properly.
 **/
#ifdef I40E_FCOE
inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
				      struct i40e_ring *tx_ring,
				      u32 *flags)
#else
static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
					     struct i40e_ring *tx_ring,
					     u32 *flags)
#endif
{
	__be16 protocol = skb->protocol;
	u32 tx_flags = 0;

	if (protocol == htons(ETH_P_8021Q) &&
	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
		/* When HW VLAN acceleration is turned off by the user the
		 * stack sets the protocol to 8021q so that the driver
		 * can take any steps required to support the SW only
		 * VLAN handling.  In our case the driver doesn't need
		 * to take any further steps so just set the protocol
		 * to the encapsulated ethertype.
		 */
		skb->protocol = vlan_get_protocol(skb);
		goto out;
	}

	/* if we have a HW VLAN tag being added, default to the HW one */
	if (skb_vlan_tag_present(skb)) {
		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
	/* else if it is a SW VLAN, check the next protocol and store the tag */
	} else if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, _vhdr;

		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
		if (!vhdr)
			return -EINVAL;

		protocol = vhdr->h_vlan_encapsulated_proto;
		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
	}

	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
		goto out;

	/* Insert 802.1p priority into VLAN header */
	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
	    (skb->priority != TC_PRIO_CONTROL)) {
		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
		tx_flags |= (skb->priority & 0x7) <<
				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
			struct vlan_ethhdr *vhdr;
			int rc;

			rc = skb_cow_head(skb, 0);
			if (rc < 0)
				return rc;
			vhdr = (struct vlan_ethhdr *)skb->data;
			vhdr->h_vlan_TCI = htons(tx_flags >>
						 I40E_TX_FLAGS_VLAN_SHIFT);
		} else {
			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
		}
	}

out:
	*flags = tx_flags;
	return 0;
}
/**
 * i40e_tso - set up the tso context descriptor
 * @tx_ring:  ptr to the ring to send
 * @skb:      ptr to the skb we're sending
 * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to u64 object
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO can happen, 1 if tso is going, or error
 **/
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
		    u32 *cd_tunneling)
{
	u32 cd_cmd, cd_tso_len, cd_mss;
	struct ipv6hdr *ipv6h;
	struct tcphdr *tcph;
	struct iphdr *iph;
	u32 l4len;
	int err;

	if (!skb_is_gso(skb))
		return 0;

	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	if (iph->version == 4) {
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 0, IPPROTO_TCP, 0);
	} else if (ipv6h->version == 6) {
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		ipv6h->payload_len = 0;
		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					       0, IPPROTO_TCP, 0);
	}

	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
	*hdr_len = (skb->encapsulation
		    ? (skb_inner_transport_header(skb) - skb->data)
		    : skb_transport_offset(skb)) + l4len;

	/* find the field values */
	cd_cmd = I40E_TX_CTX_DESC_TSO;
	cd_tso_len = skb->len - *hdr_len;
	cd_mss = skb_shinfo(skb)->gso_size;
	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
				((u64)cd_tso_len <<
				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	return 1;
}
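
/* Illustrative sketch, not part of the driver: the TSO context quad word in
 * isolation. For a 66000-byte skb with a 54-byte header and a 1448-byte MSS,
 * cd_tso_len is 65946 and the hardware cuts DIV_ROUND_UP(65946, 1448) = 46
 * segments. Helper name is ours; the macros are the ones used above.
 */
static inline u64 i40e_example_tso_qw1(u32 tso_len, u32 mss)
{
	return ((u64)I40E_TX_CTX_DESC_TSO << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	       ((u64)tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	       ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
}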
/**
 * i40e_tsyn - set up the tsyn context descriptor
 * @tx_ring:  ptr to the ring to send
 * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to u64 object
 *
 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
 **/
static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
{
	struct i40e_pf *pf;

	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
		return 0;

	/* Tx timestamps cannot be sampled when doing TSO */
	if (tx_flags & I40E_TX_FLAGS_TSO)
		return 0;

	/* only timestamp the outbound packet if the user has requested it and
	 * we are not already transmitting a packet to be timestamped
	 */
	pf = i40e_netdev_to_pf(tx_ring->netdev);
	if (!(pf->flags & I40E_FLAG_PTP))
		return 0;

	if (pf->ptp_tx &&
	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		pf->ptp_tx_skb = skb_get(skb);
	} else {
		return 0;
	}

	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
				I40E_TXD_CTX_QW1_CMD_SHIFT;

	return 1;
}
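
/* Illustrative sketch, not part of the driver: the TSYN command bit merges
 * into the same context quad word i40e_tso() fills, so a single context
 * descriptor can carry both the TSO and the timestamp request. Helper name
 * is ours.
 */
static inline u64 i40e_example_add_tsyn(u64 cd_type_cmd_tso_mss)
{
	return cd_type_cmd_tso_mss |
	       ((u64)I40E_TX_CTX_DESC_TSYN << I40E_TXD_CTX_QW1_CMD_SHIFT);
}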
/**
 * i40e_tx_enable_csum - Enable Tx checksum offloads
 * @skb: send buffer
 * @tx_flags: pointer to Tx flags currently set
 * @td_cmd: Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
 **/
static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
				u32 *td_cmd, u32 *td_offset,
				struct i40e_ring *tx_ring,
				u32 *cd_tunneling)
{
	struct ipv6hdr *this_ipv6_hdr;
	unsigned int this_tcp_hdrlen;
	struct iphdr *this_ip_hdr;
	u32 network_hdr_len;
	u8 l4_hdr = 0;
	struct udphdr *oudph;
	struct iphdr *oiph;
	u32 l4_tunnel = 0;

	if (skb->encapsulation) {
		switch (ip_hdr(skb)->protocol) {
		case IPPROTO_UDP:
			oudph = udp_hdr(skb);
			oiph = ip_hdr(skb);
			l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
			break;
		default:
			return;
		}
		network_hdr_len = skb_inner_network_header_len(skb);
		this_ip_hdr = inner_ip_hdr(skb);
		this_ipv6_hdr = inner_ipv6_hdr(skb);
		this_tcp_hdrlen = inner_tcp_hdrlen(skb);

		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
			if (*tx_flags & I40E_TX_FLAGS_TSO) {
				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
				ip_hdr(skb)->check = 0;
			} else {
				*cd_tunneling |=
					I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
			}
		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
			if (*tx_flags & I40E_TX_FLAGS_TSO)
				ip_hdr(skb)->check = 0;
		}

		/* Now set the ctx descriptor fields */
		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
				   I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
				   l4_tunnel                        |
				   ((skb_inner_network_offset(skb) -
				     skb_transport_offset(skb)) >> 1) <<
				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
		if (this_ip_hdr->version == 6) {
			*tx_flags &= ~I40E_TX_FLAGS_IPV4;
			*tx_flags |= I40E_TX_FLAGS_IPV6;
		}
		if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
		    (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING)        &&
		    (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
			oudph->check = ~csum_tcpudp_magic(oiph->saddr,
					oiph->daddr,
					(skb->len - skb_transport_offset(skb)),
					IPPROTO_UDP, 0);
			*cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
		}
	} else {
		network_hdr_len = skb_network_header_len(skb);
		this_ip_hdr = ip_hdr(skb);
		this_ipv6_hdr = ipv6_hdr(skb);
		this_tcp_hdrlen = tcp_hdrlen(skb);
	}

	/* Enable IP checksum offloads */
	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
		l4_hdr = this_ip_hdr->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		if (*tx_flags & I40E_TX_FLAGS_TSO) {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
			this_ip_hdr->check = 0;
		} else {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		}
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
		l4_hdr = this_ipv6_hdr->nexthdr;
		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	}
	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
	*td_offset |= (skb_network_offset(skb) >> 1) <<
		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

	/* Enable L4 checksum offloads */
	switch (l4_hdr) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
		*td_offset |= (this_tcp_hdrlen >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
		*td_offset |= (sizeof(struct udphdr) >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	default:
		break;
	}
}
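
/* Illustrative sketch, not part of the driver: the td_offset packing used
 * above. MACLEN is counted in 16-bit words, IPLEN and L4LEN in 32-bit
 * dwords: an untagged IPv4 TCP frame has a 14-byte MAC header (7 words),
 * a bare 20-byte IPv4 header (5 dwords) and a bare 20-byte TCP header
 * (5 dwords). Helper name and parameters are ours.
 */
static inline u32 i40e_example_td_offset(u32 maclen, u32 iplen, u32 l4len)
{
	return ((maclen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT) |
	       ((iplen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT) |
	       ((l4len >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT);
}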
/**
 * i40e_create_tx_ctx - Build the Tx context descriptor
 * @tx_ring:  ring to create the descriptor on
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: Quad Word 0 - bits 0-31
 * @cd_l2tag2: Quad Word 0 - bits 32-63
 **/
static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
			       const u64 cd_type_cmd_tso_mss,
			       const u32 cd_tunneling, const u32 cd_l2tag2)
{
	struct i40e_tx_context_desc *context_desc;
	int i = tx_ring->next_to_use;

	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
	    !cd_tunneling && !cd_l2tag2)
		return;

	/* grab the next descriptor */
	context_desc = I40E_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* cpu_to_le32 and assign to struct fields */
	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
	context_desc->rsvd = cpu_to_le16(0);
	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
}
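
/* Illustrative sketch, not part of the driver: the early-return condition
 * above, inverted. A context descriptor is only spent when QW1 carries more
 * than the bare I40E_TX_DESC_DTYPE_CONTEXT marker or QW0 is non-zero, so
 * plain frames with no TSO, timestamp, tunneling or L2TAG2 skip it entirely.
 * Helper name is ours.
 */
static inline bool i40e_example_ctx_needed(u64 qw1, u32 tunneling, u32 l2tag2)
{
	return (qw1 != I40E_TX_DESC_DTYPE_CONTEXT) || tunneling || l2tag2;
}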
/**
 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 **/
static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available. */
	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}
/**
 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns 0 if stop is not needed
 **/
#ifdef I40E_FCOE
inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#else
static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#endif
{
	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
		return 0;
	return __i40e_maybe_stop_tx(tx_ring, size);
}
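
/* Illustrative sketch, not part of the driver: how a transmit routine uses
 * the two-level check. The first level is a cheap lockless test; only when
 * the ring looks full does __i40e_maybe_stop_tx() stop the queue, insert a
 * barrier against the cleanup path and re-test before giving up. Helper name
 * is ours.
 */
static inline bool i40e_example_ring_has_room(struct i40e_ring *tx_ring,
					      int needed)
{
	/* returns 0 when 'needed' descriptors are available */
	return i40e_maybe_stop_tx(tx_ring, needed) == 0;
}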
/**
 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb:      send buffer
 * @tx_flags: collected send information
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 **/
static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
{
	struct skb_frag_struct *frag;
	bool linearize = false;
	unsigned int size = 0;
	u16 num_frags;
	u16 gso_segs;

	num_frags = skb_shinfo(skb)->nr_frags;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
		u16 j = 0;

		if (num_frags < (I40E_MAX_BUFFER_TXD))
			goto linearize_chk_done;
		/* try the simple math, if we have too many frags per segment */
		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
		    I40E_MAX_BUFFER_TXD) {
			linearize = true;
			goto linearize_chk_done;
		}
		frag = &skb_shinfo(skb)->frags[0];
		/* we might still have more fragments per segment */
		do {
			size += skb_frag_size(frag);
			frag++; j++;
			if ((size >= skb_shinfo(skb)->gso_size) &&
			    (j < I40E_MAX_BUFFER_TXD)) {
				size = (size % skb_shinfo(skb)->gso_size);
				j = (size) ? 1 : 0;
			}
			if (j == I40E_MAX_BUFFER_TXD) {
				linearize = true;
				break;
			}
			num_frags--;
		} while (num_frags);
	} else {
		if (num_frags >= I40E_MAX_BUFFER_TXD)
			linearize = true;
	}

linearize_chk_done:
	return linearize;
}
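
/* Illustrative sketch, not part of the driver: the quick upper bound used
 * above. With num_frags = 17 and gso_segs = 2 the estimate is
 * DIV_ROUND_UP(17 + 2, 2) = 10 frags per segment, which exceeds
 * I40E_MAX_BUFFER_TXD and forces linearization without walking the frags.
 * Helper name is ours.
 */
static inline bool i40e_example_too_fragmented(u16 num_frags, u16 gso_segs)
{
	return DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
	       I40E_MAX_BUFFER_TXD;
}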
/**
 * i40e_tx_map - Build the Tx descriptor
 * @tx_ring:  ring to send buffer on
 * @skb:      send buffer
 * @first:    first buffer info buffer to use
 * @tx_flags: collected send information
 * @hdr_len:  size of the packet header
 * @td_cmd:   the command field in the descriptor
 * @td_offset: offset for checksum or crc
 **/
#ifdef I40E_FCOE
inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
			struct i40e_tx_buffer *first, u32 tx_flags,
			const u8 hdr_len, u32 td_cmd, u32 td_offset)
#else
static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
			       struct i40e_tx_buffer *first, u32 tx_flags,
			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
#endif
{
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	struct skb_frag_struct *frag;
	struct i40e_tx_buffer *tx_bi;
	struct i40e_tx_desc *tx_desc;
	u16 i = tx_ring->next_to_use;
	u32 td_tag = 0;
	dma_addr_t dma;
	u16 gso_segs;

	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
			 I40E_TX_FLAGS_VLAN_SHIFT;
	}

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
		gso_segs = skb_shinfo(skb)->gso_segs;
	else
		gso_segs = 1;

	/* multiply data chunks by size of headers */
	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
	first->gso_segs = gso_segs;

	first->tx_flags = tx_flags;

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_bi = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_bi, len, size);
		dma_unmap_addr_set(tx_bi, dma, dma);

		tx_desc->buffer_addr = cpu_to_le64(dma);

		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				build_ctob(td_cmd, td_offset,
					   I40E_MAX_DATA_PER_TXD, td_tag);

			tx_desc++;
			i++;
			if (i == tx_ring->count) {
				tx_desc = I40E_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += I40E_MAX_DATA_PER_TXD;
			size -= I40E_MAX_DATA_PER_TXD;

			tx_desc->buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
							  size, td_tag);

		tx_desc++;
		i++;
		if (i == tx_ring->count) {
			tx_desc = I40E_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_bi = &tx_ring->tx_bi[i];
	}

	/* Place RS bit on last descriptor of any packet that spans across the
	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
	 */
	if (((i & WB_STRIDE) != WB_STRIDE) &&
	    (first <= &tx_ring->tx_bi[i]) &&
	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
					 I40E_TXD_QW1_CMD_SHIFT);
	} else {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TXD_CMD <<
					 I40E_TXD_QW1_CMD_SHIFT);
	}

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),
			     first->bytecount);

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
	/* notify HW of packet */
	if (!skb->xmit_more ||
	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
						   tx_ring->queue_index)))
		writel(i, tx_ring->tail);
	else
		prefetchw(tx_desc + 1);

	return;

dma_error:
	dev_info(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_bi map */
	for (;;) {
		tx_bi = &tx_ring->tx_bi[i];
		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
		if (tx_bi == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}
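
/* Illustrative sketch, not part of the driver: how many data descriptors a
 * single non-empty buffer consumes after the I40E_MAX_DATA_PER_TXD splitting
 * above; this is what the TXD_USE_COUNT() estimate in the descriptor count
 * check relies on. Helper name is ours.
 */
static inline unsigned int i40e_example_txds_for(unsigned int size)
{
	unsigned int count = 0;

	while (size > I40E_MAX_DATA_PER_TXD) {
		size -= I40E_MAX_DATA_PER_TXD;
		count++;
	}
	return count + 1;	/* one more for the tail chunk */
}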
/**
 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns the number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring since we
 * need at least one.
 **/
#ifdef I40E_FCOE
inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
				      struct i40e_ring *tx_ring)
#else
static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
					     struct i40e_ring *tx_ring)
#endif
{
	unsigned int f;
	int count = 0;

	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
	 *       + 4 desc gap to avoid the cache line where head is,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	count += TXD_USE_COUNT(skb_headlen(skb));
	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
		tx_ring->tx_stats.tx_busy++;
		return 0;
	}
	return count;
}
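
/* Illustrative sketch, not part of the driver: the reservation the check
 * above makes. A 3-fragment skb with a short head needs 1 + 3 data
 * descriptors, so 4 + 4 + 1 = 9 free slots must exist before the frame is
 * accepted. Helper name is ours.
 */
static inline int i40e_example_desc_budget(int data_descs)
{
	/* data descriptors + 4 descriptor gap + 1 context descriptor */
	return data_descs + 4 + 1;
}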
/**
 * i40e_xmit_frame_ring - Sends buffer on Tx ring
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
					struct i40e_ring *tx_ring)
{
	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
	u32 cd_tunneling = 0, cd_l2tag2 = 0;
	struct i40e_tx_buffer *first;
	u32 td_offset = 0;
	int tsyn;
	u32 td_cmd = 0;
	u8 hdr_len = 0;
	u32 tx_flags = 0;
	__be16 protocol;
	int tso;

	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
		return NETDEV_TX_BUSY;

	/* prepare the xmit flags */
	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
		goto out_drop;

	/* obtain protocol of skb */
	protocol = vlan_get_protocol(skb);

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_bi[tx_ring->next_to_use];

	/* setup IPv4/IPv6 offloads */
	if (protocol == htons(ETH_P_IP))
		tx_flags |= I40E_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		tx_flags |= I40E_TX_FLAGS_IPV6;

	tso = i40e_tso(tx_ring, skb, &hdr_len,
		       &cd_type_cmd_tso_mss, &cd_tunneling);

	if (tso < 0)
		goto out_drop;
	else if (tso)
		tx_flags |= I40E_TX_FLAGS_TSO;

	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);

	if (tsyn)
		tx_flags |= I40E_TX_FLAGS_TSYN;

	if (i40e_chk_linearize(skb, tx_flags))
		if (skb_linearize(skb))
			goto out_drop;

	skb_tx_timestamp(skb);

	/* always enable CRC insertion offload */
	td_cmd |= I40E_TX_DESC_CMD_ICRC;

	/* Always offload the checksum, since it's in the data descriptor */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		tx_flags |= I40E_TX_FLAGS_CSUM;

		i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
				    tx_ring, &cd_tunneling);
	}

	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
			   cd_tunneling, cd_l2tag2);

	/* Add Flow Director ATR if it's enabled.
	 *
	 * NOTE: this must always be directly before the data descriptor.
	 */
	i40e_atr(tx_ring, skb, tx_flags, protocol);

	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
		    td_cmd, td_offset);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
/**
 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
 * @skb:    send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
		return NETDEV_TX_OK;

	return i40e_xmit_frame_ring(skb, tx_ring);
}