/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
 * Copyright(c) 2013 - 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/
#include <linux/prefetch.h>
#include <net/busy_poll.h>

#include "i40evf.h"
#include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
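/* Example: i40evf_tx_map() calls this helper for every data descriptor, e.g.
 *	build_ctob(td_cmd, td_offset, I40E_MAX_DATA_PER_TXD, td_tag)
 * for a maximally sized chunk of a fragment; the EOP/RS command bits are
 * OR'd into the quad word of the last descriptor of the packet only.
 */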
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 * @ring:      the ring that owns the buffer
 * @tx_buffer: the buffer to free
 **/
static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
					    struct i40e_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		dev_kfree_skb_any(tx_buffer->skb);
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}

	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
		kfree(tx_buffer->raw_buf);

	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* tx_buffer must be completely set up in the transmit path */
}
/**
 * i40evf_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40evf_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_bi)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);

	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
						  tx_ring->queue_index));
}
/**
 * i40evf_free_tx_resources - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
{
	i40evf_clean_tx_ring(tx_ring);
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;

	if (tx_ring->desc) {
		dma_free_coherent(tx_ring->dev, tx_ring->size,
				  tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}
/**
 * i40evf_get_tx_pending - how many Tx descriptors not processed
 * @ring: the ring of descriptors
 * @in_sw: is tx_pending being checked in SW or HW
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
	u32 head, tail;

	if (!in_sw)
		head = i40e_get_head(ring);
	else
		head = ring->next_to_clean;
	tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}
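/* Worked example of the pending count above: on a 512 entry ring with
 * head = 500 and tail = 10, head is not less than tail, so the result is
 * tail + ring->count - head = 10 + 512 - 500 = 22 descriptors still owned
 * by hardware across the wrap point.
 */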
#define WB_STRIDE 0x3
/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring:  tx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_packets = 0;
	unsigned int total_bytes = 0;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_packets += tx_buf->gso_segs;

		/* free the skb */
		dev_kfree_skb_any(tx_buf->skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_bi;
				tx_desc = I40E_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_bi;
			tx_desc = I40E_TX_DESC(tx_ring, 0);
		}

		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_packets += total_packets;

	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
		unsigned int j = 0;

		/* check to see if there are < 4 descriptors
		 * waiting to be written back, then kick the hardware to force
		 * them to be written back in case we stay in NAPI.
		 * In this mode on X722 we do not enable Interrupt.
		 */
		j = i40evf_get_tx_pending(tx_ring, false);

		if (budget &&
		    ((j / (WB_STRIDE + 1)) == 0) && (j > 0) &&
		    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
			tx_ring->arm_wb = true;
	}

	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
						      tx_ring->queue_index),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}
/**
 * i40evf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to enable writeback
 *
 **/
static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
				  struct i40e_q_vector *q_vector)
{
	u16 flags = q_vector->tx.ring[0].flags;
	u32 val;

	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
		return;

	if (q_vector->arm_wb_state)
		return;

	val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
	      I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */

	wr32(&vsi->back->hw,
	     I40E_VFINT_DYN_CTLN1(q_vector->v_idx +
				  vsi->base_vector - 1), val);
	q_vector->arm_wb_state = true;
}
/**
 * i40evf_force_wb - Issue SW Interrupt so HW does a wb
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 *
 **/
void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
		  I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
		  I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
		  I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
		  /* allow 00 to be written to the index */;

	wr32(&vsi->back->hw,
	     I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1),
	     val);
}
/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Returns true if ITR changed, false if not
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt.  The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern.  Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
	enum i40e_latency_range new_latency_range = rc->latency_range;
	struct i40e_q_vector *qv = rc->ring->q_vector;
	u32 new_itr = rc->itr;
	int bytes_per_int;
	int usecs;

	if (rc->total_packets == 0 || !rc->itr)
		return false;

	/* simple throttlerate management
	 *   0-10MB/s   lowest (50000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (18000 ints/s)
	 *  > 40000 Rx packets per second (8000 ints/s)
	 *
	 * The math works out because the divisor is in 10^(-6) which
	 * turns the bytes/us input value into MB/s values, but
	 * make sure to use usecs, as the register values written
	 * are in 2 usec increments in the ITR registers, and make sure
	 * to use the smoothed values that the countdown timer gives us.
	 */
	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
	bytes_per_int = rc->total_bytes / usecs;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
	case I40E_ULTRA_LATENCY:
	default:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}

	/* this is to adjust RX more aggressively when streaming small
	 * packets.  The value of 40000 was picked as it is just beyond
	 * what the hardware can receive per second if in low latency
	 * mode.
	 */
#define RX_ULTRA_PACKET_RATE 40000

	if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
	    (&qv->rx == rc))
		new_latency_range = I40E_ULTRA_LATENCY;

	rc->latency_range = new_latency_range;

	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		new_itr = I40E_ITR_50K;
		break;
	case I40E_LOW_LATENCY:
		new_itr = I40E_ITR_20K;
		break;
	case I40E_BULK_LATENCY:
		new_itr = I40E_ITR_18K;
		break;
	case I40E_ULTRA_LATENCY:
		new_itr = I40E_ITR_8K;
		break;
	default:
		break;
	}

	rc->total_bytes = 0;
	rc->total_packets = 0;

	if (new_itr != rc->itr) {
		rc->itr = new_itr;
		return true;
	}

	return false;
}
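/* Rough worked example of the throttle math above, assuming an ITR setting
 * of 25 (2-usec units, ~20K ints/s) and ITR_COUNTDOWN_START of 100:
 * usecs = (25 << 1) * 100 = 5000.  If 150000 bytes arrived in that window,
 * bytes_per_int = 30 (~30 MB/s), which is > 20 and moves the ring from the
 * low latency range into the bulk range.
 */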
/**
 * i40evf_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_bi);
	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	kfree(tx_ring->tx_bi);
	tx_ring->tx_bi = NULL;
	return -ENOMEM;
}
/**
 * i40evf_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	unsigned long bi_size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	if (ring_is_ps_enabled(rx_ring)) {
		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;

		rx_bi = &rx_ring->rx_bi[0];
		if (rx_bi->hdr_buf) {
			dma_free_coherent(dev,
					  bufsz,
					  rx_bi->hdr_buf,
					  rx_bi->dma);
			for (i = 0; i < rx_ring->count; i++) {
				rx_bi = &rx_ring->rx_bi[i];
				rx_bi->dma = 0;
				rx_bi->hdr_buf = NULL;
			}
		}
	}
	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		if (rx_bi->dma) {
			dma_unmap_single(dev,
					 rx_bi->dma,
					 rx_ring->rx_buf_len,
					 DMA_FROM_DEVICE);
			rx_bi->dma = 0;
		}
		if (rx_bi->skb) {
			dev_kfree_skb(rx_bi->skb);
			rx_bi->skb = NULL;
		}
		if (rx_bi->page) {
			if (rx_bi->page_dma) {
				dma_unmap_page(dev,
					       rx_bi->page_dma,
					       PAGE_SIZE,
					       DMA_FROM_DEVICE);
				rx_bi->page_dma = 0;
			}
			__free_page(rx_bi->page);
			rx_bi->page = NULL;
			rx_bi->page_offset = 0;
		}
	}

	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_bi, 0, bi_size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}
/**
 * i40evf_free_rx_resources - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void i40evf_free_rx_resources(struct i40e_ring *rx_ring)
{
	i40evf_clean_rx_ring(rx_ring);
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;

	if (rx_ring->desc) {
		dma_free_coherent(rx_ring->dev, rx_ring->size,
				  rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}
/**
 * i40evf_alloc_rx_headers - allocate rx header buffers
 * @rx_ring: ring to alloc buffers
 *
 * Allocate rx header buffers for the entire ring. As these are static,
 * this is only called when setting up a new ring.
 **/
void i40evf_alloc_rx_headers(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	struct i40e_rx_buffer *rx_bi;
	dma_addr_t dma;
	void *buffer;
	int buf_size;
	int i;

	if (rx_ring->rx_bi[0].hdr_buf)
		return;
	/* Make sure the buffers don't cross cache line boundaries. */
	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
				    &dma, GFP_KERNEL);
	if (!buffer)
		return;
	for (i = 0; i < rx_ring->count; i++) {
		rx_bi = &rx_ring->rx_bi[i];
		rx_bi->dma = dma + (i * buf_size);
		rx_bi->hdr_buf = buffer + (i * buf_size);
	}
}
/**
 * i40evf_setup_rx_descriptors - Allocate Rx descriptors
 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(rx_ring->rx_bi);
	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!rx_ring->rx_bi)
		goto err;

	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);

	if (!rx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			 rx_ring->size);
		goto err;
	}

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;
err:
	kfree(rx_ring->rx_bi);
	rx_ring->rx_bi = NULL;
	return -ENOMEM;
}
/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}
/**
 * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns true if any errors on allocation
 **/
bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	const int current_node = numa_node_id();

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];

		if (bi->skb) /* desc is in use */
			goto no_buffers;

		/* If we've been moved to a different NUMA node, release the
		 * page so we can get a new one on the current node.
		 */
		if (bi->page && page_to_nid(bi->page) != current_node) {
			dma_unmap_page(rx_ring->dev,
				       bi->page_dma,
				       PAGE_SIZE,
				       DMA_FROM_DEVICE);
			__free_page(bi->page);
			bi->page = NULL;
			bi->page_dma = 0;
			rx_ring->rx_stats.realloc_count++;
		} else if (bi->page) {
			rx_ring->rx_stats.page_reuse_count++;
		}

		if (!bi->page) {
			bi->page = alloc_page(GFP_ATOMIC);
			if (!bi->page) {
				rx_ring->rx_stats.alloc_page_failed++;
				goto no_buffers;
			}
			bi->page_dma = dma_map_page(rx_ring->dev,
						    bi->page,
						    0,
						    PAGE_SIZE,
						    DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
				rx_ring->rx_stats.alloc_page_failed++;
				__free_page(bi->page);
				bi->page = NULL;
				bi->page_dma = 0;
				bi->page_offset = 0;
				goto no_buffers;
			}
			bi->page_offset = 0;
		}

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr =
				cpu_to_le64(bi->page_dma + bi->page_offset);
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	return false;

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}
/**
 * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns true if any errors on allocation
 **/
bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;
	struct sk_buff *skb;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	while (cleaned_count--) {
		rx_desc = I40E_RX_DESC(rx_ring, i);
		bi = &rx_ring->rx_bi[i];
		skb = bi->skb;

		if (!skb) {
			skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
							  rx_ring->rx_buf_len,
							  GFP_ATOMIC |
							  __GFP_NOWARN);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				goto no_buffers;
			}
			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			bi->skb = skb;
		}

		if (!bi->dma) {
			bi->dma = dma_map_single(rx_ring->dev,
						 skb->data,
						 rx_ring->rx_buf_len,
						 DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
				rx_ring->rx_stats.alloc_buff_failed++;
				bi->dma = 0;
				dev_kfree_skb(bi->skb);
				bi->skb = NULL;
				goto no_buffers;
			}
		}

		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
		rx_desc->read.hdr_addr = 0;
		i++;
		if (i == rx_ring->count)
			i = 0;
	}

	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	return false;

no_buffers:
	if (rx_ring->next_to_use != i)
		i40e_release_rx_desc(rx_ring, i);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}
/**
 * i40e_receive_skb - Send a completed packet up the stack
 * @rx_ring:  rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 **/
static void i40e_receive_skb(struct i40e_ring *rx_ring,
			     struct sk_buff *skb, u16 vlan_tag)
{
	struct i40e_q_vector *q_vector = rx_ring->q_vector;

	if (vlan_tag & VLAN_VID_MASK)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	napi_gro_receive(&q_vector->napi, skb);
}
/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_status: status value of last descriptor in packet
 * @rx_error: error value of last descriptor in packet
 * @rx_ptype: ptype value of last descriptor in packet
 **/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
				    struct sk_buff *skb,
				    u32 rx_status,
				    u32 rx_error,
				    u16 rx_ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
	bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;

	skb->ip_summed = CHECKSUM_NONE;

	/* Rx csum enabled and ip headers found? */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* did the hardware decode the packet and checksum? */
	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
		return;

	/* both known and outer_ip must be set for the below code to work */
	if (!(decoded.known && decoded.outer_ip))
		return;

	ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
	ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);

	if (ipv4 &&
	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
		goto checksum_fail;

	/* likely incorrect csum if alternate IP extension headers found */
	if (ipv6 &&
	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
		/* don't increment checksum err here, non-fatal err */
		return;

	/* there was some L4 error, count error and punt packet to the stack */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
		goto checksum_fail;

	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
		return;

	/* The hardware supported by this driver does not validate outer
	 * checksums for tunneled VXLAN or GENEVE frames.  I don't agree
	 * with it but the specification states that you "MAY validate", it
	 * doesn't make it a hard requirement so if we have validated the
	 * inner checksum report CHECKSUM_UNNECESSARY.
	 */

	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
		      (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum_level = ipv4_tunnel || ipv6_tunnel;

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}
/**
 * i40e_ptype_to_htype - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return PKT_HASH_TYPE_NONE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
		return PKT_HASH_TYPE_L4;
	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
		return PKT_HASH_TYPE_L3;
	else
		return PKT_HASH_TYPE_L2;
}
/**
 * i40e_rx_hash - set the hash value in the skb
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline void i40e_rx_hash(struct i40e_ring *ring,
				union i40e_rx_desc *rx_desc,
				struct sk_buff *skb,
				u8 rx_ptype)
{
	u32 hash;
	const __le64 rss_mask =
		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

	if (!(ring->netdev->features & NETIF_F_RXHASH))
		return;

	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
	}
}
/**
 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
 * @rx_ring:  rx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	struct i40e_vsi *vsi = rx_ring->vsi;
	u16 i = rx_ring->next_to_clean;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	bool failure = false;
	u8 rx_ptype;
	u64 qword;
	u32 copysize;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			failure = failure ||
				  i40evf_alloc_rx_buffers_ps(rx_ring,
							     cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();
		/* sync header buffer for reading */
		dma_sync_single_range_for_cpu(rx_ring->dev,
					      rx_ring->rx_bi[0].dma,
					      i * rx_ring->rx_hdr_len,
					      rx_ring->rx_hdr_len,
					      DMA_FROM_DEVICE);
		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		if (likely(!skb)) {
			skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
							  rx_ring->rx_hdr_len,
							  GFP_ATOMIC |
							  __GFP_NOWARN);
			if (!skb) {
				rx_ring->rx_stats.alloc_buff_failed++;
				failure = true;
				break;
			}

			/* initialize queue mapping */
			skb_record_rx_queue(skb, rx_ring->queue_index);
			/* we are reusing so sync this buffer for CPU use */
			dma_sync_single_range_for_cpu(rx_ring->dev,
						      rx_ring->rx_bi[0].dma,
						      i * rx_ring->rx_hdr_len,
						      rx_ring->rx_hdr_len,
						      DMA_FROM_DEVICE);
		}
		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		/* sync half-page for reading */
		dma_sync_single_range_for_cpu(rx_ring->dev,
					      rx_bi->page_dma,
					      rx_bi->page_offset,
					      PAGE_SIZE / 2,
					      DMA_FROM_DEVICE);
		prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
		rx_bi->skb = NULL;
		cleaned_count++;
		copysize = 0;
		if (rx_hbo || rx_sph) {
			int len;

			if (rx_hbo)
				len = I40E_RX_HDR_SIZE;
			else
				len = rx_header_len;
			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
		} else if (skb->len == 0) {
			int len;
			unsigned char *va = page_address(rx_bi->page) +
					    rx_bi->page_offset;

			len = min(rx_packet_len, rx_ring->rx_hdr_len);
			memcpy(__skb_put(skb, len), va, len);
			copysize = len;
			rx_packet_len -= len;
		}
		/* Get the rest of the data if this was a header split */
		if (rx_packet_len) {
			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
					rx_bi->page,
					rx_bi->page_offset + copysize,
					rx_packet_len, I40E_RXBUFFER_2048);

			/* If the page count is more than 2, then both halves
			 * of the page are used and we need to free it. Do it
			 * here instead of in the alloc code. Otherwise one
			 * of the half-pages might be released between now and
			 * then, and we wouldn't know which one to use.
			 * Don't call get_page and free_page since those are
			 * both expensive atomic operations that just change
			 * the refcount in opposite directions. Just give the
			 * page to the stack; he can have our refcount.
			 */
			if (page_count(rx_bi->page) > 2) {
				dma_unmap_page(rx_ring->dev,
					       rx_bi->page_dma,
					       PAGE_SIZE,
					       DMA_FROM_DEVICE);
				rx_bi->page = NULL;
				rx_bi->page_dma = 0;
				rx_ring->rx_stats.realloc_count++;
			} else {
				get_page(rx_bi->page);
				/* switch to the other half-page here; the
				 * allocation code programs the right addr
				 * into HW. If we haven't used this half-page,
				 * the address won't be changed, and HW can
				 * just use it next time through.
				 */
				rx_bi->page_offset ^= PAGE_SIZE / 2;
			}
		}
		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			struct i40e_rx_buffer *next_buffer;

			next_buffer = &rx_ring->rx_bi[i];
			next_buffer->skb = skb;
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			continue;
		}

		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
			dev_kfree_skb_any(skb);
			continue;
		}
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_desc->wb.qword1.status_error_len = 0;

	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return failure ? budget : total_rx_packets;
}
/**
 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
 * @rx_ring:  rx ring to clean
 * @budget:   how many cleans we're allowed
 *
 * Returns number of packets cleaned
 **/
static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
	struct i40e_vsi *vsi = rx_ring->vsi;
	union i40e_rx_desc *rx_desc;
	u32 rx_error, rx_status;
	u16 rx_packet_len;
	bool failure = false;
	u8 rx_ptype;
	u64 qword;
	u16 i;

	do {
		struct i40e_rx_buffer *rx_bi;
		struct sk_buff *skb;
		u16 vlan_tag;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
			failure = failure ||
				  i40evf_alloc_rx_buffers_1buf(rx_ring,
							       cleaned_count);
			cleaned_count = 0;
		}

		i = rx_ring->next_to_clean;
		rx_desc = I40E_RX_DESC(rx_ring, i);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
			I40E_RXD_QW1_STATUS_SHIFT;

		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();

		rx_bi = &rx_ring->rx_bi[i];
		skb = rx_bi->skb;
		prefetch(skb->data);

		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;

		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
			   I40E_RXD_QW1_ERROR_SHIFT;
		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);

		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
			   I40E_RXD_QW1_PTYPE_SHIFT;
		rx_bi->skb = NULL;
		cleaned_count++;

		/* Get the header and possibly the whole packet
		 * If this is an skb from previous receive dma will be 0
		 */
		skb_put(skb, rx_packet_len);
		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
				 DMA_FROM_DEVICE);
		rx_bi->dma = 0;

		I40E_RX_INCREMENT(rx_ring, i);

		if (unlikely(
		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
			rx_ring->rx_stats.non_eop_descs++;
			continue;
		}

		/* ERR_MASK will only have valid bits if EOP set */
		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			dev_kfree_skb_any(skb);
			continue;
		}

		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;
		total_rx_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);

		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
			 : 0;
		i40e_receive_skb(rx_ring, skb, vlan_tag);

		rx_desc->wb.qword1.status_error_len = 0;
	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_packets += total_rx_packets;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	return failure ? budget : total_rx_packets;
}
static u32 i40e_buildreg_itr(const int type, const u16 itr)
{
	u32 val;

	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
	      /* Don't clear PBA because that can cause lost interrupts that
	       * came in while we were cleaning/polling
	       */
	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
	      (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);

	return val;
}

/* a small macro to shorten up some long lines */
#define INTREG I40E_VFINT_DYN_CTLN1
/**
 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
 * @vsi: the VSI we care about
 * @q_vector: q_vector for which itr is being updated and interrupt enabled
 *
 **/
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
					  struct i40e_q_vector *q_vector)
{
	struct i40e_hw *hw = &vsi->back->hw;
	bool rx = false, tx = false;
	u32 rxval, txval;
	int vector;

	vector = (q_vector->v_idx + vsi->base_vector);

	/* avoid dynamic calculation if in countdown mode OR if
	 * all dynamic is disabled
	 */
	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);

	if (q_vector->itr_countdown > 0 ||
	    (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
	     !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
		goto enable_int;
	}

	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
	}

	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
	}

	if (rx || tx) {
		/* get the higher of the two ITR adjustments and
		 * use the same value for both ITR registers
		 * when in adaptive mode (Rx and/or Tx)
		 */
		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);

		q_vector->tx.itr = q_vector->rx.itr = itr;
		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
		tx = true;
		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
		rx = true;
	}

	/* only need to enable the interrupt once, but need
	 * to possibly update both ITR values
	 */
	if (rx) {
		/* set the INTENA_MSK_MASK so that this first write
		 * won't actually enable the interrupt, instead just
		 * updating the ITR (it's bit 31 PF and VF)
		 */
		rxval |= BIT(31);
		/* don't check _DOWN because interrupt isn't being enabled */
		wr32(hw, INTREG(vector - 1), rxval);
	}

enable_int:
	if (!test_bit(__I40E_DOWN, &vsi->state))
		wr32(hw, INTREG(vector - 1), txval);

	if (q_vector->itr_countdown)
		q_vector->itr_countdown--;
	else
		q_vector->itr_countdown = ITR_COUNTDOWN_START;
}
/**
 * i40evf_napi_poll - NAPI polling Rx/Tx cleanup routine
 * @napi: napi struct with our devices info in it
 * @budget: amount of work driver is allowed to do this pass, in packets
 *
 * This function will clean all queues associated with a q_vector.
 *
 * Returns the amount of work done
 **/
int i40evf_napi_poll(struct napi_struct *napi, int budget)
{
	struct i40e_q_vector *q_vector =
			       container_of(napi, struct i40e_q_vector, napi);
	struct i40e_vsi *vsi = q_vector->vsi;
	struct i40e_ring *ring;
	bool clean_complete = true;
	bool arm_wb = false;
	int budget_per_ring;
	int work_done = 0;

	if (test_bit(__I40E_DOWN, &vsi->state)) {
		napi_complete(napi);
		return 0;
	}

	/* Since the actual Tx work is minimal, we can give the Tx a larger
	 * budget and be more aggressive about cleaning up the Tx descriptors.
	 */
	i40e_for_each_ring(ring, q_vector->tx) {
		clean_complete = clean_complete &&
				 i40e_clean_tx_irq(ring, vsi->work_limit);
		arm_wb = arm_wb || ring->arm_wb;
		ring->arm_wb = false;
	}

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0)
		goto tx_only;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);

	i40e_for_each_ring(ring, q_vector->rx) {
		int cleaned;

		if (ring_is_ps_enabled(ring))
			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
		else
			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);

		work_done += cleaned;
		/* if we didn't clean as many as budgeted, we must be done */
		clean_complete = clean_complete && (budget_per_ring > cleaned);
	}

	/* If work not completed, return budget and polling will return */
	if (!clean_complete) {
tx_only:
		if (arm_wb) {
			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
			i40e_enable_wb_on_itr(vsi, q_vector);
		}
		return budget;
	}

	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
		q_vector->arm_wb_state = false;

	/* Work is done so exit the polling mode and re-enable the interrupt */
	napi_complete_done(napi, work_done);
	i40e_update_enable_itr(vsi, q_vector);
	return 0;
}
/**
 * i40evf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
 * @skb:      send buffer
 * @tx_ring:  ring to send buffer on
 * @flags:    the tx flags to be set
 *
 * Checks the skb and set up correspondingly several generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped upon error,
 * otherwise returns 0 to indicate the flags have been set properly.
 **/
static inline int i40evf_tx_prepare_vlan_flags(struct sk_buff *skb,
					       struct i40e_ring *tx_ring,
					       u32 *flags)
{
	__be16 protocol = skb->protocol;
	u32 tx_flags = 0;

	if (protocol == htons(ETH_P_8021Q) &&
	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
		/* When HW VLAN acceleration is turned off by the user the
		 * stack sets the protocol to 8021q so that the driver
		 * can take any steps required to support the SW only
		 * VLAN handling.  In our case the driver doesn't need
		 * to take any further steps so just set the protocol
		 * to the encapsulated ethertype.
		 */
		skb->protocol = vlan_get_protocol(skb);
		goto out;
	}

	/* if we have a HW VLAN tag being added, default to the HW one */
	if (skb_vlan_tag_present(skb)) {
		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
	/* else if it is a SW VLAN, check the next protocol and store the tag */
	} else if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, _vhdr;

		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
		if (!vhdr)
			return -EINVAL;

		protocol = vhdr->h_vlan_encapsulated_proto;
		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
	}

out:
	*flags = tx_flags;
	return 0;
}
/**
 * i40e_tso - set up the tso context descriptor
 * @tx_ring:  ptr to the ring to send
 * @skb:      ptr to the skb we're sending
 * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: Quad Word 1
 *
 * Returns 0 if no TSO can happen, 1 if tso is going, or error
 **/
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
{
	u64 cd_cmd, cd_tso_len, cd_mss;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		struct udphdr *udp;
		unsigned char *hdr;
	} l4;
	u32 paylen, l4_offset;
	int err;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (!skb_is_gso(skb))
		return 0;

	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* initialize outer IP header fields */
	if (ip.v4->version == 4) {
		ip.v4->tot_len = 0;
		ip.v4->check = 0;
	} else {
		ip.v6->payload_len = 0;
	}

	if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
					 SKB_GSO_UDP_TUNNEL_CSUM)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
			/* determine offset of outer transport header */
			l4_offset = l4.hdr - skb->data;

			/* remove payload length from outer checksum */
			paylen = (__force u16)l4.udp->check;
			paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
			l4.udp->check = ~csum_fold((__force __wsum)paylen);
		}

		/* reset pointers to inner headers */
		ip.hdr = skb_inner_network_header(skb);
		l4.hdr = skb_inner_transport_header(skb);

		/* initialize inner IP header fields */
		if (ip.v4->version == 4) {
			ip.v4->tot_len = 0;
			ip.v4->check = 0;
		} else {
			ip.v6->payload_len = 0;
		}
	}

	/* determine offset of inner transport header */
	l4_offset = l4.hdr - skb->data;

	/* remove payload length from inner checksum */
	paylen = (__force u16)l4.tcp->check;
	paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
	l4.tcp->check = ~csum_fold((__force __wsum)paylen);

	/* compute length of segmentation header */
	*hdr_len = (l4.tcp->doff * 4) + l4_offset;

	/* find the field values */
	cd_cmd = I40E_TX_CTX_DESC_TSO;
	cd_tso_len = skb->len - *hdr_len;
	cd_mss = skb_shinfo(skb)->gso_size;
	*cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
				(cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
				(cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	return 1;
}
/**
 * i40e_tx_enable_csum - Enable Tx checksum offloads
 * @skb: send buffer
 * @tx_flags: pointer to Tx flags currently set
 * @td_cmd: Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
 **/
static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
			       u32 *td_cmd, u32 *td_offset,
			       struct i40e_ring *tx_ring,
			       u32 *cd_tunneling)
{
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		struct udphdr *udp;
		unsigned char *hdr;
	} l4;
	unsigned char *exthdr;
	u32 offset, cmd = 0, tunnel = 0;
	__be16 frag_off;
	u8 l4_proto = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

	if (skb->encapsulation) {
		/* define outer network header type */
		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
			tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
				  I40E_TX_CTX_EXT_IP_IPV4 :
				  I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
			tunnel |= I40E_TX_CTX_EXT_IP_IPV6;

			exthdr = ip.hdr + sizeof(*ip.v6);
			l4_proto = ip.v6->nexthdr;
			if (l4.hdr != exthdr)
				ipv6_skip_exthdr(skb, exthdr - skb->data,
						 &l4_proto, &frag_off);
		}

		/* compute outer L3 header size */
		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
			  I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
			break;
		case IPPROTO_GRE:
			tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
			break;
		default:
			if (*tx_flags & I40E_TX_FLAGS_TSO)
				return -1;

			skb_checksum_help(skb);
			return 0;
		}

		/* compute tunnel header size */
		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
			  I40E_TXD_CTX_QW0_NATLEN_SHIFT;

		/* indicate if we need to offload outer UDP header */
		if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;

		/* record tunnel offload values */
		*cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		*tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			*tx_flags |= I40E_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			*tx_flags |= I40E_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
		       I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
		       I40E_TX_DESC_CMD_IIPT_IPV4;
	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
		cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;

		exthdr = ip.hdr + sizeof(*ip.v6);
		l4_proto = ip.v6->nexthdr;
		if (l4.hdr != exthdr)
			ipv6_skip_exthdr(skb, exthdr - skb->data,
					 &l4_proto, &frag_off);
	}

	/* compute inner L3 header size */
	offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
		offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
		offset |= (sizeof(struct sctphdr) >> 2) <<
			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
		offset |= (sizeof(struct udphdr) >> 2) <<
			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	default:
		if (*tx_flags & I40E_TX_FLAGS_TSO)
			return -1;
		skb_checksum_help(skb);
		return 0;
	}

	*td_cmd |= cmd;
	*td_offset |= offset;

	return 1;
}
/**
 * i40e_create_tx_ctx - Build the Tx context descriptor
 * @tx_ring:  ring to create the descriptor on
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: Quad Word 0 - bits 0-31
 * @cd_l2tag2: Quad Word 0 - bits 32-63
 **/
static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
			       const u64 cd_type_cmd_tso_mss,
			       const u32 cd_tunneling, const u32 cd_l2tag2)
{
	struct i40e_tx_context_desc *context_desc;
	int i = tx_ring->next_to_use;

	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
	    !cd_tunneling && !cd_l2tag2)
		return;

	/* grab the next descriptor */
	context_desc = I40E_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* cpu_to_le32 and assign to struct fields */
	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
	context_desc->rsvd = cpu_to_le16(0);
	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
}
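/* Note: a plain, non-tunneled, non-TSO packet leaves cd_type_cmd_tso_mss at
 * I40E_TX_DESC_DTYPE_CONTEXT and both cd_tunneling and cd_l2tag2 at zero, so
 * the early return above means no context descriptor is consumed for it; a
 * TSO or tunneled packet gets exactly one context descriptor written ahead
 * of its data descriptors.
 */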
/**
 * __i40evf_chk_linearize - Check if there are more than 8 buffers per packet
 * @skb:      send buffer
 *
 * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
 * and so we need to figure out the cases where we need to linearize the skb.
 *
 * For TSO we need to count the TSO header and segment payload separately.
 * As such we need to check cases where we have 7 fragments or more as we
 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
 * the segment payload in the first descriptor, and another 7 for the
 * fragments.
 **/
bool __i40evf_chk_linearize(struct sk_buff *skb)
{
	const struct skb_frag_struct *frag, *stale;
	int nr_frags, sum;

	/* no need to check if number of frags is less than 7 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
		return false;

	/* We need to walk through the list and validate that each group
	 * of 6 fragments totals at least gso_size.  However we don't need
	 * to perform such validation on the last 6 since the last 6 cannot
	 * inherit any data from a descriptor after them.
	 */
	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
	frag = &skb_shinfo(skb)->frags[0];

	/* Initialize size to the negative value of gso_size minus 1.  We
	 * use this as the worst case scenerio in which the frag ahead
	 * of us only provides one byte which is why we are limited to 6
	 * descriptors for a single transmit as the header and previous
	 * fragment are already consuming 2 descriptors.
	 */
	sum = 1 - skb_shinfo(skb)->gso_size;

	/* Add size of frags 0 through 4 to create our initial sum */
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);

	/* Walk through fragments adding latest fragment, testing it, and
	 * then removing stale fragments from the sum.
	 */
	stale = &skb_shinfo(skb)->frags[0];
	for (;;) {
		sum += skb_frag_size(frag++);

		/* if sum is negative we failed to make sufficient progress */
		if (sum < 0)
			return true;

		/* use pre-decrement to avoid processing last fragment */
		if (!--nr_frags)
			break;

		sum -= skb_frag_size(stale++);
	}

	return false;
}
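/* Worked example: with I40E_MAX_BUFFER_TXD of 8, a TSO skb carrying ten 2KB
 * fragments and a gso_size of 16KB fails the sliding-window test above,
 * since no six consecutive fragments reach 16KB of payload; such an skb is
 * linearized before mapping so no single segment ever needs more than eight
 * DMA descriptors.
 */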
/**
 * __i40evf_maybe_stop_tx - 2nd level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 **/
int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available. */
	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}
/**
 * i40evf_tx_map - Build the Tx descriptor
 * @tx_ring:  ring to send buffer on
 * @skb:      send buffer
 * @first:    first buffer info buffer to use
 * @tx_flags: collected send information
 * @hdr_len:  size of the packet header
 * @td_cmd:   the command field in the descriptor
 * @td_offset: offset for checksum or crc
 **/
static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
				 struct i40e_tx_buffer *first, u32 tx_flags,
				 const u8 hdr_len, u32 td_cmd, u32 td_offset)
{
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	struct skb_frag_struct *frag;
	struct i40e_tx_buffer *tx_bi;
	struct i40e_tx_desc *tx_desc;
	u16 i = tx_ring->next_to_use;
	u32 td_tag = 0;
	dma_addr_t dma;
	u16 gso_segs;
	u16 desc_count = 0;
	bool tail_bump = true;
	bool do_rs = false;

	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
			 I40E_TX_FLAGS_VLAN_SHIFT;
	}

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
		gso_segs = skb_shinfo(skb)->gso_segs;
	else
		gso_segs = 1;

	/* multiply data chunks by size of headers */
	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
	first->gso_segs = gso_segs;
	first->skb = skb;
	first->tx_flags = tx_flags;

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_bi = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_bi, len, size);
		dma_unmap_addr_set(tx_bi, dma, dma);

		tx_desc->buffer_addr = cpu_to_le64(dma);

		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				build_ctob(td_cmd, td_offset,
					   I40E_MAX_DATA_PER_TXD, td_tag);

			tx_desc++;
			i++;
			desc_count++;

			if (i == tx_ring->count) {
				tx_desc = I40E_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += I40E_MAX_DATA_PER_TXD;
			size -= I40E_MAX_DATA_PER_TXD;

			tx_desc->buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
							  size, td_tag);

		tx_desc++;
		i++;
		desc_count++;

		if (i == tx_ring->count) {
			tx_desc = I40E_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_bi = &tx_ring->tx_bi[i];
	}

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),
			     first->bytecount);
	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);

	/* Algorithm to optimize tail and RS bit setting:
	 * if xmit_more is supported
	 *	if xmit_more is true
	 *		do not update tail and do not mark RS bit.
	 *	if xmit_more is false and last xmit_more was false
	 *		if every packet spanned less than 4 desc
	 *			then set RS bit on 4th packet and update tail
	 *			on every packet
	 *		else
	 *			update tail and set RS bit on every packet.
	 *	if xmit_more is false and last_xmit_more was true
	 *		update tail and set RS bit.
	 *
	 * Optimization: wmb to be issued only in case of tail update.
	 * Also optimize the Descriptor WB path for RS bit with the same
	 * algorithm.
	 *
	 * Note: If there are less than 4 packets
	 * pending and interrupts were disabled the service task will
	 * trigger a force WB.
	 */
	if (skb->xmit_more &&
	    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
						    tx_ring->queue_index))) {
		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
		tail_bump = false;
	} else if (!skb->xmit_more &&
		   !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
						       tx_ring->queue_index)) &&
		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
		   (tx_ring->packet_stride < WB_STRIDE) &&
		   (desc_count < WB_STRIDE)) {
		tx_ring->packet_stride++;
	} else {
		tx_ring->packet_stride = 0;
		tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
		do_rs = true;
	}
	if (do_rs)
		tx_ring->packet_stride = 0;

	tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
						  I40E_TX_DESC_CMD_EOP) <<
						  I40E_TXD_QW1_CMD_SHIFT);

	/* notify HW of packet */
	if (!tail_bump)
		prefetchw(tx_desc + 1);

	if (tail_bump) {
		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(i, tx_ring->tail);
	}

	return;

dma_error:
	dev_info(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_bi map */
	for (;;) {
		tx_bi = &tx_ring->tx_bi[i];
		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
		if (tx_bi == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}
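/* In practice the tail/RS policy above works out to: packets queued with
 * xmit_more leave the tail register untouched; every packet sent without
 * xmit_more bumps the tail, but the RS (report status) bit is only set
 * roughly every WB_STRIDE-th small packet, or immediately for large packets
 * and after an xmit_more run, which batches descriptor write-backs.
 */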
/**
 * i40e_xmit_frame_ring - Sends buffer on Tx ring
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
					struct i40e_ring *tx_ring)
{
	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
	u32 cd_tunneling = 0, cd_l2tag2 = 0;
	struct i40e_tx_buffer *first;
	u32 td_offset = 0;
	u32 tx_flags = 0;
	__be16 protocol;
	u32 td_cmd = 0;
	u8 hdr_len = 0;
	int tso, count;

	/* prefetch the data, we'll need it later */
	prefetch(skb->data);

	count = i40e_xmit_descriptor_count(skb);
	if (i40e_chk_linearize(skb, count)) {
		if (__skb_linearize(skb))
			goto out_drop;
		count = TXD_USE_COUNT(skb->len);
		tx_ring->tx_stats.tx_linearize++;
	}

	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
	 *       + 4 desc gap to avoid the cache line where head is,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}

	/* prepare the xmit flags */
	if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
		goto out_drop;

	/* obtain protocol of skb */
	protocol = vlan_get_protocol(skb);

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_bi[tx_ring->next_to_use];

	/* setup IPv4/IPv6 offloads */
	if (protocol == htons(ETH_P_IP))
		tx_flags |= I40E_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		tx_flags |= I40E_TX_FLAGS_IPV6;

	tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);

	if (tso < 0)
		goto out_drop;
	else if (tso)
		tx_flags |= I40E_TX_FLAGS_TSO;

	/* Always offload the checksum, since it's in the data descriptor */
	tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
				  tx_ring, &cd_tunneling);
	if (tso < 0)
		goto out_drop;

	skb_tx_timestamp(skb);

	/* always enable CRC insertion offload */
	td_cmd |= I40E_TX_DESC_CMD_ICRC;

	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
			   cd_tunneling, cd_l2tag2);

	i40evf_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
		      td_cmd, td_offset);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
/**
 * i40evf_xmit_frame - Selects the correct VSI and Tx queue to send buffer
 * @skb:    send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct i40evf_adapter *adapter = netdev_priv(netdev);
	struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
		if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
			return NETDEV_TX_OK;
		skb->len = I40E_MIN_TX_LEN;
		skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
	}

	return i40e_xmit_frame_ring(skb, tx_ring);
}