/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <asm/page.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/prefetch.h>
#include <linux/vmalloc.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/moduleparam.h>

#include "mlx4_en.h"
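
/*
 * TX ring setup: mlx4_en_create_tx_ring() below allocates the per-descriptor
 * tx_info array (kmalloc_node() with a vmalloc() fallback), a bounce buffer
 * for descriptors that would wrap past the end of the ring, the HW queue
 * resources on the requested NUMA node, and a QP for the ring.  If no
 * BlueFlame register can be allocated, the ring falls back to regular
 * doorbells on the driver UAR.
 */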
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_tx_ring **pring, u32 size,
			   u16 stride, int node, int queue_index)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring;
	int tmp;
	int err;

	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
	if (!ring) {
		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
		if (!ring) {
			en_err(priv, "Failed allocating TX ring\n");
			return -ENOMEM;
		}
	}

	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;

	tmp = size * sizeof(struct mlx4_en_tx_info);
	ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
	if (!ring->tx_info) {
		ring->tx_info = vmalloc(tmp);
		if (!ring->tx_info) {
			err = -ENOMEM;
			goto err_ring;
		}
	}

	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
	       ring->tx_info, tmp);

	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
	if (!ring->bounce_buf) {
		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
		if (!ring->bounce_buf) {
			err = -ENOMEM;
			goto err_info;
		}
	}
	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);

	/* Allocate HW buffers on provided NUMA node */
	set_dev_node(&mdev->dev->persist->pdev->dev, node);
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
				 2 * PAGE_SIZE);
	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
	if (err) {
		en_err(priv, "Failed allocating hwq resources\n");
		goto err_bounce;
	}

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map TX buffer\n");
		goto err_hwq_res;
	}

	ring->buf = ring->wqres.buf.direct.buf;

	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
	       ring, ring->buf, ring->size, ring->buf_size,
	       (unsigned long long) ring->wqres.buf.direct.map);

	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
				    MLX4_RESERVE_ETH_BF_QP);
	if (err) {
		en_err(priv, "failed reserving qp for TX ring\n");
		goto err_map;
	}

	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
		goto err_reserve;
	}
	ring->qp.event = mlx4_en_sqp_event;

	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
	if (err) {
		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
		ring->bf.uar = &mdev->priv_uar;
		ring->bf.uar->map = mdev->uar_map;
		ring->bf_enabled = false;
		ring->bf_alloced = false;
		priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
	} else {
		ring->bf_alloced = true;
		ring->bf_enabled = !!(priv->pflags &
				      MLX4_EN_PRIV_FLAGS_BLUEFLAME);
	}

	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
	ring->queue_index = queue_index;

	if (queue_index < priv->num_tx_rings_p_up)
		cpumask_set_cpu(cpumask_local_spread(queue_index,
						     priv->mdev->dev->numa_node),
				&ring->affinity_mask);

	*pring = ring;
	return 0;

err_reserve:
	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq_res:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_bounce:
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
err_info:
	kvfree(ring->tx_info);
	ring->tx_info = NULL;
err_ring:
	kfree(ring);
	*pring = NULL;
	return err;
}
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring **pring)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring = *pring;
	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);

	if (ring->bf_alloced)
		mlx4_bf_free(mdev->dev, &ring->bf);
	mlx4_qp_remove(mdev->dev, &ring->qp);
	mlx4_qp_free(mdev->dev, &ring->qp);
	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
	kvfree(ring->tx_info);
	ring->tx_info = NULL;
	kfree(ring);
	*pring = NULL;
}
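
/*
 * mlx4_en_activate_tx_ring() resets the ring state (consumer/producer,
 * tx_info array and descriptor buffer), fills the QP context for this ring's
 * CQ and priority, moves the QP to the ready state, and uses the affinity
 * mask set up at creation time as the XPS mapping for the queue.
 */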
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring *ring,
			     int cq, int user_prio)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;

	ring->cqn = cq;
	ring->prod = 0;
	ring->cons = 0xffffffff;
	ring->last_nr_txbb = 1;
	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
	memset(ring->buf, 0, ring->buf_size);

	ring->qp_state = MLX4_QP_STATE_RST;
	ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
	ring->mr_key = cpu_to_be32(mdev->mr.key);

	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
				ring->cqn, user_prio, &ring->context);
	if (ring->bf_alloced)
		ring->context.usr_page =
			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
							 ring->bf.uar->index));

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
			       &ring->qp, &ring->qp_state);
	if (!cpumask_empty(&ring->affinity_mask))
		netif_set_xps_queue(priv->dev, &ring->affinity_mask,
				    ring->queue_index);

	return err;
}
void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_qp_modify(mdev->dev, NULL, ring->qp_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
}
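
/*
 * Ring index arithmetic: the ring size is a power of two, so
 * 'prod & size_mask' selects a TXBB slot while 'prod & size' flips between
 * zero and non-zero on every wrap of the ring.  That alternating bit is used
 * as the ownership value stamped into freed descriptors and, on the CQ side,
 * to tell fresh CQEs from stale ones.  For example, with size = 256,
 * prod = 5 and prod = 261 both map to slot 5 but carry opposite ownership.
 */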
static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
{
	return ring->prod - ring->cons > ring->full_size;
}
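
/*
 * mlx4_en_stamp_wqe() writes STAMP_VAL plus the current owner bit into each
 * TXBB of a completed descriptor so that ownership tracking stays consistent
 * when the producer wraps around and reuses the slot; the stamp value is
 * flipped whenever the walk itself wraps past the end of the ring buffer.
 */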
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
			      struct mlx4_en_tx_ring *ring, int index,
			      u8 owner)
{
	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	void *end = ring->buf + ring->buf_size;
	__be32 *ptr = (__be32 *)tx_desc;
	int i;

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
		     i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
		}
	} else {
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
		     i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
			if ((void *)ptr >= end) {
				ptr = ring->buf;
				stamp ^= cpu_to_be32(0x80000000);
			}
		}
	}
}
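
/*
 * mlx4_en_free_tx_desc() releases one completed descriptor: it propagates a
 * HW timestamp to the stack if one was requested, unmaps the first mapping
 * (dma_map_single() for the linear part, dma_map_page() for a frag) and any
 * remaining frag mappings - handling descriptors that wrap past the end of
 * the ring buffer - and finally hands the skb back via napi_consume_skb().
 */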
static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring,
				int index, u8 owner, u64 timestamp,
				int napi_mode)
{
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
	void *end = ring->buf + ring->buf_size;
	struct sk_buff *skb = tx_info->skb;
	int nr_maps = tx_info->nr_maps;
	int i;

	/* We do not touch skb here, so prefetch skb->users location
	 * to speedup consume_skb()
	 */
	prefetchw(&skb->users);

	if (unlikely(timestamp)) {
		struct skb_shared_hwtstamps hwts;

		mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
		skb_tstamp_tx(skb, &hwts);
	}

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		if (!tx_info->inl) {
			if (tx_info->linear)
				dma_unmap_single(priv->ddev,
						 tx_info->map0_dma,
						 tx_info->map0_byte_count,
						 PCI_DMA_TODEVICE);
			else
				dma_unmap_page(priv->ddev,
					       tx_info->map0_dma,
					       tx_info->map0_byte_count,
					       PCI_DMA_TODEVICE);
			for (i = 1; i < nr_maps; i++) {
				data++;
				dma_unmap_page(priv->ddev,
					(dma_addr_t)be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
			}
		}
	} else {
		if (!tx_info->inl) {
			if ((void *) data >= end) {
				data = ring->buf + ((void *)data - end);
			}

			if (tx_info->linear)
				dma_unmap_single(priv->ddev,
						 tx_info->map0_dma,
						 tx_info->map0_byte_count,
						 PCI_DMA_TODEVICE);
			else
				dma_unmap_page(priv->ddev,
					       tx_info->map0_dma,
					       tx_info->map0_byte_count,
					       PCI_DMA_TODEVICE);
			for (i = 1; i < nr_maps; i++) {
				data++;
				/* Check for wraparound before unmapping */
				if ((void *) data >= end)
					data = ring->buf;
				dma_unmap_page(priv->ddev,
					(dma_addr_t)be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
			}
		}
	}
	napi_consume_skb(skb, napi_mode);

	return tx_info->nr_txbb;
}
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int cnt = 0;

	/* Skip last polled descriptor */
	ring->cons += ring->last_nr_txbb;
	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
	       ring->cons, ring->prod);

	if ((u32) (ring->prod - ring->cons) > ring->size) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Tx consumer passed producer!\n");
		return 0;
	}

	while (ring->cons != ring->prod) {
		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
						ring->cons & ring->size_mask,
						!!(ring->cons & ring->size), 0,
						0 /* Non-NAPI caller */);
		ring->cons += ring->last_nr_txbb;
		cnt++;
	}

	netdev_tx_reset_queue(ring->tx_queue);

	if (cnt)
		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);

	return cnt;
}
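
/*
 * TX completion processing: CQE ownership alternates on every pass around
 * the CQ, so a CQE belongs to the current pass when its owner bit XNORed
 * with (cons_index & size) is true.  For each completed WQE the loop frees
 * the descriptors it covers, stamps them for reuse, and accumulates packet
 * and byte counts for BQL.  The CQ consumer index is published before the
 * ring consumer so that the CQ cannot overflow.
 */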
static bool mlx4_en_process_tx_cq(struct net_device *dev,
				  struct mlx4_en_cq *cq, int napi_budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe;
	u16 index;
	u16 new_index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	u32 packets = 0;
	u32 bytes = 0;
	int factor = priv->cqe_factor;
	int done = 0;
	int budget = priv->tx_work_limit;
	u32 last_nr_txbb;
	u32 ring_cons;

	netdev_txq_bql_complete_prefetchw(ring->tx_queue);

	index = cons_index & size_mask;
	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
	ring_cons = ACCESS_ONCE(ring->cons);
	ring_index = ring_cons & size_mask;
	stamp_index = ring_index;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size) && (done < budget)) {
		u64 timestamp = 0;

		/*
		 * make sure we read the CQE after we read the
		 * ownership bit
		 */
		dma_rmb();

		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
			     MLX4_CQE_OPCODE_ERROR)) {
			struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;

			en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
			       cqe_err->vendor_err_syndrome,
			       cqe_err->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			txbbs_skipped += last_nr_txbb;
			ring_index = (ring_index + last_nr_txbb) & size_mask;

			if (unlikely(ring->tx_info[ring_index].ts_requested))
				timestamp = mlx4_en_get_cqe_ts(cqe);

			/* free next descriptor */
			last_nr_txbb = mlx4_en_free_tx_desc(
					priv, ring, ring_index,
					!!((ring_cons + txbbs_skipped) &
					ring->size), timestamp, napi_budget);

			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					  !!((ring_cons + txbbs_stamp) &
					     ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;
			packets++;
			bytes += ring->tx_info[ring_index].nr_bytes;
		} while ((++done < budget) && (ring_index != new_index));

		++cons_index;
		index = cons_index & size_mask;
		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	}

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();

	/* we want to dirty this cache line once */
	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;

	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);

	/* Wakeup Tx queue if this stopped, and ring is not full.
	 */
	if (netif_tx_queue_stopped(ring->tx_queue) &&
	    !mlx4_en_is_tx_ring_full(ring)) {
		netif_tx_wake_queue(ring->tx_queue);
	}
	return done < budget;
}
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (likely(priv->port_up))
		napi_schedule_irqoff(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}
/* TX CQ polling - called by NAPI */
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	bool clean_complete;

	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
	if (!clean_complete)
		return budget;

	napi_complete(napi);
	mlx4_en_arm_cq(priv, cq);

	return 0;
}
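
/*
 * A descriptor that would extend past the end of the ring buffer is first
 * built in ring->bounce_buf and then copied back in two chunks: the tail
 * that wrapped to the start of the buffer, then the head at the original
 * index.  The first dword of the descriptor is skipped (the copy loop stops
 * at i >= 4); it holds the ownership/opcode word, which the caller writes
 * last when handing the descriptor to HW.
 */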
static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
						      struct mlx4_en_tx_ring *ring,
						      u32 index,
						      unsigned int desc_size)
{
	u32 copy = (ring->size - index) * TXBB_SIZE;
	int i;

	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + i)) =
			*((u32 *) (ring->bounce_buf + copy + i));
	}

	for (i = copy - 4; i >= 4 ; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
			*((u32 *) (ring->bounce_buf + i));
	}

	/* Return real descriptor location */
	return ring->buf + index * TXBB_SIZE;
}
/* Decide if skb can be inlined in tx descriptor to avoid dma mapping
 *
 * It seems strange we do not simply use skb_copy_bits().
 * This would allow to inline all skbs iff skb->len <= inline_thold
 *
 * Note that caller already checked skb was not a gso packet
 */
static bool is_inline(int inline_thold, const struct sk_buff *skb,
		      const struct skb_shared_info *shinfo,
		      void **pfrag)
{
	void *ptr;

	if (skb->len > inline_thold || !inline_thold)
		return false;

	if (shinfo->nr_frags == 1) {
		ptr = skb_frag_address_safe(&shinfo->frags[0]);
		if (unlikely(!ptr))
			return false;
		*pfrag = ptr;
		return true;
	}
	if (shinfo->nr_frags)
		return false;

	return true;
}
static int inline_size(const struct sk_buff *skb)
{
	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
	    <= MLX4_INLINE_ALIGN)
		return ALIGN(skb->len + CTRL_SIZE +
			     sizeof(struct mlx4_wqe_inline_seg), 16);
	else
		return ALIGN(skb->len + CTRL_SIZE + 2 *
			     sizeof(struct mlx4_wqe_inline_seg), 16);
}
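
/*
 * get_real_size() returns the WQE size in bytes: for LSO packets this is the
 * control segment, an LSO segment big enough for the headers, and one data
 * segment per frag (plus one for the linear buffer when it carries payload
 * beyond the headers); otherwise it is either the inline layout computed by
 * inline_size() or a control segment plus one data segment per fragment and
 * one for the linear part.
 */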
static int get_real_size(const struct sk_buff *skb,
			 const struct skb_shared_info *shinfo,
			 struct net_device *dev,
			 int *lso_header_size,
			 bool *inline_ok,
			 void **pfrag)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int real_size;

	if (shinfo->gso_size) {
		*inline_ok = false;
		if (skb->encapsulation)
			*lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
		else
			*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
		real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
			ALIGN(*lso_header_size + 4, DS_SIZE);
		if (unlikely(*lso_header_size != skb_headlen(skb))) {
			/* We add a segment for the skb linear buffer only if
			 * it contains data */
			if (*lso_header_size < skb_headlen(skb))
				real_size += DS_SIZE;
			else {
				if (netif_msg_tx_err(priv))
					en_warn(priv, "Non-linear headers\n");
				return 0;
			}
		}
	} else {
		*lso_header_size = 0;
		*inline_ok = is_inline(priv->prof->inline_thold, skb,
				       shinfo, pfrag);

		if (*inline_ok)
			real_size = inline_size(skb);
		else
			real_size = CTRL_SIZE +
				    (shinfo->nr_frags + 1) * DS_SIZE;
	}

	return real_size;
}
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
			     const struct sk_buff *skb,
			     const struct skb_shared_info *shinfo,
			     int real_size, u16 *vlan_tag,
			     int tx_ind, void *fragptr)
{
	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
	unsigned int hlen = skb_headlen(skb);

	if (skb->len <= spc) {
		if (likely(skb->len >= MIN_PKT_LEN)) {
			inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
		} else {
			inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN);
			memset(((void *)(inl + 1)) + skb->len, 0,
			       MIN_PKT_LEN - skb->len);
		}
		skb_copy_from_linear_data(skb, inl + 1, hlen);
		if (shinfo->nr_frags)
			memcpy(((void *)(inl + 1)) + hlen, fragptr,
			       skb_frag_size(&shinfo->frags[0]));

	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		if (hlen <= spc) {
			skb_copy_from_linear_data(skb, inl + 1, hlen);
			if (hlen < spc) {
				memcpy(((void *)(inl + 1)) + hlen,
				       fragptr, spc - hlen);
				fragptr += spc - hlen;
			}
			inl = (void *) (inl + 1) + spc;
			memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
		} else {
			skb_copy_from_linear_data(skb, inl + 1, spc);
			inl = (void *) (inl + 1) + spc;
			skb_copy_from_linear_data_offset(skb, spc, inl + 1,
							 hlen - spc);
			if (shinfo->nr_frags)
				memcpy(((void *)(inl + 1)) + hlen - spc,
				       fragptr,
				       skb_frag_size(&shinfo->frags[0]));
		}

		dma_wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
	}
}
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
			 void *accel_priv, select_queue_fallback_t fallback)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	u16 rings_p_up = priv->num_tx_rings_p_up;
	u8 up = 0;

	if (dev->num_tc)
		return skb_tx_hash(dev, skb);

	if (skb_vlan_tag_present(skb))
		up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;

	return fallback(dev, skb) % rings_p_up + up * rings_p_up;
}
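
/*
 * BlueFlame: for small descriptors the whole WQE is written into a dedicated
 * UAR register window with 64-bit copies, which both rings the doorbell and
 * delivers the descriptor, avoiding the separate doorbell write and the
 * subsequent descriptor fetch over PCIe.
 */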
static void mlx4_bf_copy(void __iomem *dst, const void *src,
			 unsigned int bytecnt)
{
	__iowrite64_copy(dst, src, bytecnt / 8);
}
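
/*
 * Main transmit path: mlx4_en_xmit() sizes the WQE, maps (or inlines) the
 * payload, builds the control/LSO/data segments, and hands ownership to the
 * NIC either through a BlueFlame copy of the whole descriptor or through a
 * regular doorbell write, honoring xmit_more batching.
 */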
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct device *ddev = priv->ddev;
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_tx_desc *tx_desc;
	struct mlx4_wqe_data_seg *data;
	struct mlx4_en_tx_info *tx_info;
	int tx_ind;
	int nr_txbb;
	int desc_size;
	int real_size;
	u32 index, bf_index;
	__be32 op_own;
	u16 vlan_tag = 0;
	u16 vlan_proto = 0;
	int i_frag;
	int lso_header_size;
	void *fragptr = NULL;
	bool bounce = false;
	bool send_doorbell;
	bool stop_queue;
	bool inline_ok;
	u32 ring_cons;

	tx_ind = skb_get_queue_mapping(skb);
	ring = priv->tx_ring[tx_ind];

	/* fetch ring->cons far ahead before needing it to avoid stall */
	ring_cons = ACCESS_ONCE(ring->cons);

	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
				  &inline_ok, &fragptr);
	if (unlikely(!real_size))
		goto tx_drop;

	/* Align descriptor to TXBB size */
	desc_size = ALIGN(real_size, TXBB_SIZE);
	nr_txbb = desc_size / TXBB_SIZE;
	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Oversized header or SG list\n");
		goto tx_drop;
	}

	if (skb_vlan_tag_present(skb)) {
		vlan_tag = skb_vlan_tag_get(skb);
		vlan_proto = be16_to_cpu(skb->vlan_proto);
	}

	netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);

	/* Track current inflight packets for performance analysis */
	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
			 (u32)(ring->prod - ring_cons - 1));

	/* Packet is good - grab an index and transmit it */
	index = ring->prod & ring->size_mask;
	bf_index = ring->prod;

	/* See if we have enough space for whole descriptor TXBB for setting
	 * SW ownership on next descriptor; if not, use a bounce buffer. */
	if (likely(index + nr_txbb <= ring->size))
		tx_desc = ring->buf + index * TXBB_SIZE;
	else {
		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
		bounce = true;
	}

	/* Save skb in tx_info ring */
	tx_info = &ring->tx_info[index];
	tx_info->skb = skb;
	tx_info->nr_txbb = nr_txbb;

	data = &tx_desc->data;
	if (lso_header_size)
		data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
						      DS_SIZE));

	/* valid only for none inline segments */
	tx_info->data_offset = (void *)data - (void *)tx_desc;

	tx_info->inl = inline_ok;

	tx_info->linear = (lso_header_size < skb_headlen(skb) &&
			   !inline_ok) ? 1 : 0;

	tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
	data += tx_info->nr_maps - 1;

	if (!tx_info->inl) {
		dma_addr_t dma = 0;
		u32 byte_count = 0;

		/* Map fragments if any */
		for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
			const struct skb_frag_struct *frag;

			frag = &shinfo->frags[i_frag];
			byte_count = skb_frag_size(frag);
			dma = skb_frag_dma_map(ddev, frag,
					       0, byte_count,
					       DMA_TO_DEVICE);
			if (dma_mapping_error(ddev, dma))
				goto tx_drop_unmap;

			data->addr = cpu_to_be64(dma);
			data->lkey = ring->mr_key;
			dma_wmb();
			data->byte_count = cpu_to_be32(byte_count);
			--data;
		}

		/* Map linear part if needed */
		if (tx_info->linear) {
			byte_count = skb_headlen(skb) - lso_header_size;

			dma = dma_map_single(ddev, skb->data +
					     lso_header_size, byte_count,
					     DMA_TO_DEVICE);
			if (dma_mapping_error(ddev, dma))
				goto tx_drop_unmap;

			data->addr = cpu_to_be64(dma);
			data->lkey = ring->mr_key;
			dma_wmb();
			data->byte_count = cpu_to_be32(byte_count);
		}
		/* tx completion can avoid cache line miss for common cases */
		tx_info->map0_dma = dma;
		tx_info->map0_byte_count = byte_count;
	}

	/*
	 * For timestamping add flag to skb_shinfo and
	 * set flag for further reference
	 */
	tx_info->ts_requested = 0;
	if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
		     shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
		shinfo->tx_flags |= SKBTX_IN_PROGRESS;
		tx_info->ts_requested = 1;
	}

	/* Prepare ctrl segment apart opcode+ownership, which depends on
	 * whether LSO is used */
	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		if (!skb->encapsulation)
			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
								 MLX4_WQE_CTRL_TCP_UDP_CSUM);
		else
			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
	}

	if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
		struct ethhdr *ethh;

		/* Copy dst mac address to wqe. This allows loopback in eSwitch,
		 * so that VFs and PF can communicate with each other
		 */
		ethh = (struct ethhdr *)skb->data;
		tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
		tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
	}

	/* Handle LSO (TSO) packets */
	if (lso_header_size) {
		int i;

		/* Mark opcode as LSO */
		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);

		/* Fill in the LSO prefix */
		tx_desc->lso.mss_hdr_size = cpu_to_be32(
			shinfo->gso_size << 16 | lso_header_size);

		/* Copy headers;
		 * note that we already verified that it is linear */
		memcpy(tx_desc->lso.header, skb->data, lso_header_size);

		i = ((skb->len - lso_header_size) / shinfo->gso_size) +
			!!((skb->len - lso_header_size) % shinfo->gso_size);
		tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
	} else {
		/* Normal (Non LSO) packet */
		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
		tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
	}
	ring->bytes += tx_info->nr_bytes;
	netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);

	if (tx_info->inl)
		build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag,
				 tx_ind, fragptr);

	if (skb->encapsulation) {
		struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
		if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP)
			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
		else
			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
	}

	ring->prod += nr_txbb;

	/* If we used a bounce buffer then copy descriptor back into place */
	if (unlikely(bounce))
		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);

	skb_tx_timestamp(skb);

	/* Check available TXBBs and 2K spare for prefetch */
	stop_queue = mlx4_en_is_tx_ring_full(ring);
	if (unlikely(stop_queue)) {
		netif_tx_stop_queue(ring->tx_queue);
		ring->queue_stopped++;
	}
	send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);

	real_size = (real_size / 16) & 0x3f;

	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce &&
	    !skb_vlan_tag_present(skb) && send_doorbell) {
		tx_desc->ctrl.bf_qpn = ring->doorbell_qpn |
				       cpu_to_be32(real_size);

		op_own |= htonl((bf_index & 0xffff) << 8);
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW
		 */
		dma_wmb();
		tx_desc->ctrl.owner_opcode = op_own;

		wmb();

		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
			     desc_size);

		wmb();

		ring->bf.offset ^= ring->bf.buf_size;
	} else {
		tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
		if (vlan_proto == ETH_P_8021AD)
			tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
		else if (vlan_proto == ETH_P_8021Q)
			tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
		else
			tx_desc->ctrl.ins_vlan = 0;

		tx_desc->ctrl.fence_size = real_size;

		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW
		 */
		dma_wmb();
		tx_desc->ctrl.owner_opcode = op_own;
		if (send_doorbell) {
			wmb();
			/* Since there is no iowrite*_native() that writes the
			 * value as is, without byteswapping - using the one
			 * that doesn't do byteswapping in the relevant arch
			 * endianness.
			 */
#if defined(__LITTLE_ENDIAN)
			iowrite32(
#else
			iowrite32be(
#endif
				  ring->doorbell_qpn,
				  ring->bf.uar->map + MLX4_SEND_DOORBELL);
		}
	}

	if (unlikely(stop_queue)) {
		/* If queue was emptied after the if (stop_queue), and before
		 * the netif_tx_stop_queue() - need to wake the queue,
		 * or else it will remain stopped forever.
		 * Need a memory barrier to make sure ring->cons was not
		 * updated before queue was stopped.
		 */
		smp_rmb();

		ring_cons = ACCESS_ONCE(ring->cons);
		if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
			netif_tx_wake_queue(ring->tx_queue);
		}
	}
	return NETDEV_TX_OK;

tx_drop_unmap:
	en_err(priv, "DMA mapping error\n");

	while (++i_frag < shinfo->nr_frags) {
		++data;
		dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr),
			       be32_to_cpu(data->byte_count),
			       PCI_DMA_TODEVICE);
	}

tx_drop:
	dev_kfree_skb_any(skb);
	priv->stats.tx_dropped++;
	return NETDEV_TX_OK;
}