2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #include <linux/mlx4/cq.h>
36 #include <linux/mlx4/qp.h>
37 #include <linux/skbuff.h>
38 #include <linux/if_vlan.h>
39 #include <linux/vmalloc.h>
/*
 * Inline-send threshold.
 * MAX_INLINE (104) is the compile-time ceiling; the trailing comment shows
 * the arithmetic it was derived from.  inline_thold defaults to MAX_INLINE
 * and is exposed as a module parameter of the same name with permissions
 * 0444.
 * NOTE(review): the enclosing declaration for MAX_INLINE is not visible in
 * this listing.
 */
44 MAX_INLINE
= 104, /* 128 - 16 - 4 - 4 */
47 static int inline_thold __read_mostly
= MAX_INLINE
;
49 module_param_named(inline_thold
, inline_thold
, int, 0444);
50 MODULE_PARM_DESC(inline_thold
, "treshold for using inline data");
/*
 * Allocate the resources backing one TX ring.
 *
 * In the order visible here: records size_mask and stride, initialises
 * comp_lock, vmalloc()s the tx_info array (size entries), kmalloc()s the
 * bounce buffer (MAX_DESC_SIZE bytes), allocates and maps the hardware
 * queue buffer, reserves one QP number and allocates the QP, then installs
 * mlx4_en_sqp_event as the QP event handler.
 *
 * NOTE(review): this listing is missing lines (the embedded original line
 * numbers skip) - the end of the parameter list, the local declarations,
 * the error checks/gotos and the return statements are not visible, so the
 * exact return contract must be confirmed against the full source.
 */
52 int mlx4_en_create_tx_ring(struct mlx4_en_priv
*priv
,
53 struct mlx4_en_tx_ring
*ring
, u32 size
,
56 struct mlx4_en_dev
*mdev
= priv
->mdev
;
61 ring
->size_mask
= size
- 1;
62 ring
->stride
= stride
;
/* Clamps the file-scope module parameter itself, not a per-ring copy. */
64 inline_thold
= min(inline_thold
, MAX_INLINE
);
66 spin_lock_init(&ring
->comp_lock
);
/* One mlx4_en_tx_info entry per ring slot. */
68 tmp
= size
* sizeof(struct mlx4_en_tx_info
);
69 ring
->tx_info
= vmalloc(tmp
);
71 mlx4_err(mdev
, "Failed allocating tx_info ring\n");
74 mlx4_dbg(DRV
, priv
, "Allocated tx_info ring at addr:%p size:%d\n",
77 ring
->bounce_buf
= kmalloc(MAX_DESC_SIZE
, GFP_KERNEL
);
78 if (!ring
->bounce_buf
) {
79 mlx4_err(mdev
, "Failed allocating bounce buffer\n");
/* Descriptor buffer size: size * stride rounded up to MLX4_EN_PAGE_SIZE. */
83 ring
->buf_size
= ALIGN(size
* ring
->stride
, MLX4_EN_PAGE_SIZE
);
85 err
= mlx4_alloc_hwq_res(mdev
->dev
, &ring
->wqres
, ring
->buf_size
,
88 mlx4_err(mdev
, "Failed allocating hwq resources\n");
92 err
= mlx4_en_map_buffer(&ring
->wqres
.buf
);
94 mlx4_err(mdev
, "Failed to map TX buffer\n");
98 ring
->buf
= ring
->wqres
.buf
.direct
.buf
;
100 mlx4_dbg(DRV
, priv
, "Allocated TX ring (addr:%p) - buf:%p size:%d "
101 "buf_size:%d dma:%llx\n", ring
, ring
->buf
, ring
->size
,
102 ring
->buf_size
, (unsigned long long) ring
->wqres
.buf
.direct
.map
);
104 err
= mlx4_qp_reserve_range(mdev
->dev
, 1, 1, &ring
->qpn
);
106 mlx4_err(mdev
, "Failed reserving qp for tx ring.\n");
110 err
= mlx4_qp_alloc(mdev
->dev
, ring
->qpn
, &ring
->qp
);
112 mlx4_err(mdev
, "Failed allocating qp %d\n", ring
->qpn
);
115 ring
->qp
.event
= mlx4_en_sqp_event
;
/*
 * NOTE(review): the calls below release resources in the reverse order of
 * their acquisition above, so this is presumably the error-unwind path; the
 * labels that separate the steps are not visible in this listing.
 */
120 mlx4_qp_release_range(mdev
->dev
, ring
->qpn
, 1);
122 mlx4_en_unmap_buffer(&ring
->wqres
.buf
);
124 mlx4_free_hwq_res(mdev
->dev
, &ring
->wqres
, ring
->buf_size
);
126 kfree(ring
->bounce_buf
);
127 ring
->bounce_buf
= NULL
;
129 vfree(ring
->tx_info
);
130 ring
->tx_info
= NULL
;
/*
 * Tear down a TX ring.
 *
 * Removes and frees the ring's QP, releases its QP number, unmaps and frees
 * the hardware queue buffer, then frees the bounce buffer and the tx_info
 * array.  Both pointers are reset to NULL after being freed.
 */
134 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv
*priv
,
135 struct mlx4_en_tx_ring
*ring
)
137 struct mlx4_en_dev
*mdev
= priv
->mdev
;
138 mlx4_dbg(DRV
, priv
, "Destroying tx ring, qpn: %d\n", ring
->qpn
);
140 mlx4_qp_remove(mdev
->dev
, &ring
->qp
);
141 mlx4_qp_free(mdev
->dev
, &ring
->qp
);
142 mlx4_qp_release_range(mdev
->dev
, ring
->qpn
, 1);
143 mlx4_en_unmap_buffer(&ring
->wqres
.buf
);
144 mlx4_free_hwq_res(mdev
->dev
, &ring
->wqres
, ring
->buf_size
);
145 kfree(ring
->bounce_buf
);
146 ring
->bounce_buf
= NULL
;
147 vfree(ring
->tx_info
);
148 ring
->tx_info
= NULL
;
/*
 * Reset a TX ring's software state and bring its QP up.
 *
 * The consumer index starts at 0xffffffff with last_nr_txbb = 1, the
 * tx_info array and the descriptor buffer are zeroed, qp_state starts at
 * MLX4_QP_STATE_RST, the QP context is filled in and mlx4_qp_to_ready() is
 * called; its result is stored in err.
 *
 * NOTE(review): the end of the parameter list (srqn is used below, so it is
 * presumably a parameter), some initialisation lines and the return
 * statement are not visible in this listing - confirm against the full
 * source.
 */
151 int mlx4_en_activate_tx_ring(struct mlx4_en_priv
*priv
,
152 struct mlx4_en_tx_ring
*ring
,
155 struct mlx4_en_dev
*mdev
= priv
->mdev
;
160 ring
->cons
= 0xffffffff;
161 ring
->last_nr_txbb
= 1;
164 memset(ring
->tx_info
, 0, ring
->size
* sizeof(struct mlx4_en_tx_info
));
165 memset(ring
->buf
, 0, ring
->buf_size
);
167 ring
->qp_state
= MLX4_QP_STATE_RST
;
/*
 * Cache the doorbell value (QP number << 8, byte-swapped); mlx4_en_xmit()
 * writes ring->doorbell_qpn to the doorbell register as-is.
 */
168 ring
->doorbell_qpn
= swab32(ring
->qp
.qpn
<< 8);
170 mlx4_en_fill_qp_context(priv
, ring
->size
, ring
->stride
, 1, 0, ring
->qpn
,
171 ring
->cqn
, srqn
, &ring
->context
);
173 err
= mlx4_qp_to_ready(mdev
->dev
, &ring
->wqres
.mtt
, &ring
->context
,
174 &ring
->qp
, &ring
->qp_state
);
/*
 * Move the ring's QP from its current state (ring->qp_state) back to
 * MLX4_QP_STATE_RST.  The result of mlx4_qp_modify() is not captured in the
 * visible code.
 */
179 void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv
*priv
,
180 struct mlx4_en_tx_ring
*ring
)
182 struct mlx4_en_dev
*mdev
= priv
->mdev
;
184 mlx4_qp_modify(mdev
->dev
, NULL
, ring
->qp_state
,
185 MLX4_QP_STATE_RST
, NULL
, 0, 0, &ring
->qp
);
/*
 * Release one TX descriptor that starts at TXBB 'index' of the ring.
 *
 * DMA-unmaps the skb's linear part (when tx_info->linear is set) and each
 * of its page fragments, walks the descriptor in STAMP_STRIDE steps (per
 * the existing comments, to stamp it as freed), frees the skb with
 * dev_kfree_skb_any() and returns the number of TXBBs the descriptor
 * occupied (tx_info->nr_txbb).
 *
 * There are two paths: a fast one for descriptors that end at or before the
 * end of the ring buffer, and one that wraps the data-segment pointer back
 * to ring->buf when it passes 'end'.
 *
 * NOTE(review): the remaining parameters (index and owner are used below),
 * the declaration of i, and several body lines are not visible in this
 * listing.
 */
189 static u32
mlx4_en_free_tx_desc(struct mlx4_en_priv
*priv
,
190 struct mlx4_en_tx_ring
*ring
,
193 struct mlx4_en_dev
*mdev
= priv
->mdev
;
194 struct mlx4_en_tx_info
*tx_info
= &ring
->tx_info
[index
];
195 struct mlx4_en_tx_desc
*tx_desc
= ring
->buf
+ index
* TXBB_SIZE
;
196 struct mlx4_wqe_data_seg
*data
= (void *) tx_desc
+ tx_info
->data_offset
;
197 struct sk_buff
*skb
= tx_info
->skb
;
198 struct skb_frag_struct
*frag
;
/* One past the last byte of the descriptor buffer. */
199 void *end
= ring
->buf
+ ring
->buf_size
;
200 int frags
= skb_shinfo(skb
)->nr_frags
;
202 __be32
*ptr
= (__be32
*)tx_desc
;
/* Stamp word: STAMP_VAL with the normalised owner bit at STAMP_SHIFT. */
203 __be32 stamp
= cpu_to_be32(STAMP_VAL
| (!!owner
<< STAMP_SHIFT
));
205 /* Optimize the common case when there are no wraparounds */
206 if (likely((void *) tx_desc
+ tx_info
->nr_txbb
* TXBB_SIZE
<= end
)) {
208 if (tx_info
->linear
) {
209 pci_unmap_single(mdev
->pdev
,
210 (dma_addr_t
) be64_to_cpu(data
->addr
),
211 be32_to_cpu(data
->byte_count
),
216 for (i
= 0; i
< frags
; i
++) {
217 frag
= &skb_shinfo(skb
)->frags
[i
];
218 pci_unmap_page(mdev
->pdev
,
219 (dma_addr_t
) be64_to_cpu(data
[i
].addr
),
220 frag
->size
, PCI_DMA_TODEVICE
);
223 /* Stamp the freed descriptor */
224 for (i
= 0; i
< tx_info
->nr_txbb
* TXBB_SIZE
; i
+= STAMP_STRIDE
) {
/* Wrapping path: fold a data pointer that passed 'end' back into the ring. */
231 if ((void *) data
>= end
) {
232 data
= (struct mlx4_wqe_data_seg
*)
233 (ring
->buf
+ ((void *) data
- end
));
236 if (tx_info
->linear
) {
237 pci_unmap_single(mdev
->pdev
,
238 (dma_addr_t
) be64_to_cpu(data
->addr
),
239 be32_to_cpu(data
->byte_count
),
244 for (i
= 0; i
< frags
; i
++) {
245 /* Check for wraparound before unmapping */
246 if ((void *) data
>= end
)
247 data
= (struct mlx4_wqe_data_seg
*) ring
->buf
;
248 frag
= &skb_shinfo(skb
)->frags
[i
];
249 pci_unmap_page(mdev
->pdev
,
250 (dma_addr_t
) be64_to_cpu(data
->addr
),
251 frag
->size
, PCI_DMA_TODEVICE
);
254 /* Stamp the freed descriptor */
255 for (i
= 0; i
< tx_info
->nr_txbb
* TXBB_SIZE
; i
+= STAMP_STRIDE
) {
258 if ((void *) ptr
>= end
) {
/* Once the stamp pointer passes 'end', the top bit of the stamp is flipped. */
260 stamp
^= cpu_to_be32(0x80000000);
265 dev_kfree_skb_any(skb
);
266 return tx_info
->nr_txbb
;
/*
 * Free every descriptor still outstanding on the ring.
 *
 * The consumer index is first advanced by last_nr_txbb (skipping the last
 * polled descriptor).  A warning is logged when the producer/consumer
 * distance exceeds the ring size.  Descriptors are then released one by one
 * with mlx4_en_free_tx_desc() until cons == prod, and the number freed
 * (cnt) is logged.
 *
 * NOTE(review): the declaration/update of cnt, the handling after the
 * "consumer passed producer" warning and the return statement are not
 * visible in this listing.
 */
270 int mlx4_en_free_tx_buf(struct net_device
*dev
, struct mlx4_en_tx_ring
*ring
)
272 struct mlx4_en_priv
*priv
= netdev_priv(dev
);
275 /* Skip last polled descriptor */
276 ring
->cons
+= ring
->last_nr_txbb
;
277 mlx4_dbg(DRV
, priv
, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
278 ring
->cons
, ring
->prod
);
/* Unsigned subtraction keeps the distance valid across index wrap. */
280 if ((u32
) (ring
->prod
- ring
->cons
) > ring
->size
) {
281 if (netif_msg_tx_err(priv
))
282 mlx4_warn(priv
->mdev
, "Tx consumer passed producer!\n");
286 while (ring
->cons
!= ring
->prod
) {
287 ring
->last_nr_txbb
= mlx4_en_free_tx_desc(priv
, ring
,
288 ring
->cons
& ring
->size_mask
,
289 !!(ring
->cons
& ring
->size
));
290 ring
->cons
+= ring
->last_nr_txbb
;
295 mlx4_dbg(DRV
, priv
, "Freed %d uncompleted tx descriptors\n", cnt
);
/*
 * Fill prio_map[0..7] with the TX ring assigned to each of the 8
 * priorities and log every assignment.
 *
 * block is the number of priorities per ring (8 / ring_num) and extra is
 * the remainder.  While extra is non-zero a ring is treated as full after
 * block + 1 priorities, otherwise after block priorities.
 *
 * NOTE(review): ring_num == 0 would divide by zero in the block
 * computation - confirm callers never pass 0.  The declarations of prio,
 * num and ring and the bodies of both branches are not visible in this
 * listing.
 */
300 void mlx4_en_set_prio_map(struct mlx4_en_priv
*priv
, u16
*prio_map
, u32 ring_num
)
302 int block
= 8 / ring_num
;
303 int extra
= 8 - (block
* ring_num
);
309 for (prio
= 0; prio
< 8; prio
++)
314 for (prio
= 0; prio
< 8; prio
++) {
315 if (extra
&& (num
== block
+ 1)) {
319 } else if (!extra
&& (num
== block
)) {
323 prio_map
[prio
] = ring
;
324 mlx4_dbg(DRV
, priv
, " prio:%d --> ring:%d\n", prio
, ring
);
/*
 * Reap TX completions for the ring attached to 'cq'.
 *
 * Compares the ring's current consumer slot with the wqe_index reported in
 * the CQE; when they differ, descriptors are released with
 * mlx4_en_free_tx_desc() until the two meet, re-sampling the CQE's
 * wqe_index in an outer loop.  ring->cons is then advanced by the number of
 * TXBBs skipped, and if the ring had blocked the queue and enough room is
 * available again (prod - cons <= size - HEADROOM - MAX_DESC_TXBBS) the
 * netdev queue is woken and the wake_queue counter incremented.
 *
 * Every caller visible in this file takes ring->comp_lock before calling.
 *
 * NOTE(review): this listing is missing lines, including comment
 * delimiters inside the body; the text below is reproduced unchanged.
 */
329 static void mlx4_en_process_tx_cq(struct net_device
*dev
, struct mlx4_en_cq
*cq
)
331 struct mlx4_en_priv
*priv
= netdev_priv(dev
);
332 struct mlx4_cq
*mcq
= &cq
->mcq
;
333 struct mlx4_en_tx_ring
*ring
= &priv
->tx_ring
[cq
->ring
];
334 struct mlx4_cqe
*cqe
= cq
->buf
;
337 u32 txbbs_skipped
= 0;
340 /* index always points to the first TXBB of the last polled descriptor */
341 index
= ring
->cons
& ring
->size_mask
;
342 new_index
= be16_to_cpu(cqe
->wqe_index
) & ring
->size_mask
;
343 if (index
== new_index
)
350 * We use a two-stage loop:
351 * - the first samples the HW-updated CQE
352 * - the second frees TXBBs until the last sample
353 * This lets us amortize CQE cache misses, while still polling the CQ
354 * until is quiescent.
356 cq_last_sav
= mcq
->cons_index
;
359 /* Skip over last polled CQE */
360 index
= (index
+ ring
->last_nr_txbb
) & ring
->size_mask
;
361 txbbs_skipped
+= ring
->last_nr_txbb
;
364 ring
->last_nr_txbb
= mlx4_en_free_tx_desc(
366 !!((ring
->cons
+ txbbs_skipped
) &
370 } while (index
!= new_index
);
372 new_index
= be16_to_cpu(cqe
->wqe_index
) & ring
->size_mask
;
373 } while (index
!= new_index
);
374 AVG_PERF_COUNTER(priv
->pstats
.tx_coal_avg
,
375 (u32
) (mcq
->cons_index
- cq_last_sav
));
378 * To prevent CQ overflow we first update CQ consumer and only then
383 ring
->cons
+= txbbs_skipped
;
385 /* Wakeup Tx queue if this ring stopped it */
386 if (unlikely(ring
->blocked
)) {
387 if ((u32
) (ring
->prod
- ring
->cons
) <=
388 ring
->size
- HEADROOM
- MAX_DESC_TXBBS
) {
390 /* TODO: support multiqueue netdevs. Currently, we block
391 * when *any* ring is full. Note that:
392 * - 2 Tx rings can unblock at the same time and call
393 * netif_wake_queue(), which is OK since this
394 * operation is idempotent.
395 * - We might wake the queue just after another ring
396 * stopped it. This is no big deal because the next
397 * transmission on that ring would stop the queue.
400 netif_wake_queue(dev
);
401 priv
->port_stats
.wake_queue
++;
/*
 * TX completion handler for an mlx4 CQ.
 *
 * Recovers the mlx4_en_cq that embeds 'mcq', tries to take the ring's
 * comp_lock without spinning, processes the CQ, re-arms cq->timer for the
 * next jiffy and drops the lock.
 *
 * NOTE(review): the statement executed when the trylock fails is not
 * visible in this listing (presumably an early return - confirm).
 */
406 void mlx4_en_tx_irq(struct mlx4_cq
*mcq
)
408 struct mlx4_en_cq
*cq
= container_of(mcq
, struct mlx4_en_cq
, mcq
);
409 struct mlx4_en_priv
*priv
= netdev_priv(cq
->dev
);
410 struct mlx4_en_tx_ring
*ring
= &priv
->tx_ring
[cq
->ring
];
412 if (!spin_trylock(&ring
->comp_lock
))
414 mlx4_en_process_tx_cq(cq
->dev
, cq
);
415 mod_timer(&cq
->timer
, jiffies
+ 1);
416 spin_unlock(&ring
->comp_lock
);
/*
 * Poll a TX CQ outside of the completion handler.
 *
 * 'data' carries the mlx4_en_cq pointer as an unsigned long (presumably
 * this is the cq->timer callback - confirm).  If comp_lock is contended the
 * timer is re-armed for MLX4_EN_TX_POLL_TIMEOUT jiffies later; otherwise
 * the CQ is processed and the timer is re-armed only while packets remain
 * in flight and the port is up.
 *
 * NOTE(review): the declaration of inflight and the statement that follows
 * the contended-lock re-arm are not visible in this listing.
 */
420 void mlx4_en_poll_tx_cq(unsigned long data
)
422 struct mlx4_en_cq
*cq
= (struct mlx4_en_cq
*) data
;
423 struct mlx4_en_priv
*priv
= netdev_priv(cq
->dev
);
424 struct mlx4_en_tx_ring
*ring
= &priv
->tx_ring
[cq
->ring
];
427 INC_PERF_COUNTER(priv
->pstats
.tx_poll
);
429 if (!spin_trylock_irq(&ring
->comp_lock
)) {
430 mod_timer(&cq
->timer
, jiffies
+ MLX4_EN_TX_POLL_TIMEOUT
);
433 mlx4_en_process_tx_cq(cq
->dev
, cq
);
/* Outstanding TXBBs, not counting the last polled descriptor. */
434 inflight
= (u32
) (ring
->prod
- ring
->cons
- ring
->last_nr_txbb
);
436 /* If there are still packets in flight and the timer has not already
437 * been scheduled by the Tx routine then schedule it here to guarantee
438 * completion processing of these packets */
439 if (inflight
&& priv
->port_up
)
440 mod_timer(&cq
->timer
, jiffies
+ MLX4_EN_TX_POLL_TIMEOUT
);
442 spin_unlock_irq(&ring
->comp_lock
);
/*
 * Copy a descriptor that was built in ring->bounce_buf into its real
 * location in the ring when it wraps past the end of the buffer.
 *
 * 'copy' is the number of bytes that fit between TXBB 'index' and the end
 * of the ring.  The first loop copies the remainder (desc_size - copy
 * bytes) to the start of ring->buf, the second copies the leading part to
 * ring->buf + index * TXBB_SIZE.  Both loops move 32-bit words from the
 * highest offset down; the second loop stops at offset 4, so the first
 * dword of the descriptor is not written here (presumably because the
 * caller writes owner_opcode last - confirm).
 *
 * Returns the descriptor's real location in the ring.
 *
 * NOTE(review): the index parameter, the declaration of i and the
 * statement guarded by the TXBB-boundary checks are not visible in this
 * listing.
 */
445 static struct mlx4_en_tx_desc
*mlx4_en_bounce_to_desc(struct mlx4_en_priv
*priv
,
446 struct mlx4_en_tx_ring
*ring
,
448 unsigned int desc_size
)
450 u32 copy
= (ring
->size
- index
) * TXBB_SIZE
;
453 for (i
= desc_size
- copy
- 4; i
>= 0; i
-= 4) {
454 if ((i
& (TXBB_SIZE
- 1)) == 0)
457 *((u32
*) (ring
->buf
+ i
)) =
458 *((u32
*) (ring
->bounce_buf
+ copy
+ i
));
461 for (i
= copy
- 4; i
>= 4 ; i
-= 4) {
462 if ((i
& (TXBB_SIZE
- 1)) == 0)
465 *((u32
*) (ring
->buf
+ index
* TXBB_SIZE
+ i
)) =
466 *((u32
*) (ring
->bounce_buf
+ i
));
469 /* Return real descriptor location */
470 return ring
->buf
+ index
* TXBB_SIZE
;
/*
 * Transmit-side completion housekeeping for TX ring 'tx_ind'.
 *
 * Arms the CQ timer if none is pending, and on every
 * MLX4_EN_TX_POLL_MODER-th call (poll_cnt is masked with
 * MLX4_EN_TX_POLL_MODER - 1) processes the CQ inline, provided comp_lock
 * can be taken without spinning.
 */
473 static inline void mlx4_en_xmit_poll(struct mlx4_en_priv
*priv
, int tx_ind
)
475 struct mlx4_en_cq
*cq
= &priv
->tx_cq
[tx_ind
];
476 struct mlx4_en_tx_ring
*ring
= &priv
->tx_ring
[tx_ind
];
478 /* If we don't have a pending timer, set one up to catch our recent
479 post in case the interface becomes idle */
480 if (!timer_pending(&cq
->timer
))
481 mod_timer(&cq
->timer
, jiffies
+ MLX4_EN_TX_POLL_TIMEOUT
);
483 /* Poll the CQ every MLX4_EN_TX_POLL_MODER packets */
484 if ((++ring
->poll_cnt
& (MLX4_EN_TX_POLL_MODER
- 1)) == 0)
485 if (spin_trylock_irq(&ring
->comp_lock
)) {
486 mlx4_en_process_tx_cq(priv
->dev
, cq
);
487 spin_unlock_irq(&ring
->comp_lock
);
/*
 * Return the kernel virtual address of the data in the skb's first page
 * fragment: page_address() of the fragment's page plus its page_offset.
 *
 * NOTE(review): the declaration of ptr and any check on the
 * page_address() result are not visible in this listing.
 */
491 static void *get_frag_ptr(struct sk_buff
*skb
)
493 struct skb_frag_struct
*frag
= &skb_shinfo(skb
)->frags
[0];
494 struct page
*page
= frag
->page
;
497 ptr
= page_address(page
);
501 return ptr
+ frag
->page_offset
;
/*
 * Decide whether an skb is a candidate for inline transmission.
 *
 * The visible precondition is: inline_thold is non-zero, the skb is not
 * GSO, and skb->len does not exceed inline_thold.  A single-fragment skb
 * has its fragment address looked up via get_frag_ptr(); skbs with any
 * other non-zero fragment count take a separate (unlikely) branch.
 *
 * NOTE(review): the return statements and the use of 'pfrag' are not
 * visible in this listing; callers in this file pass either NULL or the
 * address of a fragment pointer and test the result for zero.
 */
504 static int is_inline(struct sk_buff
*skb
, void **pfrag
)
508 if (inline_thold
&& !skb_is_gso(skb
) && skb
->len
<= inline_thold
) {
509 if (skb_shinfo(skb
)->nr_frags
== 1) {
510 ptr
= get_frag_ptr(skb
);
518 } else if (unlikely(skb_shinfo(skb
)->nr_frags
))
/*
 * Size in bytes, rounded up to a multiple of 16, of a descriptor that
 * carries the whole skb inline.
 *
 * When the payload plus CTRL_SIZE plus one inline-segment header fits in
 * MLX4_INLINE_ALIGN bytes, one inline-segment header is counted; the other
 * return counts two headers.
 */
527 static int inline_size(struct sk_buff
*skb
)
529 if (skb
->len
+ CTRL_SIZE
+ sizeof(struct mlx4_wqe_inline_seg
)
530 <= MLX4_INLINE_ALIGN
)
531 return ALIGN(skb
->len
+ CTRL_SIZE
+
532 sizeof(struct mlx4_wqe_inline_seg
), 16);
534 return ALIGN(skb
->len
+ CTRL_SIZE
+ 2 *
535 sizeof(struct mlx4_wqe_inline_seg
), 16);
/*
 * Compute the unpadded descriptor size in bytes for 'skb' and report the
 * LSO header length through *lso_header_size (0 for non-GSO packets).
 *
 * GSO: the header length is the transport offset plus the TCP header
 * length; the size is CTRL_SIZE, one DS_SIZE per page fragment and the
 * DS_SIZE-aligned LSO segment (header + 4 bytes), plus one more DS_SIZE
 * when the linear area holds data beyond the headers.  The "Non-linear
 * headers" and "LSO header size too big" cases free the skb.
 *
 * Non-GSO: either CTRL_SIZE plus one DS_SIZE per fragment and one for the
 * linear part, or inline_size(skb) when is_inline() accepts the packet.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * mlx4_en_xmit() treats a zero result as "skb already consumed" - confirm
 * against the full source.
 */
538 static int get_real_size(struct sk_buff
*skb
, struct net_device
*dev
,
539 int *lso_header_size
)
541 struct mlx4_en_priv
*priv
= netdev_priv(dev
);
542 struct mlx4_en_dev
*mdev
= priv
->mdev
;
545 if (skb_is_gso(skb
)) {
546 *lso_header_size
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
547 real_size
= CTRL_SIZE
+ skb_shinfo(skb
)->nr_frags
* DS_SIZE
+
548 ALIGN(*lso_header_size
+ 4, DS_SIZE
);
549 if (unlikely(*lso_header_size
!= skb_headlen(skb
))) {
550 /* We add a segment for the skb linear buffer only if
551 * it contains data */
552 if (*lso_header_size
< skb_headlen(skb
))
553 real_size
+= DS_SIZE
;
555 if (netif_msg_tx_err(priv
))
556 mlx4_warn(mdev
, "Non-linear headers\n");
557 dev_kfree_skb_any(skb
);
561 if (unlikely(*lso_header_size
> MAX_LSO_HDR_SIZE
)) {
562 if (netif_msg_tx_err(priv
))
563 mlx4_warn(mdev
, "LSO header size too big\n");
564 dev_kfree_skb_any(skb
);
568 *lso_header_size
= 0;
569 if (!is_inline(skb
, NULL
))
570 real_size
= CTRL_SIZE
+ (skb_shinfo(skb
)->nr_frags
+ 1) * DS_SIZE
;
572 real_size
= inline_size(skb
);
/*
 * Build an inline-data descriptor: copy the packet bytes straight into the
 * descriptor instead of pointing at them with DMA data segments.
 *
 * spc is the room left for payload in the first MLX4_INLINE_ALIGN bytes
 * after the control segment and one inline-segment header.  A packet that
 * fits in spc uses a single inline segment.  Otherwise the first segment
 * carries spc bytes and a second inline segment, placed right after it,
 * carries the remaining skb->len - spc bytes.  Data comes from the skb's
 * linear area and, when the skb has fragments, from 'fragptr'.
 *
 * Every inline byte_count is written with bit 31 set.  The control
 * segment's vlan_tag, ins_vlan and fence_size (real_size in 16-byte units,
 * low 6 bits) are filled in last.
 *
 * NOTE(review): some lines (else branches, closing braces) are not visible
 * in this listing; tx_ind is not referenced in the visible code.
 */
578 static void build_inline_wqe(struct mlx4_en_tx_desc
*tx_desc
, struct sk_buff
*skb
,
579 int real_size
, u16
*vlan_tag
, int tx_ind
, void *fragptr
)
581 struct mlx4_wqe_inline_seg
*inl
= &tx_desc
->inl
;
582 int spc
= MLX4_INLINE_ALIGN
- CTRL_SIZE
- sizeof *inl
;
584 if (skb
->len
<= spc
) {
585 inl
->byte_count
= cpu_to_be32(1 << 31 | skb
->len
);
586 skb_copy_from_linear_data(skb
, inl
+ 1, skb_headlen(skb
));
587 if (skb_shinfo(skb
)->nr_frags
)
588 memcpy(((void *)(inl
+ 1)) + skb_headlen(skb
), fragptr
,
589 skb_shinfo(skb
)->frags
[0].size
);
592 inl
->byte_count
= cpu_to_be32(1 << 31 | spc
);
593 if (skb_headlen(skb
) <= spc
) {
594 skb_copy_from_linear_data(skb
, inl
+ 1, skb_headlen(skb
));
595 if (skb_headlen(skb
) < spc
) {
596 memcpy(((void *)(inl
+ 1)) + skb_headlen(skb
),
597 fragptr
, spc
- skb_headlen(skb
));
598 fragptr
+= spc
- skb_headlen(skb
);
/* Step over the first segment's payload to the second inline header. */
600 inl
= (void *) (inl
+ 1) + spc
;
601 memcpy(((void *)(inl
+ 1)), fragptr
, skb
->len
- spc
);
603 skb_copy_from_linear_data(skb
, inl
+ 1, spc
);
604 inl
= (void *) (inl
+ 1) + spc
;
605 skb_copy_from_linear_data_offset(skb
, spc
, inl
+ 1,
606 skb_headlen(skb
) - spc
);
607 if (skb_shinfo(skb
)->nr_frags
)
608 memcpy(((void *)(inl
+ 1)) + skb_headlen(skb
) - spc
,
609 fragptr
, skb_shinfo(skb
)->frags
[0].size
);
613 inl
->byte_count
= cpu_to_be32(1 << 31 | (skb
->len
- spc
));
615 tx_desc
->ctrl
.vlan_tag
= cpu_to_be16(*vlan_tag
);
616 tx_desc
->ctrl
.ins_vlan
= MLX4_WQE_CTRL_INS_VLAN
* !!(*vlan_tag
);
617 tx_desc
->ctrl
.fence_size
= (real_size
/ 16) & 0x3f;
/*
 * Extract the VLAN tag of 'skb' and pick the TX ring for it.
 *
 * When the device has a VLAN group and the skb carries a tag, the tag is
 * stored through vlan_tag and the ring index is looked up in
 * priv->tx_prio_map using the tag's top three bits (tag >> 13).
 *
 * NOTE(review): the end of the parameter list, the untagged branch and the
 * return statement are not visible in this listing; mlx4_en_xmit() uses the
 * result as the TX ring index.
 */
620 static int get_vlan_info(struct mlx4_en_priv
*priv
, struct sk_buff
*skb
,
625 /* Obtain VLAN information if present */
626 if (priv
->vlgrp
&& vlan_tx_tag_present(skb
)) {
627 *vlan_tag
= vlan_tx_tag_get(skb
);
628 /* Set the Tx ring to use according to vlan priority */
629 tx_ind
= priv
->tx_prio_map
[*vlan_tag
>> 13];
637 int mlx4_en_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
639 struct mlx4_en_priv
*priv
= netdev_priv(dev
);
640 struct mlx4_en_dev
*mdev
= priv
->mdev
;
641 struct mlx4_en_tx_ring
*ring
;
642 struct mlx4_en_cq
*cq
;
643 struct mlx4_en_tx_desc
*tx_desc
;
644 struct mlx4_wqe_data_seg
*data
;
645 struct skb_frag_struct
*frag
;
646 struct mlx4_en_tx_info
*tx_info
;
659 if (unlikely(!skb
->len
)) {
660 dev_kfree_skb_any(skb
);
663 real_size
= get_real_size(skb
, dev
, &lso_header_size
);
664 if (unlikely(!real_size
))
667 /* Align descriptor to TXBB size */
668 desc_size
= ALIGN(real_size
, TXBB_SIZE
);
669 nr_txbb
= desc_size
/ TXBB_SIZE
;
670 if (unlikely(nr_txbb
> MAX_DESC_TXBBS
)) {
671 if (netif_msg_tx_err(priv
))
672 mlx4_warn(mdev
, "Oversized header or SG list\n");
673 dev_kfree_skb_any(skb
);
677 tx_ind
= get_vlan_info(priv
, skb
, &vlan_tag
);
678 ring
= &priv
->tx_ring
[tx_ind
];
680 /* Check available TXBBs and 2K spare for prefetch */
681 if (unlikely(((int)(ring
->prod
- ring
->cons
)) >
682 ring
->size
- HEADROOM
- MAX_DESC_TXBBS
)) {
683 /* every full Tx ring stops queue.
684 * TODO: implement multi-queue support (per-queue stop) */
685 netif_stop_queue(dev
);
687 priv
->port_stats
.queue_stopped
++;
689 /* Use interrupts to find out when queue opened */
690 cq
= &priv
->tx_cq
[tx_ind
];
691 mlx4_en_arm_cq(priv
, cq
);
692 return NETDEV_TX_BUSY
;
695 /* Now that we know what Tx ring to use */
696 if (unlikely(!priv
->port_up
)) {
697 if (netif_msg_tx_err(priv
))
698 mlx4_warn(mdev
, "xmit: port down!\n");
699 dev_kfree_skb_any(skb
);
703 /* Track current inflight packets for performance analysis */
704 AVG_PERF_COUNTER(priv
->pstats
.inflight_avg
,
705 (u32
) (ring
->prod
- ring
->cons
- 1));
707 /* Packet is good - grab an index and transmit it */
708 index
= ring
->prod
& ring
->size_mask
;
710 /* See if we have enough space for whole descriptor TXBB for setting
711 * SW ownership on next descriptor; if not, use a bounce buffer. */
712 if (likely(index
+ nr_txbb
<= ring
->size
))
713 tx_desc
= ring
->buf
+ index
* TXBB_SIZE
;
715 tx_desc
= (struct mlx4_en_tx_desc
*) ring
->bounce_buf
;
717 /* Save skb in tx_info ring */
718 tx_info
= &ring
->tx_info
[index
];
720 tx_info
->nr_txbb
= nr_txbb
;
722 /* Prepare ctrl segment apart from opcode+ownership, which depends on
723 * whether LSO is used */
724 tx_desc
->ctrl
.vlan_tag
= cpu_to_be16(vlan_tag
);
725 tx_desc
->ctrl
.ins_vlan
= MLX4_WQE_CTRL_INS_VLAN
* !!vlan_tag
;
726 tx_desc
->ctrl
.fence_size
= (real_size
/ 16) & 0x3f;
727 tx_desc
->ctrl
.srcrb_flags
= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE
|
728 MLX4_WQE_CTRL_SOLICITED
);
729 if (likely(skb
->ip_summed
== CHECKSUM_PARTIAL
)) {
730 tx_desc
->ctrl
.srcrb_flags
|= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM
|
731 MLX4_WQE_CTRL_TCP_UDP_CSUM
);
732 priv
->port_stats
.tx_chksum_offload
++;
735 /* Handle LSO (TSO) packets */
736 if (lso_header_size
) {
737 /* Mark opcode as LSO */
738 op_own
= cpu_to_be32(MLX4_OPCODE_LSO
| (1 << 6)) |
739 ((ring
->prod
& ring
->size
) ?
740 cpu_to_be32(MLX4_EN_BIT_DESC_OWN
) : 0);
742 /* Fill in the LSO prefix */
743 tx_desc
->lso
.mss_hdr_size
= cpu_to_be32(
744 skb_shinfo(skb
)->gso_size
<< 16 | lso_header_size
);
747 * note that we already verified that it is linear */
748 memcpy(tx_desc
->lso
.header
, skb
->data
, lso_header_size
);
749 data
= ((void *) &tx_desc
->lso
+
750 ALIGN(lso_header_size
+ 4, DS_SIZE
));
752 priv
->port_stats
.tso_packets
++;
753 i
= ((skb
->len
- lso_header_size
) / skb_shinfo(skb
)->gso_size
) +
754 !!((skb
->len
- lso_header_size
) % skb_shinfo(skb
)->gso_size
);
755 ring
->bytes
+= skb
->len
+ (i
- 1) * lso_header_size
;
758 /* Normal (Non LSO) packet */
759 op_own
= cpu_to_be32(MLX4_OPCODE_SEND
) |
760 ((ring
->prod
& ring
->size
) ?
761 cpu_to_be32(MLX4_EN_BIT_DESC_OWN
) : 0);
762 data
= &tx_desc
->data
;
763 ring
->bytes
+= max(skb
->len
, (unsigned int) ETH_ZLEN
);
767 AVG_PERF_COUNTER(priv
->pstats
.tx_pktsz_avg
, skb
->len
);
770 /* valid only for non-inline segments */
771 tx_info
->data_offset
= (void *) data
- (void *) tx_desc
;
773 tx_info
->linear
= (lso_header_size
< skb_headlen(skb
) && !is_inline(skb
, NULL
)) ? 1 : 0;
774 data
+= skb_shinfo(skb
)->nr_frags
+ tx_info
->linear
- 1;
776 if (!is_inline(skb
, &fragptr
)) {
778 for (i
= skb_shinfo(skb
)->nr_frags
- 1; i
>= 0; i
--) {
779 frag
= &skb_shinfo(skb
)->frags
[i
];
780 dma
= pci_map_page(mdev
->dev
->pdev
, frag
->page
, frag
->page_offset
,
781 frag
->size
, PCI_DMA_TODEVICE
);
782 data
->addr
= cpu_to_be64(dma
);
783 data
->lkey
= cpu_to_be32(mdev
->mr
.key
);
785 data
->byte_count
= cpu_to_be32(frag
->size
);
789 /* Map linear part */
790 if (tx_info
->linear
) {
791 dma
= pci_map_single(mdev
->dev
->pdev
, skb
->data
+ lso_header_size
,
792 skb_headlen(skb
) - lso_header_size
, PCI_DMA_TODEVICE
);
793 data
->addr
= cpu_to_be64(dma
);
794 data
->lkey
= cpu_to_be32(mdev
->mr
.key
);
796 data
->byte_count
= cpu_to_be32(skb_headlen(skb
) - lso_header_size
);
800 build_inline_wqe(tx_desc
, skb
, real_size
, &vlan_tag
, tx_ind
, fragptr
);
804 ring
->prod
+= nr_txbb
;
806 /* If we used a bounce buffer then copy descriptor back into place */
807 if (tx_desc
== (struct mlx4_en_tx_desc
*) ring
->bounce_buf
)
808 tx_desc
= mlx4_en_bounce_to_desc(priv
, ring
, index
, desc_size
);
810 /* Run destructor before passing skb to HW */
811 if (likely(!skb_shared(skb
)))
814 /* Ensure new descriptor hits memory
815 * before setting ownership of this descriptor to HW */
817 tx_desc
->ctrl
.owner_opcode
= op_own
;
821 writel(ring
->doorbell_qpn
, mdev
->uar_map
+ MLX4_SEND_DOORBELL
);
822 dev
->trans_start
= jiffies
;
825 mlx4_en_xmit_poll(priv
, tx_ind
);