/*
 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <net/busy_poll.h>
#include "en.h"
#include "en_tc.h"
static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
{
	return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
}
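
/* CQE compression: the HW may replace runs of similar CQEs with one "title"
 * CQE followed by blocks of mini CQEs that only carry the per-packet fields
 * (e.g. byte count and checksum). The helpers below expand the mini CQEs back
 * into full CQEs in place, fix the ownership bits of the consumed CQ slots,
 * and feed each reconstructed CQE to rq->handle_rx_cqe().
 */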
static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
				       void *data)
{
	u32 ci = cqcc & cq->wq.sz_m1;

	memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}
static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
					 struct mlx5e_cq *cq, u32 cqcc)
{
	mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
	cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
	cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
	rq->stats.cqe_compress_blks++;
}
static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
{
	mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
	cq->mini_arr_idx = 0;
}
static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
	u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
	u32 wq_sz = 1 << cq->wq.log_sz;
	u32 ci = cqcc & cq->wq.sz_m1;
	u32 ci_top = min_t(u32, wq_sz, ci + n);

	for (; ci < ci_top; ci++, n--) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);

		cqe->op_own = op_own;
	}

	if (unlikely(ci == wq_sz)) {
		op_own = !op_own;
		for (ci = 0; ci < n; ci++) {
			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);

			cqe->op_own = op_own;
		}
	}
}
static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
					struct mlx5e_cq *cq, u32 cqcc)
{
	u16 wqe_cnt_step;

	cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
	cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
	cq->title.op_own      &= 0xf0;
	cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.log_sz);
	cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);

	wqe_cnt_step =
		rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
		mpwrq_get_cqe_consumed_strides(&cq->title) : 1;
	cq->decmprs_wqe_counter =
		(cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1;
}
static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
						struct mlx5e_cq *cq, u32 cqcc)
{
	mlx5e_decompress_cqe(rq, cq, cqcc);
	cq->title.rss_hash_type   = 0;
	cq->title.rss_hash_result = 0;
}
static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
					     struct mlx5e_cq *cq,
					     int update_owner_only,
					     int budget_rem)
{
	u32 cqcc = cq->wq.cc + update_owner_only;
	u32 cqe_count;
	u32 i;

	cqe_count = min_t(u32, cq->decmprs_left, budget_rem);

	for (i = update_owner_only; i < cqe_count;
	     i++, cq->mini_arr_idx++, cqcc++) {
		if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
			mlx5e_read_mini_arr_slot(cq, cqcc);

		mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
		rq->handle_rx_cqe(rq, &cq->title);
	}
	mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
	cq->wq.cc = cqcc;
	cq->decmprs_left -= cqe_count;
	rq->stats.cqe_compress_pkts += cqe_count;

	return cqe_count;
}
static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
					      struct mlx5e_cq *cq,
					      int budget_rem)
{
	mlx5e_read_title_slot(rq, cq, cq->wq.cc);
	mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
	mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
	rq->handle_rx_cqe(rq, &cq->title);
	cq->mini_arr_idx++;

	return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
}
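
/* Enable/disable RX CQE compression at runtime. When the interface is open,
 * the channels are closed and re-opened so the new setting takes effect.
 */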
void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
{
	bool was_opened;

	if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
		return;

	mutex_lock(&priv->state_lock);

	if (priv->params.rx_cqe_compress == val)
		goto unlock;

	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
	if (was_opened)
		mlx5e_close_locked(priv->netdev);

	priv->params.rx_cqe_compress = val;

	if (was_opened)
		mlx5e_open_locked(priv->netdev);

unlock:
	mutex_unlock(&priv->state_lock);
}
int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
	struct sk_buff *skb;
	dma_addr_t dma_addr;

	skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz);
	if (unlikely(!skb))
		return -ENOMEM;

	dma_addr = dma_map_single(rq->pdev,
				  /* hw start padding */
				  skb->data,
				  /* hw end padding */
				  rq->wqe_sz,
				  DMA_FROM_DEVICE);

	if (unlikely(dma_mapping_error(rq->pdev, dma_addr)))
		goto err_free_skb;

	*((dma_addr_t *)skb->cb) = dma_addr;
	wqe->data.addr = cpu_to_be64(dma_addr);
	wqe->data.lkey = rq->mkey_be;

	rq->skb[ix] = skb;

	return 0;

err_free_skb:
	dev_kfree_skb(skb);

	return -ENOMEM;
}
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
	struct sk_buff *skb = rq->skb[ix];

	dma_unmap_single(rq->pdev,
			 *((dma_addr_t *)skb->cb),
			 rq->wqe_sz,
			 DMA_FROM_DEVICE);
	dev_kfree_skb(skb);
}
static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
{
	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
}
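
/* Multi-packet WQE (striding RQ) helpers.
 *
 * Each RX WQE covers a large buffer divided into fixed-size strides; the HW
 * packs several packets into one WQE, each packet consuming one or more
 * strides. Two buffer layouts are supported:
 *  - linear:     one physically contiguous high-order page allocation,
 *  - fragmented: MLX5_MPWRQ_PAGES_PER_WQE order-0 pages made virtually
 *                contiguous to the HW through a UMR mapping.
 * The wi->dma_pre_sync / add_skb_frag / copy_skb_header / free_wqe callbacks
 * below abstract the difference between the two layouts.
 */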
static inline void
mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
				struct mlx5e_mpw_info *wi,
				u32 wqe_offset, u32 len)
{
	dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset,
				len, DMA_FROM_DEVICE);
}
static inline void
mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
				    struct mlx5e_mpw_info *wi,
				    u32 wqe_offset, u32 len)
{
	/* No dma pre sync for fragmented MPWQE */
}
static inline void
mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
				struct sk_buff *skb,
				struct mlx5e_mpw_info *wi,
				u32 page_idx, u32 frag_offset,
				u32 len)
{
	unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);

	wi->skbs_frags[page_idx]++;
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
			&wi->dma_info.page[page_idx], frag_offset,
			len, truesize);
}
static inline void
mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
				    struct sk_buff *skb,
				    struct mlx5e_mpw_info *wi,
				    u32 page_idx, u32 frag_offset,
				    u32 len)
{
	unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);

	dma_sync_single_for_cpu(rq->pdev,
				wi->umr.dma_info[page_idx].addr + frag_offset,
				len, DMA_FROM_DEVICE);
	wi->skbs_frags[page_idx]++;
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
			wi->umr.dma_info[page_idx].page, frag_offset,
			len, truesize);
}
static inline void
mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev,
				   struct sk_buff *skb,
				   struct mlx5e_mpw_info *wi,
				   u32 page_idx, u32 offset,
				   u32 headlen)
{
	struct page *page = &wi->dma_info.page[page_idx];

	skb_copy_to_linear_data(skb, page_address(page) + offset,
				ALIGN(headlen, sizeof(long)));
}
static inline void
mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
				       struct sk_buff *skb,
				       struct mlx5e_mpw_info *wi,
				       u32 page_idx, u32 offset,
				       u32 headlen)
{
	u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx];
	unsigned int len;

	/* Aligning len to sizeof(long) optimizes memcpy performance */
	len = ALIGN(headlen_pg, sizeof(long));
	dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len,
				DMA_FROM_DEVICE);
	skb_copy_to_linear_data_offset(skb, 0,
				       page_address(dma_info->page) + offset,
				       len);
	if (unlikely(offset + headlen > PAGE_SIZE)) {
		/* the header spills into the next page */
		dma_info++;
		headlen_pg = len;
		len = ALIGN(headlen - headlen_pg, sizeof(long));
		dma_sync_single_for_cpu(pdev, dma_info->addr, len,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data_offset(skb, headlen_pg,
					       page_address(dma_info->page),
					       len);
	}
}
static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
{
	return rq->mpwqe_mtt_offset +
		wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
}
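
/* For fragmented MPWQEs the order-0 pages are exposed to the HW as one
 * virtually contiguous region through a UMR (user-mode memory registration)
 * WQE posted on the channel's internal control (ICO) SQ. The UMR WQE carries
 * the MTT entries that translate the region to the individual pages.
 */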
static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
				struct mlx5e_sq *sq,
				struct mlx5e_umr_wqe *wqe,
				u16 ix)
{
	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
	struct mlx5_wqe_data_seg      *dseg = &wqe->data;
	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
	u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
	u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);

	memset(wqe, 0, sizeof(*wqe));
	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
			    MLX5_OPCODE_UMR);
	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
				      ds_cnt);
	cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
	cseg->imm       = rq->umr_mkey_be;

	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
	ucseg->klm_octowords =
		cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
	ucseg->bsf_octowords =
		cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);

	dseg->lkey = sq->mkey_be;
	dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
}
static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_sq *sq = &rq->channel->icosq;
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_umr_wqe *wqe;
	u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
	u16 pi;

	/* fill sq edge with nops to avoid wqe wrap around */
	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
		sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP;
		sq->ico_wqe_info[pi].num_wqebbs = 1;
		mlx5e_send_nop(sq, true);
	}

	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	mlx5e_build_umr_wqe(rq, sq, wqe, ix);
	sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR;
	sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs;
	sq->pc += num_wqebbs;
	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
}
static inline int mlx5e_get_wqe_mtt_sz(void)
{
	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the mtt array, we allocate
	 * a little more.
	 */
	return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
		     MLX5_UMR_MTT_ALIGNMENT);
}
static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq,
				    struct mlx5e_mpw_info *wi,
				    int i)
{
	struct page *page;

	page = dev_alloc_page();
	if (unlikely(!page))
		return -ENOMEM;

	wi->umr.dma_info[i].page = page;
	wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE,
						PCI_DMA_FROMDEVICE);
	if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) {
		put_page(page);
		return -ENOMEM;
	}
	wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR);

	return 0;
}
static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
					   struct mlx5e_rx_wqe *wqe,
					   u16 ix)
{
	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
	int mtt_sz = mlx5e_get_wqe_mtt_sz();
	u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
	int i;

	wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) *
				   MLX5_MPWRQ_PAGES_PER_WQE,
				   GFP_ATOMIC);
	if (unlikely(!wi->umr.dma_info))
		goto err_out;

	/* We allocate more than mtt_sz as we will align the pointer */
	wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1,
				       GFP_ATOMIC);
	if (unlikely(!wi->umr.mtt_no_align))
		goto err_free_umr;

	wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN);
	wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz,
					  PCI_DMA_TODEVICE);
	if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr)))
		goto err_free_mtt;

	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
		if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
			goto err_unmap;
		page_ref_add(wi->umr.dma_info[i].page,
			     mlx5e_mpwqe_strides_per_page(rq));
		wi->skbs_frags[i] = 0;
	}

	wi->consumed_strides = 0;
	wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe;
	wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe;
	wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe;
	wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe;
	wqe->data.lkey = rq->umr_mkey_be;
	wqe->data.addr = cpu_to_be64(dma_offset);

	return 0;

err_unmap:
	while (--i >= 0) {
		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
			       PCI_DMA_FROMDEVICE);
		page_ref_sub(wi->umr.dma_info[i].page,
			     mlx5e_mpwqe_strides_per_page(rq));
		put_page(wi->umr.dma_info[i].page);
	}
	dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);

err_free_mtt:
	kfree(wi->umr.mtt_no_align);

err_free_umr:
	kfree(wi->umr.dma_info);

err_out:
	return -ENOMEM;
}
void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
				    struct mlx5e_mpw_info *wi)
{
	int mtt_sz = mlx5e_get_wqe_mtt_sz();
	int i;

	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
			       PCI_DMA_FROMDEVICE);
		page_ref_sub(wi->umr.dma_info[i].page,
			     mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
		put_page(wi->umr.dma_info[i].page);
	}
	dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
	kfree(wi->umr.mtt_no_align);
	kfree(wi->umr.dma_info);
}
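
/* Completion handler for the UMR WQE of a fragmented MPWQE: invoked from the
 * ICO SQ completion path once the HW has consumed the memory registration,
 * at which point the RX WQE can finally be handed to the device.
 */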
void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq)
{
	struct mlx5_wq_ll *wq = &rq->wq;
	struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);

	clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);

	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
		mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]);
		return;
	}

	mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
	rq->stats.mpwqe_frag++;

	/* ensure wqes are visible to device before updating doorbell record */
	dma_wmb();

	mlx5_wq_ll_update_db_record(wq);
}
static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
				       struct mlx5e_rx_wqe *wqe,
				       u16 ix)
{
	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
	gfp_t gfp_mask;
	int i;

	gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC;
	wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
					     MLX5_MPWRQ_WQE_PAGE_ORDER);
	if (unlikely(!wi->dma_info.page))
		return -ENOMEM;

	wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0,
					 rq->wqe_sz, PCI_DMA_FROMDEVICE);
	if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) {
		put_page(wi->dma_info.page);
		return -ENOMEM;
	}

	/* We split the high-order page into order-0 ones and manage their
	 * reference counter to minimize the memory held by small skb fragments
	 */
	split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
		page_ref_add(&wi->dma_info.page[i],
			     mlx5e_mpwqe_strides_per_page(rq));
		wi->skbs_frags[i] = 0;
	}

	wi->consumed_strides = 0;
	wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe;
	wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe;
	wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe;
	wi->free_wqe = mlx5e_free_rx_linear_mpwqe;
	wqe->data.lkey = rq->mkey_be;
	wqe->data.addr = cpu_to_be64(wi->dma_info.addr);

	return 0;
}
void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
				struct mlx5e_mpw_info *wi)
{
	int i;

	dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
		       PCI_DMA_FROMDEVICE);
	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
		page_ref_sub(&wi->dma_info.page[i],
			     mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
		put_page(&wi->dma_info.page[i]);
	}
}
int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
	int err;

	err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix);
	if (unlikely(err)) {
		err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix);
		if (unlikely(err))
			return err;
		set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
		mlx5e_post_umr_wqe(rq, ix);
		return -EBUSY;
	}

	return 0;
}
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];

	wi->free_wqe(rq, wi);
}
#define RQ_CANNOT_POST(rq) \
	(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \
	 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
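
/* Refill the RQ from the NAPI poll context. Returns true when the ring is
 * still not full, i.e. the caller should keep polling. Per-WQE flow:
 * rq->alloc_wqe() fills the buffer (mlx5e_alloc_rx_wqe for the legacy RQ,
 * mlx5e_alloc_rx_mpwqe for striding RQ; the latter may return -EBUSY while a
 * UMR is in flight), then the WQE is pushed and the doorbell record is
 * updated once at the end.
 */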
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5_wq_ll *wq = &rq->wq;

	if (unlikely(RQ_CANNOT_POST(rq)))
		return false;

	while (!mlx5_wq_ll_is_full(wq)) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
		int err;

		err = rq->alloc_wqe(rq, wqe, wq->head);
		if (err) {
			if (err != -EBUSY)
				rq->stats.buff_alloc_err++;
			break;
		}

		mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
	}

	/* ensure wqes are visible to device before updating doorbell record */
	dma_wmb();

	mlx5_wq_ll_update_db_record(wq);

	return !mlx5_wq_ll_is_full(wq);
}
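
/* HW LRO: the device coalesces several TCP segments of a flow into a single
 * completion. Before handing the aggregated skb to the stack, rewrite the
 * headers so they describe the merged packet: total length / payload length,
 * minimal TTL, TCP PSH/ACK bits, ack_seq and window from the CQE, and a
 * recomputed IPv4 header checksum.
 */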
static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
				 u32 cqe_bcnt)
{
	struct ethhdr	*eth = (struct ethhdr *)(skb->data);
	struct iphdr	*ipv4 = (struct iphdr *)(skb->data + ETH_HLEN);
	struct ipv6hdr	*ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
	struct tcphdr	*tcp;

	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
	int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
		       (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));

	u16 tot_len = cqe_bcnt - ETH_HLEN;

	if (eth->h_proto == htons(ETH_P_IP)) {
		tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
					sizeof(struct iphdr));
		ipv6 = NULL;
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
	} else {
		tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
					sizeof(struct ipv6hdr));
		ipv4 = NULL;
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
	}

	if (get_cqe_lro_tcppsh(cqe))
		tcp->psh = 1;

	if (tcp_ack) {
		tcp->ack = 1;
		tcp->ack_seq = cqe->lro_ack_seq_num;
		tcp->window = cqe->lro_tcp_win;
	}

	if (ipv4) {
		ipv4->ttl = cqe->lro_min_ttl;
		ipv4->tot_len = cpu_to_be16(tot_len);
		ipv4->check = 0;
		ipv4->check = ip_fast_csum((unsigned char *)ipv4,
					   ipv4->ihl);
	} else {
		ipv6->hop_limit = cqe->lro_min_ttl;
		ipv6->payload_len = cpu_to_be16(tot_len -
						sizeof(struct ipv6hdr));
	}
}
static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
				      struct sk_buff *skb)
{
	u8 cht = cqe->rss_hash_type;
	int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
		 (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
					    PKT_HASH_TYPE_NONE;
	skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}
static inline bool is_first_ethertype_ip(struct sk_buff *skb)
{
	__be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;

	return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
}
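
/* RX checksum offload policy:
 *  - LRO skbs are reported as CHECKSUM_UNNECESSARY (the HW validated the
 *    checksums while coalescing),
 *  - packets whose first ethertype is IPv4/IPv6 use CHECKSUM_COMPLETE with
 *    the full packet checksum taken from the CQE,
 *  - otherwise CHECKSUM_UNNECESSARY is used when the CQE L3/L4 OK bits are
 *    set (with csum_level bumped for tunneled traffic),
 *  - and CHECKSUM_NONE when none of the above applies.
 */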
static inline void mlx5e_handle_csum(struct net_device *netdev,
				     struct mlx5_cqe64 *cqe,
				     struct mlx5e_rq *rq,
				     struct sk_buff *skb,
				     bool lro)
{
	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
		goto csum_none;

	if (lro) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		return;
	}

	if (is_first_ethertype_ip(skb)) {
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
		rq->stats.csum_complete++;
		return;
	}

	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
		   (cqe->hds_ip_ext & CQE_L4_OK))) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (cqe_is_tunneled(cqe)) {
			skb->csum_level = 1;
			skb->encapsulation = 1;
			rq->stats.csum_unnecessary_inner++;
		}
		return;
	}
csum_none:
	skb->ip_summed = CHECKSUM_NONE;
	rq->stats.csum_none++;
}
static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
				      u32 cqe_bcnt,
				      struct mlx5e_rq *rq,
				      struct sk_buff *skb)
{
	struct net_device *netdev = rq->netdev;
	struct mlx5e_tstamp *tstamp = rq->tstamp;
	int lro_num_seg;

	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
	if (lro_num_seg > 1) {
		mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
		skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
		rq->stats.lro_packets++;
		rq->stats.lro_bytes += cqe_bcnt;
	}

	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
		mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));

	skb_record_rx_queue(skb, rq->ix);

	if (likely(netdev->features & NETIF_F_RXHASH))
		mlx5e_skb_set_hash(cqe, skb);

	if (cqe_has_vlan(cqe))
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       be16_to_cpu(cqe->vlan_info));

	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;

	mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
	skb->protocol = eth_type_trans(skb, netdev);
}
static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
					 struct mlx5_cqe64 *cqe,
					 u32 cqe_bcnt,
					 struct sk_buff *skb)
{
	rq->stats.packets++;
	rq->stats.bytes += cqe_bcnt;
	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
	napi_gro_receive(rq->cq.napi, skb);
}
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct mlx5e_rx_wqe *wqe;
	struct sk_buff *skb;
	__be16 wqe_counter_be;
	u16 wqe_counter;
	u32 cqe_bcnt;

	wqe_counter_be = cqe->wqe_counter;
	wqe_counter    = be16_to_cpu(wqe_counter_be);
	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
	skb            = rq->skb[wqe_counter];
	prefetch(skb->data);
	rq->skb[wqe_counter] = NULL;

	dma_unmap_single(rq->pdev,
			 *((dma_addr_t *)skb->cb),
			 rq->wqe_sz,
			 DMA_FROM_DEVICE);

	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
		rq->stats.wqe_err++;
		dev_kfree_skb(skb);
		goto wq_ll_pop;
	}

	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
	skb_put(skb, cqe_bcnt);

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

wq_ll_pop:
	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
		       &wqe->next.next_wqe_index);
}
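
/* Build the skb for a packet received into a multi-packet WQE: the packet
 * starts at a stride-aligned offset inside the WQE buffer. Up to
 * MLX5_MPWRQ_SMALL_PACKET_THRESHOLD bytes of headers are copied into the skb
 * linear part; the remainder is attached as page fragments referencing the
 * WQE pages, crossing page boundaries as needed.
 */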
static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
					   struct mlx5_cqe64 *cqe,
					   struct mlx5e_mpw_info *wi,
					   u32 cqe_bcnt,
					   struct sk_buff *skb)
{
	u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
	u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
	u32 head_page_idx  = page_idx;
	u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt);
	u32 frag_offset    = head_offset + headlen;
	u16 byte_cnt       = cqe_bcnt - headlen;

	if (unlikely(frag_offset >= PAGE_SIZE)) {
		page_idx++;
		frag_offset -= PAGE_SIZE;
	}
	wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);

	while (byte_cnt) {
		u32 pg_consumed_bytes =
			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);

		wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
				 pg_consumed_bytes);
		byte_cnt -= pg_consumed_bytes;
		frag_offset = 0;
		page_idx++;
	}
	/* copy header */
	wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset,
			    headlen);
	/* skb linear part was allocated with headlen and aligned to long */
	skb->tail += headlen;
	skb->len  += headlen;
}
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
	struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id];
	struct mlx5e_rx_wqe  *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
	struct sk_buff *skb;
	u16 cqe_bcnt;

	wi->consumed_strides += cstrides;

	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
		rq->stats.wqe_err++;
		goto mpwrq_cqe_out;
	}

	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
		rq->stats.mpwqe_filler++;
		goto mpwrq_cqe_out;
	}

	skb = napi_alloc_skb(rq->cq.napi,
			     ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD,
				   sizeof(long)));
	if (unlikely(!skb)) {
		rq->stats.buff_alloc_err++;
		goto mpwrq_cqe_out;
	}

	prefetch(skb->data);
	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

	mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

mpwrq_cqe_out:
	if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
		return;

	wi->free_wqe(rq, wi);
	mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}
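
/* NAPI RX poll: process up to @budget completions. A pending compressed CQE
 * session (cq->decmprs_left) is continued first, then regular and newly
 * started compressed CQEs are handled until the budget is exhausted or the
 * CQ is empty.
 */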
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
	int work_done = 0;

	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
		return 0;

	if (cq->decmprs_left)
		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);

	for (; work_done < budget; work_done++) {
		struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);

		if (!cqe)
			break;

		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
			work_done +=
				mlx5e_decompress_cqes_start(rq, cq,
							    budget - work_done);
			continue;
		}

		mlx5_cqwq_pop(&cq->wq);

		rq->handle_rx_cqe(rq, cqe);
	}

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */