/* Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>

#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "thunder_bgx.h"

#define DRV_NAME	"thunder-nicvf"
#define DRV_VERSION	"1.0"

/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM, 0xA11E) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM, 0xA11E) },
	{ 0, }	/* end of table */
};

MODULE_AUTHOR("Sunil Goutham");
MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, nicvf_id_table);

static int debug = 0x00;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");

static int cpi_alg = CPI_ALG_NONE;
module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");

static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic,
					  struct sk_buff *skb)
{
	if (skb->len <= 64)
		nic->drv_stats.rx_frames_64++;
	else if (skb->len <= 127)
		nic->drv_stats.rx_frames_127++;
	else if (skb->len <= 255)
		nic->drv_stats.rx_frames_255++;
	else if (skb->len <= 511)
		nic->drv_stats.rx_frames_511++;
	else if (skb->len <= 1023)
		nic->drv_stats.rx_frames_1023++;
	else if (skb->len <= 1518)
		nic->drv_stats.rx_frames_1518++;
	else
		nic->drv_stats.rx_frames_jumbo++;
}

/* The Cavium ThunderX network controller can *only* be found in SoCs
 * containing the ThunderX ARM64 CPU implementation. All accesses to the device
 * registers on this platform are implicitly strongly ordered with respect
 * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
 * with no memory barriers in this driver. The readq()/writeq() functions add
 * explicit ordering operations which in this case are redundant, and only
 * add overhead.
 */

/* Register read/write APIs */
void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
{
	writeq_relaxed(val, nic->reg_base + offset);
}

u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
{
	return readq_relaxed(nic->reg_base + offset);
}

void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
			   u64 qidx, u64 val)
{
	void __iomem *addr = nic->reg_base + offset;

	writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
}

u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
{
	void __iomem *addr = nic->reg_base + offset;

	return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
}
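
/* Illustrative note (not part of the original source): per-queue CSRs are
 * replicated at a fixed stride, so the qidx shift above selects the queue
 * instance. Assuming NIC_Q_NUM_SHIFT is that stride shift, reading CQ 2's
 * status amounts to:
 *
 *	nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, 2);
 *	// == readq_relaxed(reg_base + NIC_QSET_CQ_0_7_STATUS +
 *	//                  (2 << NIC_Q_NUM_SHIFT))
 */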

/* VF -> PF mailbox communication */
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
	u64 *msg = (u64 *)mbx;

	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
}
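
/* Illustrative note (not part of the original source): union nic_mbx is a
 * 16-byte message, so it reaches the PF as two 64-bit writes into the
 * NIC_VF_PF_MAILBOX_0_1 register pair; the PF side reads the same two
 * qwords back out of its view of the mailbox in its own handler.
 */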

int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
	int timeout = NIC_MBOX_MSG_TIMEOUT;
	int sleep = 10;

	nic->pf_acked = false;
	nic->pf_nacked = false;

	nicvf_write_to_mbx(nic, mbx);

	/* Wait for previous message to be acked, timeout 2sec */
	while (!nic->pf_acked) {
		if (nic->pf_nacked)
			return -EINVAL;
		msleep(sleep);
		if (nic->pf_acked)
			break;
		timeout -= sleep;
		if (!timeout) {
			netdev_err(nic->netdev,
				   "PF didn't ack to mbox msg %d from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EBUSY;
		}
	}

	return 0;
}
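
/* Illustrative note (not part of the original source): callers use the
 * mailbox as a blocking request/ack channel, e.g. (hypothetical caller):
 *
 *	union nic_mbx mbx = {};
 *
 *	mbx.msg.msg = NIC_MBOX_MSG_READY;
 *	err = nicvf_send_msg_to_pf(nic, &mbx);
 *	// err != 0: PF NACKed, or no ACK within NIC_MBOX_MSG_TIMEOUT
 *
 * pf_acked/pf_nacked are set asynchronously by nicvf_handle_mbx_intr()
 * when the PF's reply interrupt arrives.
 */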

/* Checks if VF is able to communicate with PF
 * and also gets the VNIC number this VF is associated to.
 */
static int nicvf_check_pf_ready(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_READY;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		netdev_err(nic->netdev,
			   "PF didn't respond to READY msg\n");
		return 0;
	}

	return 1;
}

static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
	if (bgx->rx)
		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
	else
		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
}

static void nicvf_handle_mbx_intr(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	u64 *mbx_data;
	u64 mbx_addr;
	int i;

	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
	mbx_data = (u64 *)&mbx;

	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
		*mbx_data = nicvf_reg_read(nic, mbx_addr);
		mbx_data++;
		mbx_addr += sizeof(u64);
	}

	netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
	switch (mbx.msg.msg) {
	case NIC_MBOX_MSG_READY:
		nic->pf_acked = true;
		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
		nic->node = mbx.nic_cfg.node_id;
		if (!nic->set_mac_pending)
			ether_addr_copy(nic->netdev->dev_addr,
					mbx.nic_cfg.mac_addr);
		nic->link_up = false;
		nic->duplex = 0;
		nic->speed = 0;
		break;
	case NIC_MBOX_MSG_ACK:
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_NACK:
		nic->pf_nacked = true;
		break;
	case NIC_MBOX_MSG_RSS_SIZE:
		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_STATS:
		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
		nic->pf_acked = true;
		nic->link_up = mbx.link_status.link_up;
		nic->duplex = mbx.link_status.duplex;
		nic->speed = mbx.link_status.speed;
		if (nic->link_up) {
			netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
				    nic->netdev->name, nic->speed,
				    nic->duplex == DUPLEX_FULL ?
				    "Full duplex" : "Half duplex");
			netif_carrier_on(nic->netdev);
			netif_tx_start_all_queues(nic->netdev);
		} else {
			netdev_info(nic->netdev, "%s: Link is Down\n",
				    nic->netdev->name);
			netif_carrier_off(nic->netdev);
			netif_tx_stop_all_queues(nic->netdev);
		}
		break;
	default:
		netdev_err(nic->netdev,
			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
		break;
	}
	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
}

static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
{
	union nic_mbx mbx = {};

	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
	mbx.mac.vf_id = nic->vf_id;
	ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_config_cpi(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
	mbx.cpi_cfg.vf_id = nic->vf_id;
	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;

	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_get_rss_size(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
	mbx.rss_size.vf_id = nic->vf_id;
	nicvf_send_msg_to_pf(nic, &mbx);
}

void nicvf_config_rss(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	struct nicvf_rss_info *rss = &nic->rss_info;
	int ind_tbl_len = rss->rss_size;
	int i, nextq = 0;

	mbx.rss_cfg.vf_id = nic->vf_id;
	mbx.rss_cfg.hash_bits = rss->hash_bits;
	while (ind_tbl_len) {
		mbx.rss_cfg.tbl_offset = nextq;
		mbx.rss_cfg.tbl_len = min(ind_tbl_len,
					  RSS_IND_TBL_LEN_PER_MBX_MSG);
		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
			  NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;

		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];

		nicvf_send_msg_to_pf(nic, &mbx);

		ind_tbl_len -= mbx.rss_cfg.tbl_len;
	}
}
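
/* Illustrative note (not part of the original source): the indirection
 * table is larger than a single mailbox message can carry, so it is sent
 * in chunks of RSS_IND_TBL_LEN_PER_MBX_MSG entries. E.g., with a
 * 128-entry table and 8 entries per message, the loop above emits one
 * NIC_MBOX_MSG_RSS_CFG message followed by 15 NIC_MBOX_MSG_RSS_CFG_CONT
 * messages, with tbl_offset advancing by 8 each time.
 */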

void nicvf_set_rss_key(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
	int idx;

	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
		nicvf_reg_write(nic, key_addr, rss->key[idx]);
		key_addr += sizeof(u64);
	}
}

static int nicvf_rss_init(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	int idx;

	nicvf_get_rss_size(nic);

	if ((nic->qs->rq_cnt <= 1) || (cpi_alg != CPI_ALG_NONE)) {
		rss->enable = false;
		rss->hash_bits = 0;
		return 0;
	}

	rss->enable = true;

	/* Using the HW reset value for now */
	rss->key[0] = 0xFEED0BADFEED0BADULL;
	rss->key[1] = 0xFEED0BADFEED0BADULL;
	rss->key[2] = 0xFEED0BADFEED0BADULL;
	rss->key[3] = 0xFEED0BADFEED0BADULL;
	rss->key[4] = 0xFEED0BADFEED0BADULL;

	nicvf_set_rss_key(nic);

	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);

	rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size));

	for (idx = 0; idx < rss->rss_size; idx++)
		rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
							       nic->qs->rq_cnt);
	nicvf_config_rss(nic);
	return 1;
}
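
/* Illustrative note (not part of the original source): hash_bits is,
 * roughly, the number of low-order RSS hash bits used to index the
 * indirection table; rounddown_pow_of_two() guards against a
 * non-power-of-two rss_size. E.g., rss_size = 128 gives
 * hash_bits = ilog2(128) = 7.
 */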

int nicvf_set_real_num_queues(struct net_device *netdev,
			      int tx_queues, int rx_queues)
{
	int err = 0;

	err = netif_set_real_num_tx_queues(netdev, tx_queues);
	if (err) {
		netdev_err(netdev,
			   "Failed to set no of Tx queues: %d\n", tx_queues);
		return err;
	}

	err = netif_set_real_num_rx_queues(netdev, rx_queues);
	if (err)
		netdev_err(netdev,
			   "Failed to set no of Rx queues: %d\n", rx_queues);
	return err;
}

static int nicvf_init_resources(struct nicvf *nic)
{
	int err;
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;

	/* Enable Qset */
	nicvf_qset_config(nic, true);

	/* Initialize queues and HW for data transfer */
	err = nicvf_config_data_transfer(nic, true);
	if (err) {
		netdev_err(nic->netdev,
			   "Failed to alloc/config VF's QSet resources\n");
		return err;
	}

	/* Send VF config done msg to PF */
	nicvf_write_to_mbx(nic, &mbx);

	return 0;
}

static void nicvf_snd_pkt_handler(struct net_device *netdev,
				  struct cmp_queue *cq,
				  struct cqe_send_t *cqe_tx, int cqe_type)
{
	struct sk_buff *skb = NULL;
	struct nicvf *nic = netdev_priv(netdev);
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;

	sq = &nic->qs->sq[cqe_tx->sq_idx];

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
		return;

	netdev_dbg(nic->netdev,
		   "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
		   __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
		   cqe_tx->sqe_ptr, hdr->subdesc_cnt);

	nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
	/* For TSO offloaded packets only one head SKB needs to be freed */
	if (skb) {
		prefetch(skb);
		dev_consume_skb_any(skb);
		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
	}
}

static void nicvf_rcv_pkt_handler(struct net_device *netdev,
				  struct napi_struct *napi,
				  struct cmp_queue *cq,
				  struct cqe_rx_t *cqe_rx, int cqe_type)
{
	struct sk_buff *skb;
	struct nicvf *nic = netdev_priv(netdev);
	int err = 0;

	/* Check for errors */
	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
	if (err && !cqe_rx->rb_cnt)
		return;

	skb = nicvf_get_rcv_skb(nic, cqe_rx);
	if (!skb) {
		netdev_dbg(nic->netdev, "Packet not received\n");
		return;
	}

	if (netif_msg_pktdata(nic)) {
		netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name,
			    skb, skb->len);
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
			       skb->data, skb->len, true);
	}

	/* If error packet, drop it here */
	if (err) {
		dev_kfree_skb_any(skb);
		return;
	}

	nicvf_set_rx_frame_cnt(nic, skb);

	skb_record_rx_queue(skb, cqe_rx->rq_idx);
	if (netdev->hw_features & NETIF_F_RXCSUM) {
		/* HW by default verifies TCP/UDP/SCTP checksums */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb_checksum_none_assert(skb);
	}

	skb->protocol = eth_type_trans(skb, netdev);

	if (napi && (netdev->features & NETIF_F_GRO))
		napi_gro_receive(napi, skb);
	else
		netif_receive_skb(skb);
}

static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
				 struct napi_struct *napi, int budget)
{
	int processed_cqe, work_done = 0, tx_done = 0;
	int cqe_count, cqe_head;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct cmp_queue *cq = &qs->cq[cq_idx];
	struct cqe_rx_t *cq_desc;
	struct netdev_queue *txq;

	spin_lock_bh(&cq->lock);
loop:
	processed_cqe = 0;
	/* Get no of valid CQ entries to process */
	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
	cqe_count &= CQ_CQE_COUNT;
	if (!cqe_count)
		goto done;

	/* Get head of the valid CQ entries */
	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
	cqe_head &= 0xFFFF;

	netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n",
		   __func__, cq_idx, cqe_count, cqe_head);
	while (processed_cqe < cqe_count) {
		/* Get the CQ descriptor */
		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
		cqe_head++;
		cqe_head &= (cq->dmem.q_len - 1);
		/* Initiate prefetch for next descriptor */
		prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));

		if ((work_done >= budget) && napi &&
		    (cq_desc->cqe_type != CQE_TYPE_SEND)) {
			break;
		}

		netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n",
			   cq_idx, cq_desc->cqe_type);
		switch (cq_desc->cqe_type) {
		case CQE_TYPE_RX:
			nicvf_rcv_pkt_handler(netdev, napi, cq,
					      cq_desc, CQE_TYPE_RX);
			work_done++;
			break;
		case CQE_TYPE_SEND:
			nicvf_snd_pkt_handler(netdev, cq,
					      (void *)cq_desc, CQE_TYPE_SEND);
			tx_done++;
			break;
		case CQE_TYPE_INVALID:
		case CQE_TYPE_RX_SPLIT:
		case CQE_TYPE_RX_TCP:
		case CQE_TYPE_SEND_PTP:
			/* Ignore for now */
			break;
		}
		processed_cqe++;
	}
	netdev_dbg(nic->netdev,
		   "%s CQ%d processed_cqe %d work_done %d budget %d\n",
		   __func__, cq_idx, processed_cqe, work_done, budget);

	/* Ring doorbell to inform H/W to reuse processed CQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
			      cq_idx, processed_cqe);

	if ((work_done < budget) && napi)
		goto loop;

done:
	/* Wakeup TXQ if its stopped earlier due to SQ full */
	if (tx_done) {
		txq = netdev_get_tx_queue(netdev, cq_idx);
		if (netif_tx_queue_stopped(txq)) {
			netif_tx_start_queue(txq);
			nic->drv_stats.txq_wake++;
			if (netif_msg_tx_err(nic))
				netdev_warn(netdev,
					    "%s: Transmit queue wakeup SQ%d\n",
					    netdev->name, cq_idx);
		}
	}

	spin_unlock_bh(&cq->lock);
	return work_done;
}
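
/* Illustrative note (not part of the original source): writing the count
 * of processed CQEs to the CQ doorbell hands those descriptor slots back
 * to the hardware; without it the CQ would eventually fill and stall.
 * The handler is shared by the NAPI path (napi != NULL, RX work capped
 * by budget) and the Qset-error recovery path, which calls it with
 * napi == NULL and budget 0 just to drain already-queued CQEs.
 */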

static int nicvf_poll(struct napi_struct *napi, int budget)
{
	u64 cq_head;
	int work_done = 0;
	struct net_device *netdev = napi->dev;
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_cq_poll *cq;

	cq = container_of(napi, struct nicvf_cq_poll, napi);
	work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);

	if (work_done < budget) {
		/* Slow packet rate, exit polling */
		napi_complete(napi);
		/* Re-enable interrupts */
		cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD,
					       cq->cq_idx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
		nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD,
				      cq->cq_idx, cq_head);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
	}
	return work_done;
}

/* Qset error interrupt handler
 *
 * As of now only CQ errors are handled
 */
static void nicvf_handle_qs_err(unsigned long data)
{
	struct nicvf *nic = (struct nicvf *)data;
	struct queue_set *qs = nic->qs;
	int qidx;
	u64 status;

	netif_tx_disable(nic->netdev);

	/* Check if it is CQ err */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
					      qidx);
		if (!(status & CQ_ERR_MASK))
			continue;
		/* Process already queued CQEs and reconfig CQ */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_sq_disable(nic, qidx);
		nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0);
		nicvf_cmp_queue_config(nic, qs, qidx, true);
		nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx);
		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);

		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
	}

	netif_tx_start_all_queues(nic->netdev);
	/* Re-enable Qset error interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}

static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;
	u64 intr;

	intr = nicvf_reg_read(nic, NIC_VF_INT);
	/* Check for spurious interrupt */
	if (!(intr & NICVF_INTR_MBOX_MASK))
		return IRQ_HANDLED;

	nicvf_handle_mbx_intr(nic);

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_intr_handler(int irq, void *nicvf_irq)
{
	u64 qidx, intr, clear_intr = 0;
	u64 cq_intr, rbdr_intr, qs_err_intr;
	struct nicvf *nic = (struct nicvf *)nicvf_irq;
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;

	intr = nicvf_reg_read(nic, NIC_VF_INT);
	if (netif_msg_intr(nic))
		netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
			    nic->netdev->name, intr);

	qs_err_intr = intr & NICVF_INTR_QS_ERR_MASK;
	if (qs_err_intr) {
		/* Disable Qset err interrupt and schedule softirq */
		nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
		tasklet_hi_schedule(&nic->qs_err_task);
		clear_intr |= qs_err_intr;
	}

	/* Disable interrupts and start polling */
	cq_intr = (intr & NICVF_INTR_CQ_MASK) >> NICVF_INTR_CQ_SHIFT;
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		if (!(cq_intr & (1 << qidx)))
			continue;
		if (!nicvf_is_intr_enabled(nic, NICVF_INTR_CQ, qidx))
			continue;

		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		clear_intr |= ((1 << qidx) << NICVF_INTR_CQ_SHIFT);

		cq_poll = nic->napi[qidx];
		/* Schedule NAPI */
		if (cq_poll)
			napi_schedule(&cq_poll->napi);
	}

	/* Handle RBDR interrupts */
	rbdr_intr = (intr & NICVF_INTR_RBDR_MASK) >> NICVF_INTR_RBDR_SHIFT;
	if (rbdr_intr) {
		/* Disable RBDR interrupt and schedule softirq */
		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
			if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
				continue;
			nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
			tasklet_hi_schedule(&nic->rbdr_task);
			clear_intr |= ((1 << qidx) << NICVF_INTR_RBDR_SHIFT);
		}
	}

	/* Clear interrupts */
	nicvf_reg_write(nic, NIC_VF_INT, clear_intr);
	return IRQ_HANDLED;
}

static int nicvf_enable_msix(struct nicvf *nic)
{
	int ret, vec;

	nic->num_vec = NIC_VF_MSIX_VECTORS;

	for (vec = 0; vec < nic->num_vec; vec++)
		nic->msix_entries[vec].entry = vec;

	ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
	if (ret) {
		netdev_err(nic->netdev,
			   "Req for #%d msix vectors failed\n", nic->num_vec);
		return 0;
	}
	nic->msix_enabled = 1;
	return 1;
}

static void nicvf_disable_msix(struct nicvf *nic)
{
	if (nic->msix_enabled) {
		pci_disable_msix(nic->pdev);
		nic->msix_enabled = 0;
		nic->num_vec = 0;
	}
}

static int nicvf_register_interrupts(struct nicvf *nic)
{
	int irq, free, ret = 0;
	int vector;

	for_each_cq_irq(irq)
		sprintf(nic->irq_name[irq], "NICVF%d CQ%d",
			nic->vf_id, irq);

	for_each_sq_irq(irq)
		sprintf(nic->irq_name[irq], "NICVF%d SQ%d",
			nic->vf_id, irq - NICVF_INTR_ID_SQ);

	for_each_rbdr_irq(irq)
		sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
			nic->vf_id, irq - NICVF_INTR_ID_RBDR);

	/* Register all interrupts except mailbox */
	for (irq = 0; irq < NICVF_INTR_ID_SQ; irq++) {
		vector = nic->msix_entries[irq].vector;
		ret = request_irq(vector, nicvf_intr_handler,
				  0, nic->irq_name[irq], nic);
		if (ret)
			break;
		nic->irq_allocated[irq] = true;
	}

	for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_MISC; irq++) {
		vector = nic->msix_entries[irq].vector;
		ret = request_irq(vector, nicvf_intr_handler,
				  0, nic->irq_name[irq], nic);
		if (ret)
			break;
		nic->irq_allocated[irq] = true;
	}

	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
		"NICVF%d Qset error", nic->vf_id);
	if (!ret) {
		vector = nic->msix_entries[NICVF_INTR_ID_QS_ERR].vector;
		irq = NICVF_INTR_ID_QS_ERR;
		ret = request_irq(vector, nicvf_intr_handler,
				  0, nic->irq_name[irq], nic);
		if (!ret)
			nic->irq_allocated[irq] = true;
	}

	if (ret) {
		netdev_err(nic->netdev, "Request irq failed\n");
		for (free = 0; free < irq; free++)
			free_irq(nic->msix_entries[free].vector, nic);
		return ret;
	}

	return 0;
}

static void nicvf_unregister_interrupts(struct nicvf *nic)
{
	int irq;

	/* Free registered interrupts */
	for (irq = 0; irq < nic->num_vec; irq++) {
		if (nic->irq_allocated[irq])
			free_irq(nic->msix_entries[irq].vector, nic);
		nic->irq_allocated[irq] = false;
	}

	/* Disable MSI-X */
	nicvf_disable_msix(nic);
}

/* Initialize MSIX vectors and register MISC interrupt.
 * Send READY message to PF to check if it's alive
 */
static int nicvf_register_misc_interrupt(struct nicvf *nic)
{
	int ret = 0;
	int irq = NICVF_INTR_ID_MISC;

	/* Return if mailbox interrupt is already registered */
	if (nic->msix_enabled)
		return 0;

	/* Enable MSI-X */
	if (!nicvf_enable_msix(nic))
		return 1;

	sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
	/* Register Misc interrupt */
	ret = request_irq(nic->msix_entries[irq].vector,
			  nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);

	if (ret)
		return ret;
	nic->irq_allocated[irq] = true;

	/* Enable mailbox interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);

	/* Check if VF is able to communicate with PF */
	if (!nicvf_check_pf_ready(nic)) {
		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
		nicvf_unregister_interrupts(nic);
		return 1;
	}

	return 0;
}

static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);
	int qid = skb_get_queue_mapping(skb);
	struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);

	/* Check for minimum packet length */
	if (skb->len <= ETH_HLEN) {
		dev_kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
		netif_tx_stop_queue(txq);
		nic->drv_stats.txq_stop++;
		if (netif_msg_tx_err(nic))
			netdev_warn(netdev,
				    "%s: Transmit ring full, stopping SQ%d\n",
				    netdev->name, qid);

		return NETDEV_TX_BUSY;
	}

	return NETDEV_TX_OK;
}
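
/* Illustrative note (not part of the original source): when the append
 * fails because the SQ is full, the skb is still owned by the driver and
 * the queue is stopped, so returning NETDEV_TX_BUSY asks the stack to
 * requeue and retry the same skb once nicvf_cq_intr_handler() wakes the
 * queue on TX completion.
 */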

int nicvf_stop(struct net_device *netdev)
{
	int irq, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
	nicvf_send_msg_to_pf(nic, &mbx);

	netif_carrier_off(netdev);

	/* Disable RBDR & QS error interrupts */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Wait for pending IRQ handlers to finish */
	for (irq = 0; irq < nic->num_vec; irq++)
		synchronize_irq(nic->msix_entries[irq].vector);

	tasklet_kill(&nic->rbdr_task);
	tasklet_kill(&nic->qs_err_task);
	if (nic->rb_work_scheduled)
		cancel_delayed_work_sync(&nic->rbdr_work);

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		nic->napi[qidx] = NULL;
		napi_synchronize(&cq_poll->napi);
		/* CQ intr is enabled while napi_complete,
		 * so disable it now
		 */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
		kfree(cq_poll);
	}

	netif_tx_disable(netdev);

	/* Free resources */
	nicvf_config_data_transfer(nic, false);

	/* Disable HW Qset */
	nicvf_qset_config(nic, false);

	/* disable mailbox interrupt */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

	nicvf_unregister_interrupts(nic);

	return 0;
}

int nicvf_open(struct net_device *netdev)
{
	int err, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;

	nic->mtu = netdev->mtu;

	netif_carrier_off(netdev);

	err = nicvf_register_misc_interrupt(nic);
	if (err)
		return err;

	/* Register NAPI handler for processing CQEs */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
		if (!cq_poll) {
			err = -ENOMEM;
			goto napi_del;
		}
		cq_poll->cq_idx = qidx;
		netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
			       NAPI_POLL_WEIGHT);
		napi_enable(&cq_poll->napi);
		nic->napi[qidx] = cq_poll;
	}

	/* Check if we got MAC address from PF or else generate a random MAC */
	if (is_zero_ether_addr(netdev->dev_addr)) {
		eth_hw_addr_random(netdev);
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	if (nic->set_mac_pending) {
		nic->set_mac_pending = false;
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Init tasklet for handling Qset err interrupt */
	tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
		     (unsigned long)nic);

	/* Init RBDR tasklet which will refill RBDR */
	tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
		     (unsigned long)nic);
	INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

	/* Configure CPI algorithm */
	nic->cpi_alg = cpi_alg;
	nicvf_config_cpi(nic);

	/* Configure receive side scaling */
	nicvf_rss_init(nic);

	err = nicvf_register_interrupts(nic);
	if (err)
		goto cleanup;

	/* Initialize the queues */
	err = nicvf_init_resources(nic);
	if (err)
		goto cleanup;

	/* Make sure queue initialization is written */
	wmb();

	nicvf_reg_write(nic, NIC_VF_INT, -1);
	/* Enable Qset err interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Enable completion queue interrupt */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Enable RBDR threshold interrupt */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);

	nic->drv_stats.txq_stop = 0;
	nic->drv_stats.txq_wake = 0;

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);

	return 0;
cleanup:
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
	nicvf_unregister_interrupts(nic);
napi_del:
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
		kfree(cq_poll);
		nic->napi[qidx] = NULL;
	}
	return err;
}

static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
	union nic_mbx mbx = {};

	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
	mbx.frs.max_frs = mtu;
	mbx.frs.vf_id = nic->vf_id;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nicvf *nic = netdev_priv(netdev);

	if (new_mtu > NIC_HW_MAX_FRS)
		return -EINVAL;

	if (new_mtu < NIC_HW_MIN_FRS)
		return -EINVAL;

	if (nicvf_update_hw_max_frs(nic, new_mtu))
		return -EINVAL;
	netdev->mtu = new_mtu;
	nic->mtu = new_mtu;

	return 0;
}

static int nicvf_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *addr = p;
	struct nicvf *nic = netdev_priv(netdev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

	if (nic->msix_enabled) {
		if (nicvf_hw_set_mac_addr(nic, netdev))
			return -EBUSY;
	} else {
		nic->set_mac_pending = true;
	}

	return 0;
}

void nicvf_update_lmac_stats(struct nicvf *nic)
{
	int stat = 0;
	union nic_mbx mbx = {};

	if (!netif_running(nic->netdev))
		return;

	mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
	mbx.bgx_stats.vf_id = nic->vf_id;
	/* Rx stats */
	mbx.bgx_stats.rx = 1;
	while (stat < BGX_RX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}

	stat = 0;

	/* Tx stats */
	mbx.bgx_stats.rx = 0;
	while (stat < BGX_TX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}
}

void nicvf_update_stats(struct nicvf *nic)
{
	int qidx;
	struct nicvf_hw_stats *stats = &nic->hw_stats;
	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
	struct queue_set *qs = nic->qs;

#define GET_RX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
#define GET_TX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
	stats->rx_drop_red = GET_RX_STATS(RX_RED);
	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

	stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS);
	stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST);
	stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST);
	stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
	stats->tx_drops = GET_TX_STATS(TX_DROP);

	drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
				  stats->tx_bcast_frames_ok +
				  stats->tx_mcast_frames_ok;
	drv_stats->rx_drops = stats->rx_drop_red +
			      stats->rx_drop_overrun;
	drv_stats->tx_drops = stats->tx_drops;

	/* Update RQ and SQ stats */
	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
		nicvf_update_rq_stats(nic, qidx);
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_update_sq_stats(nic, qidx);
}
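
/* Illustrative note (not part of the original source): the statistics
 * block exposes one 64-bit register per counter, so GET_RX_STATS() turns
 * a counter index into a byte offset by shifting left 3 (multiplying by
 * 8). E.g. GET_RX_STATS(RX_OCTS) reads the qword at
 * NIC_VNIC_RX_STAT_0_13 | (RX_OCTS << 3).
 */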

static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev,
					struct rtnl_link_stats64 *stats)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;

	nicvf_update_stats(nic);

	stats->rx_bytes = hw_stats->rx_bytes;
	stats->rx_packets = drv_stats->rx_frames_ok;
	stats->rx_dropped = drv_stats->rx_drops;
	stats->multicast = hw_stats->rx_mcast_frames;

	stats->tx_bytes = hw_stats->tx_bytes_ok;
	stats->tx_packets = drv_stats->tx_frames_ok;
	stats->tx_dropped = drv_stats->tx_drops;

	return stats;
}

static void nicvf_tx_timeout(struct net_device *dev)
{
	struct nicvf *nic = netdev_priv(dev);

	if (netif_msg_tx_err(nic))
		netdev_warn(dev, "%s: Transmit timed out, resetting\n",
			    dev->name);

	schedule_work(&nic->reset_task);
}

static void nicvf_reset_task(struct work_struct *work)
{
	struct nicvf *nic;

	nic = container_of(work, struct nicvf, reset_task);

	if (!netif_running(nic->netdev))
		return;

	nicvf_stop(nic->netdev);
	nicvf_open(nic->netdev);
	nic->netdev->trans_start = jiffies;
}

static const struct net_device_ops nicvf_netdev_ops = {
	.ndo_open		= nicvf_open,
	.ndo_stop		= nicvf_stop,
	.ndo_start_xmit		= nicvf_xmit,
	.ndo_change_mtu		= nicvf_change_mtu,
	.ndo_set_mac_address	= nicvf_set_mac_address,
	.ndo_get_stats64	= nicvf_get_stats64,
	.ndo_tx_timeout		= nicvf_tx_timeout,
};

static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct device *dev = &pdev->dev;
	struct net_device *netdev;
	struct nicvf *nic;
	struct queue_set *qs;
	int err;

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(dev, "Failed to enable PCI device\n");
		return err;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(dev, "PCI request regions failed 0x%x\n", err);
		goto err_disable_device;
	}

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "Unable to get usable DMA configuration\n");
		goto err_release_regions;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
		goto err_release_regions;
	}

	netdev = alloc_etherdev_mqs(sizeof(struct nicvf),
				    MAX_RCV_QUEUES_PER_QS,
				    MAX_SND_QUEUES_PER_QS);
	if (!netdev) {
		err = -ENOMEM;
		goto err_release_regions;
	}

	pci_set_drvdata(pdev, netdev);

	SET_NETDEV_DEV(netdev, &pdev->dev);

	nic = netdev_priv(netdev);
	nic->netdev = netdev;
	nic->pdev = pdev;

	/* MAP VF's configuration registers */
	nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
	if (!nic->reg_base) {
		dev_err(dev, "Cannot map config register space, aborting\n");
		err = -ENOMEM;
		goto err_free_netdev;
	}

	err = nicvf_set_qset_resources(nic);
	if (err)
		goto err_free_netdev;

	qs = nic->qs;

	err = nicvf_set_real_num_queues(netdev, qs->sq_cnt, qs->rq_cnt);
	if (err)
		goto err_free_netdev;

	/* Check if PF is alive and get MAC address for this VF */
	err = nicvf_register_misc_interrupt(nic);
	if (err)
		goto err_free_netdev;

	netdev->features |= (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
			     NETIF_F_TSO | NETIF_F_GRO);
	netdev->hw_features = netdev->features;

	netdev->netdev_ops = &nicvf_netdev_ops;
	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;

	INIT_WORK(&nic->reset_task, nicvf_reset_task);

	err = register_netdev(netdev);
	if (err) {
		dev_err(dev, "Failed to register netdevice\n");
		goto err_unregister_interrupts;
	}

	nic->msg_enable = debug;

	nicvf_set_ethtool_ops(netdev);

	return 0;

err_unregister_interrupts:
	nicvf_unregister_interrupts(nic);
err_free_netdev:
	pci_set_drvdata(pdev, NULL);
	free_netdev(netdev);
err_release_regions:
	pci_release_regions(pdev);
err_disable_device:
	pci_disable_device(pdev);
	return err;
}

static void nicvf_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct nicvf *nic = netdev_priv(netdev);

	unregister_netdev(netdev);
	nicvf_unregister_interrupts(nic);
	pci_set_drvdata(pdev, NULL);
	free_netdev(netdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void nicvf_shutdown(struct pci_dev *pdev)
{
	nicvf_remove(pdev);
}

static struct pci_driver nicvf_driver = {
	.name		= DRV_NAME,
	.id_table	= nicvf_id_table,
	.probe		= nicvf_probe,
	.remove		= nicvf_remove,
	.shutdown	= nicvf_shutdown,
};

static int __init nicvf_init_module(void)
{
	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);

	return pci_register_driver(&nicvf_driver);
}

static void __exit nicvf_cleanup_module(void)
{
	pci_unregister_driver(&nicvf_driver);
}

module_init(nicvf_init_module);
module_exit(nicvf_cleanup_module);