2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 #include <linux/bitops.h>
38 #include <linux/compiler.h>
39 #include <linux/list.h>
40 #include <linux/mutex.h>
41 #include <linux/netdevice.h>
42 #include <linux/if_vlan.h>
43 #include <linux/net_tstamp.h>
44 #ifdef CONFIG_MLX4_EN_DCB
45 #include <linux/dcbnl.h>
47 #include <linux/cpu_rmap.h>
49 #include <linux/mlx4/device.h>
50 #include <linux/mlx4/qp.h>
51 #include <linux/mlx4/cq.h>
52 #include <linux/mlx4/srq.h>
53 #include <linux/mlx4/doorbell.h>
54 #include <linux/mlx4/cmd.h>
/* Driver identity strings. */
#define DRV_NAME	"mlx4_en"
#define DRV_VERSION	"2.0"
#define DRV_RELDATE	"Dec 2011"

/* Message classes OR-ed together to form the driver's message level. */
#define MLX4_EN_MSG_LEVEL	(NETIF_MSG_LINK | NETIF_MSG_IFDOWN)

/* Driver page size: 1 << 12 = 4096 bytes. */
#define MLX4_EN_PAGE_SHIFT	12
#define MLX4_EN_PAGE_SIZE	(1 << MLX4_EN_PAGE_SHIFT)

/* Rx ring counts (default / maximum / minimum, going by the names). */
#define DEF_RX_RINGS		16
#define MAX_RX_RINGS		128
#define MIN_RX_RINGS		4

/* 2048 bytes expressed in TXBB_SIZE units, plus one. */
#define HEADROOM		(2048 / TXBB_SIZE + 1)

/* STAMP_DWORDS is STAMP_STRIDE counted in 4-byte words. */
#define STAMP_STRIDE		64
#define STAMP_DWORDS		(STAMP_STRIDE / 4)
#define STAMP_SHIFT		31
#define STAMP_VAL		0x7fffffff

/* Both delays are a quarter of HZ. */
#define STATS_DELAY		(HZ / 4)
#define SERVICE_TASK_DELAY	(HZ / 4)

#define MAX_NUM_OF_FS_RULES	256

#define MLX4_EN_FILTER_HASH_SHIFT	4
#define MLX4_EN_FILTER_EXPIRY_QUOTA	60

/* Typical TSO descriptor with 16 gather entries is 352 bytes... */
#define MAX_DESC_SIZE		512
#define MAX_DESC_TXBBS		(MAX_DESC_SIZE / TXBB_SIZE)
/* OS related constants and tunables */
/* Watchdog timeout, expressed as 15 * HZ. */
#define MLX4_EN_WATCHDOG_TIMEOUT	(15 * HZ)

/* Use the maximum between 16384 and a single page */
#define MLX4_EN_ALLOC_SIZE		PAGE_ALIGN(16384)
/* Allocation order derived from MLX4_EN_ALLOC_SIZE via get_order(). */
#define MLX4_EN_ALLOC_ORDER		get_order(MLX4_EN_ALLOC_SIZE)
101 /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
102 * and 4K allocations) */
104 FRAG_SZ0
= 1536 - NET_IP_ALIGN
,
107 FRAG_SZ3
= MLX4_EN_ALLOC_SIZE
/* Size of the per-ring / per-port Rx fragment arrays in this header. */
#define MLX4_EN_MAX_RX_FRAGS		4

/* Maximum ring sizes */
#define MLX4_EN_MAX_TX_SIZE		8192
#define MLX4_EN_MAX_RX_SIZE		8192

/* Minimum ring size for our page-allocation scheme to work */
#define MLX4_EN_MIN_RX_SIZE		(MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES)
/* 4096 bytes expressed in TXBB_SIZE units. */
#define MLX4_EN_MIN_TX_SIZE		(4096 / TXBB_SIZE)

#define MLX4_EN_SMALL_PKT_SIZE		64
#define MLX4_EN_MAX_TX_RING_P_UP	32
#define MLX4_EN_NUM_UP			8
#define MLX4_EN_DEF_TX_RING_SIZE	512
#define MLX4_EN_DEF_RX_RING_SIZE	1024
124 #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \
/* Target number of packets to coalesce with interrupt moderation */
#define MLX4_EN_RX_COAL_TARGET		44
#define MLX4_EN_RX_COAL_TIME		0x10

#define MLX4_EN_TX_COAL_PKTS		16
#define MLX4_EN_TX_COAL_TIME		0x10

/* Low/high Rx rate marks and the coalescing time paired with each. */
#define MLX4_EN_RX_RATE_LOW		400000
#define MLX4_EN_RX_COAL_TIME_LOW	0
#define MLX4_EN_RX_RATE_HIGH		450000
#define MLX4_EN_RX_COAL_TIME_HIGH	128
#define MLX4_EN_RX_SIZE_THRESH		1024
/* 1000000 / 128 with integer division, i.e. 7812. */
#define MLX4_EN_RX_RATE_THRESH		(1000000 / MLX4_EN_RX_COAL_TIME_HIGH)
#define MLX4_EN_SAMPLE_INTERVAL		0
#define MLX4_EN_AVG_PKT_SMALL		256

#define MLX4_EN_AUTO_CONF		0xffff

#define MLX4_EN_DEF_RX_PAUSE		1
#define MLX4_EN_DEF_TX_PAUSE		1
/*
 * Interval between successive polls in the Tx routine when polling is used
 * instead of interrupts (in per-core Tx rings) - should be power of 2
 */
#define MLX4_EN_TX_POLL_MODER		16
#define MLX4_EN_TX_POLL_TIMEOUT		(HZ / 4)

#define ETH_LLC_SNAP_SIZE		8

/* Both sizes are reduced by NET_IP_ALIGN. */
#define SMALL_PACKET_SIZE		(256 - NET_IP_ALIGN)
#define HEADER_COPY_SIZE		(128 - NET_IP_ALIGN)
/* Header-copy size minus the Ethernet header length. */
#define MLX4_LOOPBACK_TEST_PAYLOAD	(HEADER_COPY_SIZE - ETH_HLEN)

#define MLX4_EN_MIN_MTU			46
/* All 48 address bits set. */
#define ETH_BCAST			0xffffffffffffULL

#define MLX4_EN_LOOPBACK_RETRIES	5
#define MLX4_EN_LOOPBACK_TIMEOUT	100
/*
 * Performance counter helpers.
 *
 * With MLX4_EN_PERF_STAT defined the macros update/read real counters;
 * otherwise they compile to no-ops and the getters evaluate to 0.  The two
 * sets are alternatives, so they must be separated by #else.
 */
#ifdef MLX4_EN_PERF_STAT
/* Number of samples to 'average' */
#ifndef AVG_SIZE
/* NOTE(review): AVG_PERF_COUNTER needs AVG_SIZE; 128 assumed - confirm. */
#define AVG_SIZE			128
#endif
#define AVG_FACTOR			1024
#define NUM_PERF_STATS			NUM_PERF_COUNTERS

#define INC_PERF_COUNTER(cnt)		(++(cnt))
#define ADD_PERF_COUNTER(cnt, add)	((cnt) += (add))
/* Running average, kept scaled by AVG_FACTOR to retain precision. */
#define AVG_PERF_COUNTER(cnt, sample) \
	((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE)
#define GET_PERF_COUNTER(cnt)		(cnt)
/* Undo the AVG_FACTOR scaling applied by AVG_PERF_COUNTER. */
#define GET_AVG_PERF_COUNTER(cnt)	((cnt) / AVG_FACTOR)

#else

#define NUM_PERF_STATS			0
#define INC_PERF_COUNTER(cnt)		do {} while (0)
#define ADD_PERF_COUNTER(cnt, add)	do {} while (0)
#define AVG_PERF_COUNTER(cnt, sample)	do {} while (0)
#define GET_PERF_COUNTER(cnt)		(0)
#define GET_AVG_PERF_COUNTER(cnt)	(0)
#endif /* MLX4_EN_PERF_STAT */
/* ilog2() applied to x after rounding x up to a power of two. */
#define ROUNDUP_LOG2(x)		ilog2(roundup_pow_of_two(x))
/* 1 when x and y have the same truth value, 0 otherwise. */
#define XNOR(x, y)		(!!(x) == !!(y))
205 struct mlx4_en_tx_info
{
216 #define MLX4_EN_BIT_DESC_OWN 0x80000000
217 #define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg)
218 #define MLX4_EN_MEMTYPE_PAD 0x100
219 #define DS_SIZE sizeof(struct mlx4_wqe_data_seg)
222 struct mlx4_en_tx_desc
{
223 struct mlx4_wqe_ctrl_seg ctrl
;
225 struct mlx4_wqe_data_seg data
; /* at least one data segment */
226 struct mlx4_wqe_lso_seg lso
;
227 struct mlx4_wqe_inline_seg inl
;
231 #define MLX4_EN_USE_SRQ 0x01000000
233 #define MLX4_EN_CX3_LOW_ID 0x1000
234 #define MLX4_EN_CX3_HIGH_ID 0x1005
236 struct mlx4_en_rx_alloc
{
242 struct mlx4_en_tx_ring
{
243 struct mlx4_hwq_resources wqres
;
244 u32 size
; /* number of TXBBs */
247 u16 cqn
; /* index of port CQ associated with this ring */
254 struct mlx4_en_tx_info
*tx_info
;
258 struct mlx4_qp_context context
;
260 enum mlx4_qp_state qp_state
;
261 struct mlx4_srq dummy
;
263 unsigned long packets
;
264 unsigned long tx_csum
;
267 struct netdev_queue
*tx_queue
;
268 int hwtstamp_tx_type
;
271 struct mlx4_en_rx_desc
{
272 /* actual number of entries depends on rx ring stride */
273 struct mlx4_wqe_data_seg data
[0];
276 struct mlx4_en_rx_ring
{
277 struct mlx4_hwq_resources wqres
;
278 struct mlx4_en_rx_alloc page_alloc
[MLX4_EN_MAX_RX_FRAGS
];
279 u32 size
; /* number of Rx descs*/
284 u16 cqn
; /* index of port CQ associated with this ring */
292 unsigned long packets
;
293 unsigned long csum_ok
;
294 unsigned long csum_none
;
295 int hwtstamp_rx_filter
;
300 struct mlx4_hwq_resources wqres
;
303 struct net_device
*dev
;
304 struct napi_struct napi
;
311 struct mlx4_cqe
*buf
;
312 #define MLX4_EN_OPCODE_ERROR 0x1e
315 struct mlx4_en_port_profile
{
328 struct mlx4_en_profile
{
335 u8 num_tx_rings_p_up
;
336 struct mlx4_en_port_profile prof
[MLX4_MAX_PORTS
+ 1];
340 struct mlx4_dev
*dev
;
341 struct pci_dev
*pdev
;
342 struct mutex state_lock
;
343 struct net_device
*pndev
[MLX4_MAX_PORTS
+ 1];
346 struct mlx4_en_profile profile
;
348 struct workqueue_struct
*workqueue
;
349 struct device
*dma_device
;
350 void __iomem
*uar_map
;
351 struct mlx4_uar priv_uar
;
355 u8 mac_removed
[MLX4_MAX_PORTS
+ 1];
356 struct cyclecounter cycles
;
357 struct timecounter clock
;
358 unsigned long last_overflow_check
;
359 unsigned long overflow_period
;
363 struct mlx4_en_rss_map
{
365 struct mlx4_qp qps
[MAX_RX_RINGS
];
366 enum mlx4_qp_state state
[MAX_RX_RINGS
];
367 struct mlx4_qp indir_qp
;
368 enum mlx4_qp_state indir_state
;
371 struct mlx4_en_port_state
{
377 struct mlx4_en_pkt_stats
{
378 unsigned long broadcast
;
379 unsigned long rx_prio
[8];
380 unsigned long tx_prio
[8];
381 #define NUM_PKT_STATS 17
384 struct mlx4_en_port_stats
{
385 unsigned long tso_packets
;
386 unsigned long queue_stopped
;
387 unsigned long wake_queue
;
388 unsigned long tx_timeout
;
389 unsigned long rx_alloc_failed
;
390 unsigned long rx_chksum_good
;
391 unsigned long rx_chksum_none
;
392 unsigned long tx_chksum_offload
;
393 #define NUM_PORT_STATS 8
396 struct mlx4_en_perf_stats
{
403 #define NUM_PERF_COUNTERS 6
406 enum mlx4_en_mclist_act
{
412 struct mlx4_en_mc_list
{
413 struct list_head list
;
414 enum mlx4_en_mclist_act action
;
419 struct mlx4_en_frag_info
{
421 u16 frag_prefix_size
;
428 #ifdef CONFIG_MLX4_EN_DCB
429 /* Minimal TC BW - setting to 0 will block traffic */
430 #define MLX4_EN_BW_MIN 1
431 #define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */
433 #define MLX4_EN_TC_ETS 7
437 struct ethtool_flow_id
{
438 struct list_head list
;
439 struct ethtool_rx_flow_spec flow_spec
;
444 MLX4_EN_FLAG_PROMISC
= (1 << 0),
445 MLX4_EN_FLAG_MC_PROMISC
= (1 << 1),
446 /* whether we need to enable hardware loopback by putting dmac
449 MLX4_EN_FLAG_ENABLE_HW_LOOPBACK
= (1 << 2),
450 /* whether we need to drop packets that hardware loopback-ed */
451 MLX4_EN_FLAG_RX_FILTER_NEEDED
= (1 << 3),
452 MLX4_EN_FLAG_FORCE_PROMISC
= (1 << 4)
455 #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE)
456 #define MLX4_EN_MAC_HASH_IDX 5
458 struct mlx4_en_priv
{
459 struct mlx4_en_dev
*mdev
;
460 struct mlx4_en_port_profile
*prof
;
461 struct net_device
*dev
;
462 unsigned long active_vlans
[BITS_TO_LONGS(VLAN_N_VID
)];
463 struct net_device_stats stats
;
464 struct net_device_stats ret_stats
;
465 struct mlx4_en_port_state port_state
;
466 spinlock_t stats_lock
;
467 struct ethtool_flow_id ethtool_rules
[MAX_NUM_OF_FS_RULES
];
468 /* To allow rules removal while port is going down */
469 struct list_head ethtool_list
;
471 unsigned long last_moder_packets
[MAX_RX_RINGS
];
472 unsigned long last_moder_tx_packets
;
473 unsigned long last_moder_bytes
[MAX_RX_RINGS
];
474 unsigned long last_moder_jiffies
;
475 int last_moder_time
[MAX_RX_RINGS
];
485 u16 adaptive_rx_coal
;
488 u32 validate_loopback
;
490 struct mlx4_hwq_resources res
;
498 unsigned char prev_mac
[ETH_ALEN
+ 2];
504 struct mlx4_en_rss_map rss_map
;
507 u8 num_tx_rings_p_up
;
511 struct mlx4_en_frag_info frag_info
[MLX4_EN_MAX_RX_FRAGS
];
515 struct mlx4_en_tx_ring
*tx_ring
;
516 struct mlx4_en_rx_ring rx_ring
[MAX_RX_RINGS
];
517 struct mlx4_en_cq
*tx_cq
;
518 struct mlx4_en_cq rx_cq
[MAX_RX_RINGS
];
519 struct mlx4_qp drop_qp
;
520 struct work_struct rx_mode_task
;
521 struct work_struct watchdog_task
;
522 struct work_struct linkstate_task
;
523 struct delayed_work stats_task
;
524 struct delayed_work service_task
;
525 struct mlx4_en_perf_stats pstats
;
526 struct mlx4_en_pkt_stats pkstats
;
527 struct mlx4_en_port_stats port_stats
;
529 struct list_head mc_list
;
530 struct list_head curr_list
;
532 struct mlx4_en_stat_out_mbox hw_stats
;
537 struct hlist_head mac_hash
[MLX4_EN_MAC_HASH_SIZE
];
538 struct hwtstamp_config hwtstamp_config
;
540 #ifdef CONFIG_MLX4_EN_DCB
542 u16 maxrate
[IEEE_8021QAZ_MAX_TCS
];
544 #ifdef CONFIG_RFS_ACCEL
545 spinlock_t filters_lock
;
547 struct list_head filters
;
548 struct hlist_head filter_hash
[1 << MLX4_EN_FILTER_HASH_SHIFT
];
554 MLX4_EN_WOL_MAGIC
= (1ULL << 61),
555 MLX4_EN_WOL_ENABLED
= (1ULL << 62),
558 struct mlx4_mac_entry
{
559 struct hlist_node hlist
;
560 unsigned char mac
[ETH_ALEN
+ 2];
565 #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
567 void mlx4_en_update_loopback_state(struct net_device
*dev
,
568 netdev_features_t features
);
570 void mlx4_en_destroy_netdev(struct net_device
*dev
);
571 int mlx4_en_init_netdev(struct mlx4_en_dev
*mdev
, int port
,
572 struct mlx4_en_port_profile
*prof
);
574 int mlx4_en_start_port(struct net_device
*dev
);
575 void mlx4_en_stop_port(struct net_device
*dev
, int detach
);
577 void mlx4_en_free_resources(struct mlx4_en_priv
*priv
);
578 int mlx4_en_alloc_resources(struct mlx4_en_priv
*priv
);
/* Completion queue creation and destruction (declarations only). */
int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
		      int entries, int ring, enum cq_type mode);
void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
583 int mlx4_en_activate_cq(struct mlx4_en_priv
*priv
, struct mlx4_en_cq
*cq
,
/* Completion queue deactivation, moderation and arming (declarations only). */
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
589 void mlx4_en_tx_irq(struct mlx4_cq
*mcq
);
590 u16
mlx4_en_select_queue(struct net_device
*dev
, struct sk_buff
*skb
);
591 netdev_tx_t
mlx4_en_xmit(struct sk_buff
*skb
, struct net_device
*dev
);
593 int mlx4_en_create_tx_ring(struct mlx4_en_priv
*priv
, struct mlx4_en_tx_ring
*ring
,
594 int qpn
, u32 size
, u16 stride
);
595 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv
*priv
, struct mlx4_en_tx_ring
*ring
);
596 int mlx4_en_activate_tx_ring(struct mlx4_en_priv
*priv
,
597 struct mlx4_en_tx_ring
*ring
,
598 int cq
, int user_prio
);
599 void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv
*priv
,
600 struct mlx4_en_tx_ring
*ring
);
602 int mlx4_en_create_rx_ring(struct mlx4_en_priv
*priv
,
603 struct mlx4_en_rx_ring
*ring
,
604 u32 size
, u16 stride
);
605 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv
*priv
,
606 struct mlx4_en_rx_ring
*ring
,
607 u32 size
, u16 stride
);
608 int mlx4_en_activate_rx_rings(struct mlx4_en_priv
*priv
);
609 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv
*priv
,
610 struct mlx4_en_rx_ring
*ring
);
611 int mlx4_en_process_rx_cq(struct net_device
*dev
,
612 struct mlx4_en_cq
*cq
,
/* Rx polling, QP context setup and buffer mapping (declarations only). */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
			     int is_tx, int rss, int qpn, int cqn,
			     int user_prio,
			     struct mlx4_qp_context *context);
void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
int mlx4_en_map_buffer(struct mlx4_buf *buf);
void mlx4_en_unmap_buffer(struct mlx4_buf *buf);

/* Rx buffer sizing, RSS steering, drop QP and Tx buffer release. */
void mlx4_en_calc_rx_buf(struct net_device *dev);
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv);
void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv);
int mlx4_en_free_tx_buf(struct net_device *dev,
			struct mlx4_en_tx_ring *ring);
void mlx4_en_rx_irq(struct mlx4_cq *mcq);
630 int mlx4_SET_MCAST_FLTR(struct mlx4_dev
*dev
, u8 port
, u64 mac
, u64 clear
, u8 mode
);
631 int mlx4_SET_VLAN_FLTR(struct mlx4_dev
*dev
, struct mlx4_en_priv
*priv
);
633 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev
*mdev
, u8 port
, u8 reset
);
634 int mlx4_en_QUERY_PORT(struct mlx4_en_dev
*mdev
, u8 port
);
636 #ifdef CONFIG_MLX4_EN_DCB
637 extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops
;
638 extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops
;
641 int mlx4_en_setup_tc(struct net_device
*dev
, u8 up
);
643 #ifdef CONFIG_RFS_ACCEL
644 void mlx4_en_cleanup_filters(struct mlx4_en_priv
*priv
,
645 struct mlx4_en_rx_ring
*rx_ring
);
648 #define MLX4_EN_NUM_SELF_TEST 5
649 void mlx4_en_ex_selftest(struct net_device
*dev
, u32
*flags
, u64
*buf
);
650 u64
mlx4_en_mac_to_u64(u8
*addr
);
651 void mlx4_en_ptp_overflow_check(struct mlx4_en_dev
*mdev
);
/* Functions for time stamping */
656 u64
mlx4_en_get_cqe_ts(struct mlx4_cqe
*cqe
);
657 void mlx4_en_fill_hwtstamps(struct mlx4_en_dev
*mdev
,
658 struct skb_shared_hwtstamps
*hwts
,
/* Timestamping initialisation for a device (declaration only). */
void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev);
661 int mlx4_en_timestamp_config(struct net_device
*dev
,
/* ethtool operations table; defined outside this header. */
extern const struct ethtool_ops mlx4_en_ethtool_ops;
/* printk / logging functions */
/*
 * printf-style logging helper used by the en_*() macros.
 *
 * @level:  printk level string (KERN_DEBUG, KERN_ERR, ...)
 * @priv:   port private data the message is attributed to
 * @format: printf-style format, followed by its arguments
 *
 * The format attribute lets the compiler check @format against the
 * variadic arguments at every call site.
 */
int en_print(const char *level, const struct mlx4_en_priv *priv,
	     const char *format, ...) __attribute__ ((format (printf, 3, 4)));
/*
 * Emit a KERN_DEBUG message through en_print() only when the
 * NETIF_MSG_<mlevel> bit is set in priv->msg_enable.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); priv is parenthesised so pointer
 * expressions can be passed.
 */
#define en_dbg(mlevel, priv, format, arg...)				\
do {									\
	if (NETIF_MSG_##mlevel & (priv)->msg_enable)			\
		en_print(KERN_DEBUG, priv, format, ##arg);		\
} while (0)
/*
 * Unconditional logging wrappers around en_print(), one per printk level.
 * Each takes the port private data, a printf-style format and its arguments.
 */
#define en_warn(priv, format, ...)					\
	en_print(KERN_WARNING, priv, format, ##__VA_ARGS__)
#define en_err(priv, format, ...)					\
	en_print(KERN_ERR, priv, format, ##__VA_ARGS__)
#define en_info(priv, format, ...)					\
	en_print(KERN_INFO, priv, format, ##__VA_ARGS__)
/*
 * Device-level logging: prefixes each message with the driver name and the
 * PCI device name taken from mdev->pdev.
 *
 * mdev is parenthesised so any pointer expression can be passed, and the
 * warning variant uses pr_warn() (pr_warning() is the deprecated spelling).
 */
#define mlx4_err(mdev, format, arg...)					\
	pr_err("%s %s: " format, DRV_NAME,				\
	       dev_name(&(mdev)->pdev->dev), ##arg)
#define mlx4_info(mdev, format, arg...)					\
	pr_info("%s %s: " format, DRV_NAME,				\
		dev_name(&(mdev)->pdev->dev), ##arg)
#define mlx4_warn(mdev, format, arg...)					\
	pr_warn("%s %s: " format, DRV_NAME,				\
		dev_name(&(mdev)->pdev->dev), ##arg)