2 * Copyright (c) 2009, Microsoft Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, see <http://www.gnu.org/licenses/>.
17 * Haiyang Zhang <haiyangz@microsoft.com>
18 * Hank Janssen <hjanssen@microsoft.com>
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/init.h>
23 #include <linux/atomic.h>
24 #include <linux/module.h>
25 #include <linux/highmem.h>
26 #include <linux/device.h>
28 #include <linux/delay.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/skbuff.h>
33 #include <linux/if_vlan.h>
35 #include <linux/slab.h>
37 #include <net/route.h>
39 #include <net/pkt_sched.h>
41 #include "hyperv_net.h"
44 #define RING_SIZE_MIN 64
45 #define LINKCHANGE_INT (2 * HZ)
46 static int ring_size
= 128;
47 module_param(ring_size
, int, S_IRUGO
);
48 MODULE_PARM_DESC(ring_size
, "Ring buffer size (# of pages)");
50 static int max_num_vrss_chns
= 8;
52 static const u32 default_msg
= NETIF_MSG_DRV
| NETIF_MSG_PROBE
|
53 NETIF_MSG_LINK
| NETIF_MSG_IFUP
|
54 NETIF_MSG_IFDOWN
| NETIF_MSG_RX_ERR
|
57 static int debug
= -1;
58 module_param(debug
, int, S_IRUGO
);
59 MODULE_PARM_DESC(debug
, "Debug level (0=none,...,16=all)");
61 static void do_set_multicast(struct work_struct
*w
)
63 struct net_device_context
*ndevctx
=
64 container_of(w
, struct net_device_context
, work
);
65 struct netvsc_device
*nvdev
;
66 struct rndis_device
*rdev
;
68 nvdev
= hv_get_drvdata(ndevctx
->device_ctx
);
69 if (nvdev
== NULL
|| nvdev
->ndev
== NULL
)
72 rdev
= nvdev
->extension
;
76 if (nvdev
->ndev
->flags
& IFF_PROMISC
)
77 rndis_filter_set_packet_filter(rdev
,
78 NDIS_PACKET_TYPE_PROMISCUOUS
);
80 rndis_filter_set_packet_filter(rdev
,
81 NDIS_PACKET_TYPE_BROADCAST
|
82 NDIS_PACKET_TYPE_ALL_MULTICAST
|
83 NDIS_PACKET_TYPE_DIRECTED
);
86 static void netvsc_set_multicast_list(struct net_device
*net
)
88 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
90 schedule_work(&net_device_ctx
->work
);
93 static int netvsc_open(struct net_device
*net
)
95 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
96 struct hv_device
*device_obj
= net_device_ctx
->device_ctx
;
97 struct netvsc_device
*nvdev
;
98 struct rndis_device
*rdev
;
101 netif_carrier_off(net
);
103 /* Open up the device */
104 ret
= rndis_filter_open(device_obj
);
106 netdev_err(net
, "unable to open device (ret %d).\n", ret
);
110 netif_tx_wake_all_queues(net
);
112 nvdev
= hv_get_drvdata(device_obj
);
113 rdev
= nvdev
->extension
;
114 if (!rdev
->link_state
)
115 netif_carrier_on(net
);
120 static int netvsc_close(struct net_device
*net
)
122 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
123 struct hv_device
*device_obj
= net_device_ctx
->device_ctx
;
124 struct netvsc_device
*nvdev
= hv_get_drvdata(device_obj
);
126 u32 aread
, awrite
, i
, msec
= 10, retry
= 0, retry_max
= 20;
127 struct vmbus_channel
*chn
;
129 netif_tx_disable(net
);
131 /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
132 cancel_work_sync(&net_device_ctx
->work
);
133 ret
= rndis_filter_close(device_obj
);
135 netdev_err(net
, "unable to close device (ret %d).\n", ret
);
139 /* Ensure pending bytes in ring are read */
142 for (i
= 0; i
< nvdev
->num_chn
; i
++) {
143 chn
= nvdev
->chn_table
[i
];
147 hv_get_ringbuffer_availbytes(&chn
->inbound
, &aread
,
153 hv_get_ringbuffer_availbytes(&chn
->outbound
, &aread
,
161 if (retry
> retry_max
|| aread
== 0)
171 netdev_err(net
, "Ring buffer not empty after closing rndis\n");
178 static void *init_ppi_data(struct rndis_message
*msg
, u32 ppi_size
,
181 struct rndis_packet
*rndis_pkt
;
182 struct rndis_per_packet_info
*ppi
;
184 rndis_pkt
= &msg
->msg
.pkt
;
185 rndis_pkt
->data_offset
+= ppi_size
;
187 ppi
= (struct rndis_per_packet_info
*)((void *)rndis_pkt
+
188 rndis_pkt
->per_pkt_info_offset
+ rndis_pkt
->per_pkt_info_len
);
190 ppi
->size
= ppi_size
;
191 ppi
->type
= pkt_type
;
192 ppi
->ppi_offset
= sizeof(struct rndis_per_packet_info
);
194 rndis_pkt
->per_pkt_info_len
+= ppi_size
;
208 /* Toeplitz hash function
209 * data: network byte order
210 * return: host byte order
212 static u32
comp_hash(u8
*key
, int klen
, void *data
, int dlen
)
221 subk
.ka
= ntohl(*(u32
*)key
);
223 for (i
= 0; i
< dlen
; i
++) {
224 subk
.kb
= key
[k_next
];
225 k_next
= (k_next
+ 1) % klen
;
226 dt
= ((u8
*)data
)[i
];
227 for (j
= 0; j
< 8; j
++) {
238 static bool netvsc_set_hash(u32
*hash
, struct sk_buff
*skb
)
240 struct flow_keys flow
;
243 if (!skb_flow_dissect_flow_keys(skb
, &flow
, 0) ||
244 !(flow
.basic
.n_proto
== htons(ETH_P_IP
) ||
245 flow
.basic
.n_proto
== htons(ETH_P_IPV6
)))
248 if (flow
.basic
.ip_proto
== IPPROTO_TCP
)
253 *hash
= comp_hash(netvsc_hash_key
, HASH_KEYLEN
, &flow
, data_len
);
258 static u16
netvsc_select_queue(struct net_device
*ndev
, struct sk_buff
*skb
,
259 void *accel_priv
, select_queue_fallback_t fallback
)
261 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
262 struct hv_device
*hdev
= net_device_ctx
->device_ctx
;
263 struct netvsc_device
*nvsc_dev
= hv_get_drvdata(hdev
);
267 if (nvsc_dev
== NULL
|| ndev
->real_num_tx_queues
<= 1)
270 if (netvsc_set_hash(&hash
, skb
)) {
271 q_idx
= nvsc_dev
->send_table
[hash
% VRSS_SEND_TAB_SIZE
] %
272 ndev
->real_num_tx_queues
;
273 skb_set_hash(skb
, hash
, PKT_HASH_TYPE_L3
);
276 if (!nvsc_dev
->chn_table
[q_idx
])
282 static u32
fill_pg_buf(struct page
*page
, u32 offset
, u32 len
,
283 struct hv_page_buffer
*pb
)
287 /* Deal with compound pages by ignoring unused part
290 page
+= (offset
>> PAGE_SHIFT
);
291 offset
&= ~PAGE_MASK
;
296 bytes
= PAGE_SIZE
- offset
;
299 pb
[j
].pfn
= page_to_pfn(page
);
300 pb
[j
].offset
= offset
;
306 if (offset
== PAGE_SIZE
&& len
) {
316 static u32
init_page_array(void *hdr
, u32 len
, struct sk_buff
*skb
,
317 struct hv_netvsc_packet
*packet
,
318 struct hv_page_buffer
**page_buf
)
320 struct hv_page_buffer
*pb
= *page_buf
;
322 char *data
= skb
->data
;
323 int frags
= skb_shinfo(skb
)->nr_frags
;
326 /* The packet is laid out thus:
327 * 1. hdr: RNDIS header and PPI
329 * 3. skb fragment data
332 slots_used
+= fill_pg_buf(virt_to_page(hdr
),
334 len
, &pb
[slots_used
]);
336 packet
->rmsg_size
= len
;
337 packet
->rmsg_pgcnt
= slots_used
;
339 slots_used
+= fill_pg_buf(virt_to_page(data
),
340 offset_in_page(data
),
341 skb_headlen(skb
), &pb
[slots_used
]);
343 for (i
= 0; i
< frags
; i
++) {
344 skb_frag_t
*frag
= skb_shinfo(skb
)->frags
+ i
;
346 slots_used
+= fill_pg_buf(skb_frag_page(frag
),
348 skb_frag_size(frag
), &pb
[slots_used
]);
353 static int count_skb_frag_slots(struct sk_buff
*skb
)
355 int i
, frags
= skb_shinfo(skb
)->nr_frags
;
358 for (i
= 0; i
< frags
; i
++) {
359 skb_frag_t
*frag
= skb_shinfo(skb
)->frags
+ i
;
360 unsigned long size
= skb_frag_size(frag
);
361 unsigned long offset
= frag
->page_offset
;
363 /* Skip unused frames from start of page */
364 offset
&= ~PAGE_MASK
;
365 pages
+= PFN_UP(offset
+ size
);
370 static int netvsc_get_slots(struct sk_buff
*skb
)
372 char *data
= skb
->data
;
373 unsigned int offset
= offset_in_page(data
);
374 unsigned int len
= skb_headlen(skb
);
378 slots
= DIV_ROUND_UP(offset
+ len
, PAGE_SIZE
);
379 frag_slots
= count_skb_frag_slots(skb
);
380 return slots
+ frag_slots
;
383 static u32
get_net_transport_info(struct sk_buff
*skb
, u32
*trans_off
)
385 u32 ret_val
= TRANSPORT_INFO_NOT_IP
;
387 if ((eth_hdr(skb
)->h_proto
!= htons(ETH_P_IP
)) &&
388 (eth_hdr(skb
)->h_proto
!= htons(ETH_P_IPV6
))) {
392 *trans_off
= skb_transport_offset(skb
);
394 if ((eth_hdr(skb
)->h_proto
== htons(ETH_P_IP
))) {
395 struct iphdr
*iphdr
= ip_hdr(skb
);
397 if (iphdr
->protocol
== IPPROTO_TCP
)
398 ret_val
= TRANSPORT_INFO_IPV4_TCP
;
399 else if (iphdr
->protocol
== IPPROTO_UDP
)
400 ret_val
= TRANSPORT_INFO_IPV4_UDP
;
402 if (ipv6_hdr(skb
)->nexthdr
== IPPROTO_TCP
)
403 ret_val
= TRANSPORT_INFO_IPV6_TCP
;
404 else if (ipv6_hdr(skb
)->nexthdr
== IPPROTO_UDP
)
405 ret_val
= TRANSPORT_INFO_IPV6_UDP
;
412 static int netvsc_start_xmit(struct sk_buff
*skb
, struct net_device
*net
)
414 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
415 struct hv_netvsc_packet
*packet
= NULL
;
417 unsigned int num_data_pgs
;
418 struct rndis_message
*rndis_msg
;
419 struct rndis_packet
*rndis_pkt
;
423 struct rndis_per_packet_info
*ppi
;
424 struct ndis_tcp_ip_checksum_info
*csum_info
;
425 struct ndis_tcp_lso_info
*lso_info
;
430 struct hv_page_buffer page_buf
[MAX_PAGE_BUFFER_COUNT
];
431 struct hv_page_buffer
*pb
= page_buf
;
432 struct netvsc_stats
*tx_stats
= this_cpu_ptr(net_device_ctx
->tx_stats
);
434 /* We will at most need two pages to describe the rndis
435 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
436 * of pages in a single packet. If skb is scattered around
437 * more pages we try linearizing it.
441 skb_length
= skb
->len
;
442 num_data_pgs
= netvsc_get_slots(skb
) + 2;
443 if (num_data_pgs
> MAX_PAGE_BUFFER_COUNT
&& linear
) {
444 net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
445 num_data_pgs
, skb
->len
);
448 } else if (num_data_pgs
> MAX_PAGE_BUFFER_COUNT
) {
449 if (skb_linearize(skb
)) {
450 net_alert_ratelimited("failed to linearize skb\n");
459 * Place the rndis header in the skb head room and
460 * the skb->cb will be used for hv_netvsc_packet
463 ret
= skb_cow_head(skb
, RNDIS_AND_PPI_SIZE
);
465 netdev_err(net
, "unable to alloc hv_netvsc_packet\n");
469 /* Use the skb control buffer for building up the packet */
470 BUILD_BUG_ON(sizeof(struct hv_netvsc_packet
) >
471 FIELD_SIZEOF(struct sk_buff
, cb
));
472 packet
= (struct hv_netvsc_packet
*)skb
->cb
;
475 packet
->xmit_more
= skb
->xmit_more
;
477 packet
->vlan_tci
= skb
->vlan_tci
;
479 packet
->q_idx
= skb_get_queue_mapping(skb
);
481 packet
->is_data_pkt
= true;
482 packet
->total_data_buflen
= skb
->len
;
484 rndis_msg
= (struct rndis_message
*)skb
->head
;
486 memset(rndis_msg
, 0, RNDIS_AND_PPI_SIZE
);
488 /* Set the completion routine */
489 packet
->completion_func
= 1;
491 isvlan
= packet
->vlan_tci
& VLAN_TAG_PRESENT
;
493 /* Add the rndis header */
494 rndis_msg
->ndis_msg_type
= RNDIS_MSG_PACKET
;
495 rndis_msg
->msg_len
= packet
->total_data_buflen
;
496 rndis_pkt
= &rndis_msg
->msg
.pkt
;
497 rndis_pkt
->data_offset
= sizeof(struct rndis_packet
);
498 rndis_pkt
->data_len
= packet
->total_data_buflen
;
499 rndis_pkt
->per_pkt_info_offset
= sizeof(struct rndis_packet
);
501 rndis_msg_size
= RNDIS_MESSAGE_SIZE(struct rndis_packet
);
503 hash
= skb_get_hash_raw(skb
);
504 if (hash
!= 0 && net
->real_num_tx_queues
> 1) {
505 rndis_msg_size
+= NDIS_HASH_PPI_SIZE
;
506 ppi
= init_ppi_data(rndis_msg
, NDIS_HASH_PPI_SIZE
,
508 *(u32
*)((void *)ppi
+ ppi
->ppi_offset
) = hash
;
512 struct ndis_pkt_8021q_info
*vlan
;
514 rndis_msg_size
+= NDIS_VLAN_PPI_SIZE
;
515 ppi
= init_ppi_data(rndis_msg
, NDIS_VLAN_PPI_SIZE
,
517 vlan
= (struct ndis_pkt_8021q_info
*)((void *)ppi
+
519 vlan
->vlanid
= packet
->vlan_tci
& VLAN_VID_MASK
;
520 vlan
->pri
= (packet
->vlan_tci
& VLAN_PRIO_MASK
) >>
524 net_trans_info
= get_net_transport_info(skb
, &hdr_offset
);
525 if (net_trans_info
== TRANSPORT_INFO_NOT_IP
)
529 * Setup the sendside checksum offload only if this is not a
535 if ((skb
->ip_summed
== CHECKSUM_NONE
) ||
536 (skb
->ip_summed
== CHECKSUM_UNNECESSARY
))
539 rndis_msg_size
+= NDIS_CSUM_PPI_SIZE
;
540 ppi
= init_ppi_data(rndis_msg
, NDIS_CSUM_PPI_SIZE
,
541 TCPIP_CHKSUM_PKTINFO
);
543 csum_info
= (struct ndis_tcp_ip_checksum_info
*)((void *)ppi
+
546 if (net_trans_info
& (INFO_IPV4
<< 16))
547 csum_info
->transmit
.is_ipv4
= 1;
549 csum_info
->transmit
.is_ipv6
= 1;
551 if (net_trans_info
& INFO_TCP
) {
552 csum_info
->transmit
.tcp_checksum
= 1;
553 csum_info
->transmit
.tcp_header_offset
= hdr_offset
;
554 } else if (net_trans_info
& INFO_UDP
) {
555 /* UDP checksum offload is not supported on ws2008r2.
556 * Furthermore, on ws2012 and ws2012r2, there are some
557 * issues with udp checksum offload from Linux guests.
558 * (these are host issues).
559 * For now compute the checksum here.
564 ret
= skb_cow_head(skb
, 0);
569 udp_len
= ntohs(uh
->len
);
571 uh
->check
= csum_tcpudp_magic(ip_hdr(skb
)->saddr
,
573 udp_len
, IPPROTO_UDP
,
574 csum_partial(uh
, udp_len
, 0));
576 uh
->check
= CSUM_MANGLED_0
;
578 csum_info
->transmit
.udp_checksum
= 0;
583 rndis_msg_size
+= NDIS_LSO_PPI_SIZE
;
584 ppi
= init_ppi_data(rndis_msg
, NDIS_LSO_PPI_SIZE
,
585 TCP_LARGESEND_PKTINFO
);
587 lso_info
= (struct ndis_tcp_lso_info
*)((void *)ppi
+
590 lso_info
->lso_v2_transmit
.type
= NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE
;
591 if (net_trans_info
& (INFO_IPV4
<< 16)) {
592 lso_info
->lso_v2_transmit
.ip_version
=
593 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4
;
594 ip_hdr(skb
)->tot_len
= 0;
595 ip_hdr(skb
)->check
= 0;
596 tcp_hdr(skb
)->check
=
597 ~csum_tcpudp_magic(ip_hdr(skb
)->saddr
,
598 ip_hdr(skb
)->daddr
, 0, IPPROTO_TCP
, 0);
600 lso_info
->lso_v2_transmit
.ip_version
=
601 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6
;
602 ipv6_hdr(skb
)->payload_len
= 0;
603 tcp_hdr(skb
)->check
=
604 ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
605 &ipv6_hdr(skb
)->daddr
, 0, IPPROTO_TCP
, 0);
607 lso_info
->lso_v2_transmit
.tcp_header_offset
= hdr_offset
;
608 lso_info
->lso_v2_transmit
.mss
= skb_shinfo(skb
)->gso_size
;
611 /* Start filling in the page buffers with the rndis hdr */
612 rndis_msg
->msg_len
+= rndis_msg_size
;
613 packet
->total_data_buflen
= rndis_msg
->msg_len
;
614 packet
->page_buf_cnt
= init_page_array(rndis_msg
, rndis_msg_size
,
617 ret
= netvsc_send(net_device_ctx
->device_ctx
, packet
,
618 rndis_msg
, &pb
, skb
);
622 u64_stats_update_begin(&tx_stats
->syncp
);
624 tx_stats
->bytes
+= skb_length
;
625 u64_stats_update_end(&tx_stats
->syncp
);
627 if (ret
!= -EAGAIN
) {
628 dev_kfree_skb_any(skb
);
629 net
->stats
.tx_dropped
++;
633 return (ret
== -EAGAIN
) ? NETDEV_TX_BUSY
: NETDEV_TX_OK
;
637 * netvsc_linkstatus_callback - Link up/down notification
639 void netvsc_linkstatus_callback(struct hv_device
*device_obj
,
640 struct rndis_message
*resp
)
642 struct rndis_indicate_status
*indicate
= &resp
->msg
.indicate_status
;
643 struct net_device
*net
;
644 struct net_device_context
*ndev_ctx
;
645 struct netvsc_device
*net_device
;
646 struct netvsc_reconfig
*event
;
649 /* Handle link change statuses only */
650 if (indicate
->status
!= RNDIS_STATUS_NETWORK_CHANGE
&&
651 indicate
->status
!= RNDIS_STATUS_MEDIA_CONNECT
&&
652 indicate
->status
!= RNDIS_STATUS_MEDIA_DISCONNECT
)
655 net_device
= hv_get_drvdata(device_obj
);
656 net
= net_device
->ndev
;
658 if (!net
|| net
->reg_state
!= NETREG_REGISTERED
)
661 ndev_ctx
= netdev_priv(net
);
663 event
= kzalloc(sizeof(*event
), GFP_ATOMIC
);
666 event
->event
= indicate
->status
;
668 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
669 list_add_tail(&event
->list
, &ndev_ctx
->reconfig_events
);
670 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
672 schedule_delayed_work(&ndev_ctx
->dwork
, 0);
676 * netvsc_recv_callback - Callback when we receive a packet from the
677 * "wire" on the specified device.
679 int netvsc_recv_callback(struct hv_device
*device_obj
,
680 struct hv_netvsc_packet
*packet
,
682 struct ndis_tcp_ip_checksum_info
*csum_info
,
683 struct vmbus_channel
*channel
)
685 struct net_device
*net
;
686 struct net_device_context
*net_device_ctx
;
688 struct netvsc_stats
*rx_stats
;
690 net
= ((struct netvsc_device
*)hv_get_drvdata(device_obj
))->ndev
;
691 if (!net
|| net
->reg_state
!= NETREG_REGISTERED
) {
692 packet
->status
= NVSP_STAT_FAIL
;
695 net_device_ctx
= netdev_priv(net
);
696 rx_stats
= this_cpu_ptr(net_device_ctx
->rx_stats
);
698 /* Allocate a skb - TODO direct I/O to pages? */
699 skb
= netdev_alloc_skb_ip_align(net
, packet
->total_data_buflen
);
700 if (unlikely(!skb
)) {
701 ++net
->stats
.rx_dropped
;
702 packet
->status
= NVSP_STAT_FAIL
;
707 * Copy to skb. This copy is needed here since the memory pointed by
708 * hv_netvsc_packet cannot be deallocated
710 memcpy(skb_put(skb
, packet
->total_data_buflen
), *data
,
711 packet
->total_data_buflen
);
713 skb
->protocol
= eth_type_trans(skb
, net
);
715 /* We only look at the IP checksum here.
716 * Should we be dropping the packet if checksum
717 * failed? How do we deal with other checksums - TCP/UDP?
719 if (csum_info
->receive
.ip_checksum_succeeded
)
720 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
722 skb
->ip_summed
= CHECKSUM_NONE
;
725 if (packet
->vlan_tci
& VLAN_TAG_PRESENT
)
726 __vlan_hwaccel_put_tag(skb
, htons(ETH_P_8021Q
),
729 skb_record_rx_queue(skb
, channel
->
730 offermsg
.offer
.sub_channel_index
);
732 u64_stats_update_begin(&rx_stats
->syncp
);
734 rx_stats
->bytes
+= packet
->total_data_buflen
;
735 u64_stats_update_end(&rx_stats
->syncp
);
738 * Pass the skb back up. Network stack will deallocate the skb when it
747 static void netvsc_get_drvinfo(struct net_device
*net
,
748 struct ethtool_drvinfo
*info
)
750 strlcpy(info
->driver
, KBUILD_MODNAME
, sizeof(info
->driver
));
751 strlcpy(info
->fw_version
, "N/A", sizeof(info
->fw_version
));
754 static void netvsc_get_channels(struct net_device
*net
,
755 struct ethtool_channels
*channel
)
757 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
758 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
759 struct netvsc_device
*nvdev
= hv_get_drvdata(dev
);
762 channel
->max_combined
= nvdev
->max_chn
;
763 channel
->combined_count
= nvdev
->num_chn
;
767 static int netvsc_set_channels(struct net_device
*net
,
768 struct ethtool_channels
*channels
)
770 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
771 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
772 struct netvsc_device
*nvdev
= hv_get_drvdata(dev
);
773 struct netvsc_device_info device_info
;
777 bool recovering
= false;
779 if (!nvdev
|| nvdev
->destroy
)
782 num_chn
= nvdev
->num_chn
;
783 max_chn
= min_t(u32
, nvdev
->max_chn
, num_online_cpus());
785 if (nvdev
->nvsp_version
< NVSP_PROTOCOL_VERSION_5
) {
786 pr_info("vRSS unsupported before NVSP Version 5\n");
790 /* We do not support rx, tx, or other */
792 channels
->rx_count
||
793 channels
->tx_count
||
794 channels
->other_count
||
795 (channels
->combined_count
< 1))
798 if (channels
->combined_count
> max_chn
) {
799 pr_info("combined channels too high, using %d\n", max_chn
);
800 channels
->combined_count
= max_chn
;
803 ret
= netvsc_close(net
);
808 nvdev
->start_remove
= true;
809 rndis_filter_device_remove(dev
);
811 nvdev
->num_chn
= channels
->combined_count
;
813 net_device_ctx
->device_ctx
= dev
;
814 hv_set_drvdata(dev
, net
);
816 memset(&device_info
, 0, sizeof(device_info
));
817 device_info
.num_chn
= nvdev
->num_chn
; /* passed to RNDIS */
818 device_info
.ring_size
= ring_size
;
819 device_info
.max_num_vrss_chns
= max_num_vrss_chns
;
821 ret
= rndis_filter_device_add(dev
, &device_info
);
824 netdev_err(net
, "unable to add netvsc device (ret %d)\n", ret
);
830 nvdev
= hv_get_drvdata(dev
);
832 ret
= netif_set_real_num_tx_queues(net
, nvdev
->num_chn
);
835 netdev_err(net
, "could not set tx queue count (ret %d)\n", ret
);
841 ret
= netif_set_real_num_rx_queues(net
, nvdev
->num_chn
);
844 netdev_err(net
, "could not set rx queue count (ret %d)\n", ret
);
856 /* If the above failed, we attempt to recover through the same
857 * process but with the original number of channels.
859 netdev_err(net
, "could not set channels, recovering\n");
861 channels
->combined_count
= num_chn
;
865 static int netvsc_change_mtu(struct net_device
*ndev
, int mtu
)
867 struct net_device_context
*ndevctx
= netdev_priv(ndev
);
868 struct hv_device
*hdev
= ndevctx
->device_ctx
;
869 struct netvsc_device
*nvdev
= hv_get_drvdata(hdev
);
870 struct netvsc_device_info device_info
;
871 int limit
= ETH_DATA_LEN
;
874 if (nvdev
== NULL
|| nvdev
->destroy
)
877 if (nvdev
->nvsp_version
>= NVSP_PROTOCOL_VERSION_2
)
878 limit
= NETVSC_MTU
- ETH_HLEN
;
880 if (mtu
< NETVSC_MTU_MIN
|| mtu
> limit
)
883 ret
= netvsc_close(ndev
);
887 nvdev
->start_remove
= true;
888 rndis_filter_device_remove(hdev
);
892 ndevctx
->device_ctx
= hdev
;
893 hv_set_drvdata(hdev
, ndev
);
895 memset(&device_info
, 0, sizeof(device_info
));
896 device_info
.ring_size
= ring_size
;
897 device_info
.num_chn
= nvdev
->num_chn
;
898 device_info
.max_num_vrss_chns
= max_num_vrss_chns
;
899 rndis_filter_device_add(hdev
, &device_info
);
907 static struct rtnl_link_stats64
*netvsc_get_stats64(struct net_device
*net
,
908 struct rtnl_link_stats64
*t
)
910 struct net_device_context
*ndev_ctx
= netdev_priv(net
);
913 for_each_possible_cpu(cpu
) {
914 struct netvsc_stats
*tx_stats
= per_cpu_ptr(ndev_ctx
->tx_stats
,
916 struct netvsc_stats
*rx_stats
= per_cpu_ptr(ndev_ctx
->rx_stats
,
918 u64 tx_packets
, tx_bytes
, rx_packets
, rx_bytes
;
922 start
= u64_stats_fetch_begin_irq(&tx_stats
->syncp
);
923 tx_packets
= tx_stats
->packets
;
924 tx_bytes
= tx_stats
->bytes
;
925 } while (u64_stats_fetch_retry_irq(&tx_stats
->syncp
, start
));
928 start
= u64_stats_fetch_begin_irq(&rx_stats
->syncp
);
929 rx_packets
= rx_stats
->packets
;
930 rx_bytes
= rx_stats
->bytes
;
931 } while (u64_stats_fetch_retry_irq(&rx_stats
->syncp
, start
));
933 t
->tx_bytes
+= tx_bytes
;
934 t
->tx_packets
+= tx_packets
;
935 t
->rx_bytes
+= rx_bytes
;
936 t
->rx_packets
+= rx_packets
;
939 t
->tx_dropped
= net
->stats
.tx_dropped
;
940 t
->tx_errors
= net
->stats
.tx_dropped
;
942 t
->rx_dropped
= net
->stats
.rx_dropped
;
943 t
->rx_errors
= net
->stats
.rx_errors
;
948 static int netvsc_set_mac_addr(struct net_device
*ndev
, void *p
)
950 struct net_device_context
*ndevctx
= netdev_priv(ndev
);
951 struct hv_device
*hdev
= ndevctx
->device_ctx
;
952 struct sockaddr
*addr
= p
;
953 char save_adr
[ETH_ALEN
];
954 unsigned char save_aatype
;
957 memcpy(save_adr
, ndev
->dev_addr
, ETH_ALEN
);
958 save_aatype
= ndev
->addr_assign_type
;
960 err
= eth_mac_addr(ndev
, p
);
964 err
= rndis_filter_set_device_mac(hdev
, addr
->sa_data
);
966 /* roll back to saved MAC */
967 memcpy(ndev
->dev_addr
, save_adr
, ETH_ALEN
);
968 ndev
->addr_assign_type
= save_aatype
;
#ifdef CONFIG_NET_POLL_CONTROLLER
/* ndo_poll_controller handler; deliberately empty. */
static void netvsc_poll_controller(struct net_device *net)
{
	/* As netvsc_start_xmit() works synchronous we don't have to
	 * trigger anything here.
	 */
}
#endif
983 static const struct ethtool_ops ethtool_ops
= {
984 .get_drvinfo
= netvsc_get_drvinfo
,
985 .get_link
= ethtool_op_get_link
,
986 .get_channels
= netvsc_get_channels
,
987 .set_channels
= netvsc_set_channels
,
990 static const struct net_device_ops device_ops
= {
991 .ndo_open
= netvsc_open
,
992 .ndo_stop
= netvsc_close
,
993 .ndo_start_xmit
= netvsc_start_xmit
,
994 .ndo_set_rx_mode
= netvsc_set_multicast_list
,
995 .ndo_change_mtu
= netvsc_change_mtu
,
996 .ndo_validate_addr
= eth_validate_addr
,
997 .ndo_set_mac_address
= netvsc_set_mac_addr
,
998 .ndo_select_queue
= netvsc_select_queue
,
999 .ndo_get_stats64
= netvsc_get_stats64
,
1000 #ifdef CONFIG_NET_POLL_CONTROLLER
1001 .ndo_poll_controller
= netvsc_poll_controller
,
1006 * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link
1007 * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is
1008 * present send GARP packet to network peers with netif_notify_peers().
1010 static void netvsc_link_change(struct work_struct
*w
)
1012 struct net_device_context
*ndev_ctx
;
1013 struct net_device
*net
;
1014 struct netvsc_device
*net_device
;
1015 struct rndis_device
*rdev
;
1016 struct netvsc_reconfig
*event
= NULL
;
1017 bool notify
= false, reschedule
= false;
1018 unsigned long flags
, next_reconfig
, delay
;
1020 ndev_ctx
= container_of(w
, struct net_device_context
, dwork
.work
);
1021 net_device
= hv_get_drvdata(ndev_ctx
->device_ctx
);
1022 rdev
= net_device
->extension
;
1023 net
= net_device
->ndev
;
1025 next_reconfig
= ndev_ctx
->last_reconfig
+ LINKCHANGE_INT
;
1026 if (time_is_after_jiffies(next_reconfig
)) {
1027 /* link_watch only sends one notification with current state
1028 * per second, avoid doing reconfig more frequently. Handle
1031 delay
= next_reconfig
- jiffies
;
1032 delay
= delay
< LINKCHANGE_INT
? delay
: LINKCHANGE_INT
;
1033 schedule_delayed_work(&ndev_ctx
->dwork
, delay
);
1036 ndev_ctx
->last_reconfig
= jiffies
;
1038 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
1039 if (!list_empty(&ndev_ctx
->reconfig_events
)) {
1040 event
= list_first_entry(&ndev_ctx
->reconfig_events
,
1041 struct netvsc_reconfig
, list
);
1042 list_del(&event
->list
);
1043 reschedule
= !list_empty(&ndev_ctx
->reconfig_events
);
1045 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
1052 switch (event
->event
) {
1053 /* Only the following events are possible due to the check in
1054 * netvsc_linkstatus_callback()
1056 case RNDIS_STATUS_MEDIA_CONNECT
:
1057 if (rdev
->link_state
) {
1058 rdev
->link_state
= false;
1059 netif_carrier_on(net
);
1060 netif_tx_wake_all_queues(net
);
1066 case RNDIS_STATUS_MEDIA_DISCONNECT
:
1067 if (!rdev
->link_state
) {
1068 rdev
->link_state
= true;
1069 netif_carrier_off(net
);
1070 netif_tx_stop_all_queues(net
);
1074 case RNDIS_STATUS_NETWORK_CHANGE
:
1075 /* Only makes sense if carrier is present */
1076 if (!rdev
->link_state
) {
1077 rdev
->link_state
= true;
1078 netif_carrier_off(net
);
1079 netif_tx_stop_all_queues(net
);
1080 event
->event
= RNDIS_STATUS_MEDIA_CONNECT
;
1081 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
1082 list_add_tail(&event
->list
, &ndev_ctx
->reconfig_events
);
1083 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
1092 netdev_notify_peers(net
);
1094 /* link_watch only sends one notification with current state per
1095 * second, handle next reconfig event in 2 seconds.
1098 schedule_delayed_work(&ndev_ctx
->dwork
, LINKCHANGE_INT
);
1101 static void netvsc_free_netdev(struct net_device
*netdev
)
1103 struct net_device_context
*net_device_ctx
= netdev_priv(netdev
);
1105 free_percpu(net_device_ctx
->tx_stats
);
1106 free_percpu(net_device_ctx
->rx_stats
);
1107 free_netdev(netdev
);
1110 static int netvsc_probe(struct hv_device
*dev
,
1111 const struct hv_vmbus_device_id
*dev_id
)
1113 struct net_device
*net
= NULL
;
1114 struct net_device_context
*net_device_ctx
;
1115 struct netvsc_device_info device_info
;
1116 struct netvsc_device
*nvdev
;
1119 net
= alloc_etherdev_mq(sizeof(struct net_device_context
),
1124 netif_carrier_off(net
);
1126 net_device_ctx
= netdev_priv(net
);
1127 net_device_ctx
->device_ctx
= dev
;
1128 net_device_ctx
->msg_enable
= netif_msg_init(debug
, default_msg
);
1129 if (netif_msg_probe(net_device_ctx
))
1130 netdev_dbg(net
, "netvsc msg_enable: %d\n",
1131 net_device_ctx
->msg_enable
);
1133 net_device_ctx
->tx_stats
= netdev_alloc_pcpu_stats(struct netvsc_stats
);
1134 if (!net_device_ctx
->tx_stats
) {
1138 net_device_ctx
->rx_stats
= netdev_alloc_pcpu_stats(struct netvsc_stats
);
1139 if (!net_device_ctx
->rx_stats
) {
1140 free_percpu(net_device_ctx
->tx_stats
);
1145 hv_set_drvdata(dev
, net
);
1146 INIT_DELAYED_WORK(&net_device_ctx
->dwork
, netvsc_link_change
);
1147 INIT_WORK(&net_device_ctx
->work
, do_set_multicast
);
1149 spin_lock_init(&net_device_ctx
->lock
);
1150 INIT_LIST_HEAD(&net_device_ctx
->reconfig_events
);
1152 net
->netdev_ops
= &device_ops
;
1154 net
->hw_features
= NETIF_F_RXCSUM
| NETIF_F_SG
| NETIF_F_IP_CSUM
|
1156 net
->features
= NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_SG
| NETIF_F_RXCSUM
|
1157 NETIF_F_IP_CSUM
| NETIF_F_TSO
;
1159 net
->ethtool_ops
= ðtool_ops
;
1160 SET_NETDEV_DEV(net
, &dev
->device
);
1162 /* Notify the netvsc driver of the new device */
1163 memset(&device_info
, 0, sizeof(device_info
));
1164 device_info
.ring_size
= ring_size
;
1165 device_info
.max_num_vrss_chns
= max_num_vrss_chns
;
1166 ret
= rndis_filter_device_add(dev
, &device_info
);
1168 netdev_err(net
, "unable to add netvsc device (ret %d)\n", ret
);
1169 netvsc_free_netdev(net
);
1170 hv_set_drvdata(dev
, NULL
);
1173 memcpy(net
->dev_addr
, device_info
.mac_adr
, ETH_ALEN
);
1175 nvdev
= hv_get_drvdata(dev
);
1176 netif_set_real_num_tx_queues(net
, nvdev
->num_chn
);
1177 netif_set_real_num_rx_queues(net
, nvdev
->num_chn
);
1179 ret
= register_netdev(net
);
1181 pr_err("Unable to register netdev.\n");
1182 rndis_filter_device_remove(dev
);
1183 netvsc_free_netdev(net
);
1189 static int netvsc_remove(struct hv_device
*dev
)
1191 struct net_device
*net
;
1192 struct net_device_context
*ndev_ctx
;
1193 struct netvsc_device
*net_device
;
1195 net_device
= hv_get_drvdata(dev
);
1196 net
= net_device
->ndev
;
1199 dev_err(&dev
->device
, "No net device to remove\n");
1203 net_device
->start_remove
= true;
1205 ndev_ctx
= netdev_priv(net
);
1206 cancel_delayed_work_sync(&ndev_ctx
->dwork
);
1207 cancel_work_sync(&ndev_ctx
->work
);
1209 /* Stop outbound asap */
1210 netif_tx_disable(net
);
1212 unregister_netdev(net
);
1215 * Call to the vsc driver to let it know that the device is being
1218 rndis_filter_device_remove(dev
);
1220 netvsc_free_netdev(net
);
1224 static const struct hv_vmbus_device_id id_table
[] = {
1230 MODULE_DEVICE_TABLE(vmbus
, id_table
);
1232 /* The one and only one */
1233 static struct hv_driver netvsc_drv
= {
1234 .name
= KBUILD_MODNAME
,
1235 .id_table
= id_table
,
1236 .probe
= netvsc_probe
,
1237 .remove
= netvsc_remove
,
1240 static void __exit
netvsc_drv_exit(void)
1242 vmbus_driver_unregister(&netvsc_drv
);
1245 static int __init
netvsc_drv_init(void)
1247 if (ring_size
< RING_SIZE_MIN
) {
1248 ring_size
= RING_SIZE_MIN
;
1249 pr_info("Increased ring_size to %d (min allowed)\n",
1252 return vmbus_driver_register(&netvsc_drv
);
1255 MODULE_LICENSE("GPL");
1256 MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
1258 module_init(netvsc_drv_init
);
1259 module_exit(netvsc_drv_exit
);