/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/init.h>
23 #include <linux/atomic.h>
24 #include <linux/module.h>
25 #include <linux/highmem.h>
26 #include <linux/device.h>
28 #include <linux/delay.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/skbuff.h>
33 #include <linux/if_vlan.h>
35 #include <linux/slab.h>
37 #include <net/route.h>
39 #include <net/pkt_sched.h>
41 #include "hyperv_net.h"
44 #define RING_SIZE_MIN 64
45 #define LINKCHANGE_INT (2 * HZ)
46 static int ring_size
= 128;
47 module_param(ring_size
, int, S_IRUGO
);
48 MODULE_PARM_DESC(ring_size
, "Ring buffer size (# of pages)");
50 static int max_num_vrss_chns
= 8;
52 static const u32 default_msg
= NETIF_MSG_DRV
| NETIF_MSG_PROBE
|
53 NETIF_MSG_LINK
| NETIF_MSG_IFUP
|
54 NETIF_MSG_IFDOWN
| NETIF_MSG_RX_ERR
|
57 static int debug
= -1;
58 module_param(debug
, int, S_IRUGO
);
59 MODULE_PARM_DESC(debug
, "Debug level (0=none,...,16=all)");
61 static void do_set_multicast(struct work_struct
*w
)
63 struct net_device_context
*ndevctx
=
64 container_of(w
, struct net_device_context
, work
);
65 struct netvsc_device
*nvdev
;
66 struct rndis_device
*rdev
;
68 nvdev
= hv_get_drvdata(ndevctx
->device_ctx
);
69 if (nvdev
== NULL
|| nvdev
->ndev
== NULL
)
72 rdev
= nvdev
->extension
;
76 if (nvdev
->ndev
->flags
& IFF_PROMISC
)
77 rndis_filter_set_packet_filter(rdev
,
78 NDIS_PACKET_TYPE_PROMISCUOUS
);
80 rndis_filter_set_packet_filter(rdev
,
81 NDIS_PACKET_TYPE_BROADCAST
|
82 NDIS_PACKET_TYPE_ALL_MULTICAST
|
83 NDIS_PACKET_TYPE_DIRECTED
);
86 static void netvsc_set_multicast_list(struct net_device
*net
)
88 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
90 schedule_work(&net_device_ctx
->work
);
93 static int netvsc_open(struct net_device
*net
)
95 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
96 struct hv_device
*device_obj
= net_device_ctx
->device_ctx
;
97 struct netvsc_device
*nvdev
;
98 struct rndis_device
*rdev
;
101 netif_carrier_off(net
);
103 /* Open up the device */
104 ret
= rndis_filter_open(device_obj
);
106 netdev_err(net
, "unable to open device (ret %d).\n", ret
);
110 netif_tx_wake_all_queues(net
);
112 nvdev
= hv_get_drvdata(device_obj
);
113 rdev
= nvdev
->extension
;
114 if (!rdev
->link_state
)
115 netif_carrier_on(net
);
120 static int netvsc_close(struct net_device
*net
)
122 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
123 struct hv_device
*device_obj
= net_device_ctx
->device_ctx
;
124 struct netvsc_device
*nvdev
= hv_get_drvdata(device_obj
);
126 u32 aread
, awrite
, i
, msec
= 10, retry
= 0, retry_max
= 20;
127 struct vmbus_channel
*chn
;
129 netif_tx_disable(net
);
131 /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
132 cancel_work_sync(&net_device_ctx
->work
);
133 ret
= rndis_filter_close(device_obj
);
135 netdev_err(net
, "unable to close device (ret %d).\n", ret
);
139 /* Ensure pending bytes in ring are read */
142 for (i
= 0; i
< nvdev
->num_chn
; i
++) {
143 chn
= nvdev
->chn_table
[i
];
147 hv_get_ringbuffer_availbytes(&chn
->inbound
, &aread
,
153 hv_get_ringbuffer_availbytes(&chn
->outbound
, &aread
,
161 if (retry
> retry_max
|| aread
== 0)
171 netdev_err(net
, "Ring buffer not empty after closing rndis\n");
178 static void *init_ppi_data(struct rndis_message
*msg
, u32 ppi_size
,
181 struct rndis_packet
*rndis_pkt
;
182 struct rndis_per_packet_info
*ppi
;
184 rndis_pkt
= &msg
->msg
.pkt
;
185 rndis_pkt
->data_offset
+= ppi_size
;
187 ppi
= (struct rndis_per_packet_info
*)((void *)rndis_pkt
+
188 rndis_pkt
->per_pkt_info_offset
+ rndis_pkt
->per_pkt_info_len
);
190 ppi
->size
= ppi_size
;
191 ppi
->type
= pkt_type
;
192 ppi
->ppi_offset
= sizeof(struct rndis_per_packet_info
);
194 rndis_pkt
->per_pkt_info_len
+= ppi_size
;
199 static u16
netvsc_select_queue(struct net_device
*ndev
, struct sk_buff
*skb
,
200 void *accel_priv
, select_queue_fallback_t fallback
)
202 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
203 struct hv_device
*hdev
= net_device_ctx
->device_ctx
;
204 struct netvsc_device
*nvsc_dev
= hv_get_drvdata(hdev
);
208 if (nvsc_dev
== NULL
|| ndev
->real_num_tx_queues
<= 1)
211 hash
= skb_get_hash(skb
);
212 q_idx
= nvsc_dev
->send_table
[hash
% VRSS_SEND_TAB_SIZE
] %
213 ndev
->real_num_tx_queues
;
215 if (!nvsc_dev
->chn_table
[q_idx
])
221 static u32
fill_pg_buf(struct page
*page
, u32 offset
, u32 len
,
222 struct hv_page_buffer
*pb
)
226 /* Deal with compund pages by ignoring unused part
229 page
+= (offset
>> PAGE_SHIFT
);
230 offset
&= ~PAGE_MASK
;
235 bytes
= PAGE_SIZE
- offset
;
238 pb
[j
].pfn
= page_to_pfn(page
);
239 pb
[j
].offset
= offset
;
245 if (offset
== PAGE_SIZE
&& len
) {
255 static u32
init_page_array(void *hdr
, u32 len
, struct sk_buff
*skb
,
256 struct hv_netvsc_packet
*packet
,
257 struct hv_page_buffer
**page_buf
)
259 struct hv_page_buffer
*pb
= *page_buf
;
261 char *data
= skb
->data
;
262 int frags
= skb_shinfo(skb
)->nr_frags
;
265 /* The packet is laid out thus:
266 * 1. hdr: RNDIS header and PPI
268 * 3. skb fragment data
271 slots_used
+= fill_pg_buf(virt_to_page(hdr
),
273 len
, &pb
[slots_used
]);
275 packet
->rmsg_size
= len
;
276 packet
->rmsg_pgcnt
= slots_used
;
278 slots_used
+= fill_pg_buf(virt_to_page(data
),
279 offset_in_page(data
),
280 skb_headlen(skb
), &pb
[slots_used
]);
282 for (i
= 0; i
< frags
; i
++) {
283 skb_frag_t
*frag
= skb_shinfo(skb
)->frags
+ i
;
285 slots_used
+= fill_pg_buf(skb_frag_page(frag
),
287 skb_frag_size(frag
), &pb
[slots_used
]);
292 static int count_skb_frag_slots(struct sk_buff
*skb
)
294 int i
, frags
= skb_shinfo(skb
)->nr_frags
;
297 for (i
= 0; i
< frags
; i
++) {
298 skb_frag_t
*frag
= skb_shinfo(skb
)->frags
+ i
;
299 unsigned long size
= skb_frag_size(frag
);
300 unsigned long offset
= frag
->page_offset
;
302 /* Skip unused frames from start of page */
303 offset
&= ~PAGE_MASK
;
304 pages
+= PFN_UP(offset
+ size
);
309 static int netvsc_get_slots(struct sk_buff
*skb
)
311 char *data
= skb
->data
;
312 unsigned int offset
= offset_in_page(data
);
313 unsigned int len
= skb_headlen(skb
);
317 slots
= DIV_ROUND_UP(offset
+ len
, PAGE_SIZE
);
318 frag_slots
= count_skb_frag_slots(skb
);
319 return slots
+ frag_slots
;
322 static u32
get_net_transport_info(struct sk_buff
*skb
, u32
*trans_off
)
324 u32 ret_val
= TRANSPORT_INFO_NOT_IP
;
326 if ((eth_hdr(skb
)->h_proto
!= htons(ETH_P_IP
)) &&
327 (eth_hdr(skb
)->h_proto
!= htons(ETH_P_IPV6
))) {
331 *trans_off
= skb_transport_offset(skb
);
333 if ((eth_hdr(skb
)->h_proto
== htons(ETH_P_IP
))) {
334 struct iphdr
*iphdr
= ip_hdr(skb
);
336 if (iphdr
->protocol
== IPPROTO_TCP
)
337 ret_val
= TRANSPORT_INFO_IPV4_TCP
;
338 else if (iphdr
->protocol
== IPPROTO_UDP
)
339 ret_val
= TRANSPORT_INFO_IPV4_UDP
;
341 if (ipv6_hdr(skb
)->nexthdr
== IPPROTO_TCP
)
342 ret_val
= TRANSPORT_INFO_IPV6_TCP
;
343 else if (ipv6_hdr(skb
)->nexthdr
== IPPROTO_UDP
)
344 ret_val
= TRANSPORT_INFO_IPV6_UDP
;
351 static int netvsc_start_xmit(struct sk_buff
*skb
, struct net_device
*net
)
353 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
354 struct hv_netvsc_packet
*packet
= NULL
;
356 unsigned int num_data_pgs
;
357 struct rndis_message
*rndis_msg
;
358 struct rndis_packet
*rndis_pkt
;
362 struct rndis_per_packet_info
*ppi
;
363 struct ndis_tcp_ip_checksum_info
*csum_info
;
364 struct ndis_tcp_lso_info
*lso_info
;
369 struct hv_page_buffer page_buf
[MAX_PAGE_BUFFER_COUNT
];
370 struct hv_page_buffer
*pb
= page_buf
;
371 struct netvsc_stats
*tx_stats
= this_cpu_ptr(net_device_ctx
->tx_stats
);
373 /* We will atmost need two pages to describe the rndis
374 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
375 * of pages in a single packet. If skb is scattered around
376 * more pages we try linearizing it.
380 skb_length
= skb
->len
;
381 num_data_pgs
= netvsc_get_slots(skb
) + 2;
382 if (num_data_pgs
> MAX_PAGE_BUFFER_COUNT
&& linear
) {
383 net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
384 num_data_pgs
, skb
->len
);
387 } else if (num_data_pgs
> MAX_PAGE_BUFFER_COUNT
) {
388 if (skb_linearize(skb
)) {
389 net_alert_ratelimited("failed to linearize skb\n");
398 * Place the rndis header in the skb head room and
399 * the skb->cb will be used for hv_netvsc_packet
402 ret
= skb_cow_head(skb
, RNDIS_AND_PPI_SIZE
);
404 netdev_err(net
, "unable to alloc hv_netvsc_packet\n");
408 /* Use the skb control buffer for building up the packet */
409 BUILD_BUG_ON(sizeof(struct hv_netvsc_packet
) >
410 FIELD_SIZEOF(struct sk_buff
, cb
));
411 packet
= (struct hv_netvsc_packet
*)skb
->cb
;
414 packet
->q_idx
= skb_get_queue_mapping(skb
);
416 packet
->total_data_buflen
= skb
->len
;
418 rndis_msg
= (struct rndis_message
*)skb
->head
;
420 memset(rndis_msg
, 0, RNDIS_AND_PPI_SIZE
);
422 isvlan
= skb
->vlan_tci
& VLAN_TAG_PRESENT
;
424 /* Add the rndis header */
425 rndis_msg
->ndis_msg_type
= RNDIS_MSG_PACKET
;
426 rndis_msg
->msg_len
= packet
->total_data_buflen
;
427 rndis_pkt
= &rndis_msg
->msg
.pkt
;
428 rndis_pkt
->data_offset
= sizeof(struct rndis_packet
);
429 rndis_pkt
->data_len
= packet
->total_data_buflen
;
430 rndis_pkt
->per_pkt_info_offset
= sizeof(struct rndis_packet
);
432 rndis_msg_size
= RNDIS_MESSAGE_SIZE(struct rndis_packet
);
434 hash
= skb_get_hash_raw(skb
);
435 if (hash
!= 0 && net
->real_num_tx_queues
> 1) {
436 rndis_msg_size
+= NDIS_HASH_PPI_SIZE
;
437 ppi
= init_ppi_data(rndis_msg
, NDIS_HASH_PPI_SIZE
,
439 *(u32
*)((void *)ppi
+ ppi
->ppi_offset
) = hash
;
443 struct ndis_pkt_8021q_info
*vlan
;
445 rndis_msg_size
+= NDIS_VLAN_PPI_SIZE
;
446 ppi
= init_ppi_data(rndis_msg
, NDIS_VLAN_PPI_SIZE
,
448 vlan
= (struct ndis_pkt_8021q_info
*)((void *)ppi
+
450 vlan
->vlanid
= skb
->vlan_tci
& VLAN_VID_MASK
;
451 vlan
->pri
= (skb
->vlan_tci
& VLAN_PRIO_MASK
) >>
455 net_trans_info
= get_net_transport_info(skb
, &hdr_offset
);
456 if (net_trans_info
== TRANSPORT_INFO_NOT_IP
)
460 * Setup the sendside checksum offload only if this is not a
466 if ((skb
->ip_summed
== CHECKSUM_NONE
) ||
467 (skb
->ip_summed
== CHECKSUM_UNNECESSARY
))
470 rndis_msg_size
+= NDIS_CSUM_PPI_SIZE
;
471 ppi
= init_ppi_data(rndis_msg
, NDIS_CSUM_PPI_SIZE
,
472 TCPIP_CHKSUM_PKTINFO
);
474 csum_info
= (struct ndis_tcp_ip_checksum_info
*)((void *)ppi
+
477 if (net_trans_info
& (INFO_IPV4
<< 16))
478 csum_info
->transmit
.is_ipv4
= 1;
480 csum_info
->transmit
.is_ipv6
= 1;
482 if (net_trans_info
& INFO_TCP
) {
483 csum_info
->transmit
.tcp_checksum
= 1;
484 csum_info
->transmit
.tcp_header_offset
= hdr_offset
;
485 } else if (net_trans_info
& INFO_UDP
) {
486 /* UDP checksum offload is not supported on ws2008r2.
487 * Furthermore, on ws2012 and ws2012r2, there are some
488 * issues with udp checksum offload from Linux guests.
489 * (these are host issues).
490 * For now compute the checksum here.
495 ret
= skb_cow_head(skb
, 0);
500 udp_len
= ntohs(uh
->len
);
502 uh
->check
= csum_tcpudp_magic(ip_hdr(skb
)->saddr
,
504 udp_len
, IPPROTO_UDP
,
505 csum_partial(uh
, udp_len
, 0));
507 uh
->check
= CSUM_MANGLED_0
;
509 csum_info
->transmit
.udp_checksum
= 0;
514 rndis_msg_size
+= NDIS_LSO_PPI_SIZE
;
515 ppi
= init_ppi_data(rndis_msg
, NDIS_LSO_PPI_SIZE
,
516 TCP_LARGESEND_PKTINFO
);
518 lso_info
= (struct ndis_tcp_lso_info
*)((void *)ppi
+
521 lso_info
->lso_v2_transmit
.type
= NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE
;
522 if (net_trans_info
& (INFO_IPV4
<< 16)) {
523 lso_info
->lso_v2_transmit
.ip_version
=
524 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4
;
525 ip_hdr(skb
)->tot_len
= 0;
526 ip_hdr(skb
)->check
= 0;
527 tcp_hdr(skb
)->check
=
528 ~csum_tcpudp_magic(ip_hdr(skb
)->saddr
,
529 ip_hdr(skb
)->daddr
, 0, IPPROTO_TCP
, 0);
531 lso_info
->lso_v2_transmit
.ip_version
=
532 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6
;
533 ipv6_hdr(skb
)->payload_len
= 0;
534 tcp_hdr(skb
)->check
=
535 ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
536 &ipv6_hdr(skb
)->daddr
, 0, IPPROTO_TCP
, 0);
538 lso_info
->lso_v2_transmit
.tcp_header_offset
= hdr_offset
;
539 lso_info
->lso_v2_transmit
.mss
= skb_shinfo(skb
)->gso_size
;
542 /* Start filling in the page buffers with the rndis hdr */
543 rndis_msg
->msg_len
+= rndis_msg_size
;
544 packet
->total_data_buflen
= rndis_msg
->msg_len
;
545 packet
->page_buf_cnt
= init_page_array(rndis_msg
, rndis_msg_size
,
548 ret
= netvsc_send(net_device_ctx
->device_ctx
, packet
,
549 rndis_msg
, &pb
, skb
);
553 u64_stats_update_begin(&tx_stats
->syncp
);
555 tx_stats
->bytes
+= skb_length
;
556 u64_stats_update_end(&tx_stats
->syncp
);
558 if (ret
!= -EAGAIN
) {
559 dev_kfree_skb_any(skb
);
560 net
->stats
.tx_dropped
++;
564 return (ret
== -EAGAIN
) ? NETDEV_TX_BUSY
: NETDEV_TX_OK
;
568 * netvsc_linkstatus_callback - Link up/down notification
570 void netvsc_linkstatus_callback(struct hv_device
*device_obj
,
571 struct rndis_message
*resp
)
573 struct rndis_indicate_status
*indicate
= &resp
->msg
.indicate_status
;
574 struct net_device
*net
;
575 struct net_device_context
*ndev_ctx
;
576 struct netvsc_device
*net_device
;
577 struct netvsc_reconfig
*event
;
580 /* Handle link change statuses only */
581 if (indicate
->status
!= RNDIS_STATUS_NETWORK_CHANGE
&&
582 indicate
->status
!= RNDIS_STATUS_MEDIA_CONNECT
&&
583 indicate
->status
!= RNDIS_STATUS_MEDIA_DISCONNECT
)
586 net_device
= hv_get_drvdata(device_obj
);
587 net
= net_device
->ndev
;
589 if (!net
|| net
->reg_state
!= NETREG_REGISTERED
)
592 ndev_ctx
= netdev_priv(net
);
594 event
= kzalloc(sizeof(*event
), GFP_ATOMIC
);
597 event
->event
= indicate
->status
;
599 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
600 list_add_tail(&event
->list
, &ndev_ctx
->reconfig_events
);
601 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
603 schedule_delayed_work(&ndev_ctx
->dwork
, 0);
607 * netvsc_recv_callback - Callback when we receive a packet from the
608 * "wire" on the specified device.
610 int netvsc_recv_callback(struct hv_device
*device_obj
,
611 struct hv_netvsc_packet
*packet
,
613 struct ndis_tcp_ip_checksum_info
*csum_info
,
614 struct vmbus_channel
*channel
,
617 struct net_device
*net
;
618 struct net_device_context
*net_device_ctx
;
620 struct netvsc_stats
*rx_stats
;
622 net
= ((struct netvsc_device
*)hv_get_drvdata(device_obj
))->ndev
;
623 if (!net
|| net
->reg_state
!= NETREG_REGISTERED
) {
624 return NVSP_STAT_FAIL
;
626 net_device_ctx
= netdev_priv(net
);
627 rx_stats
= this_cpu_ptr(net_device_ctx
->rx_stats
);
629 /* Allocate a skb - TODO direct I/O to pages? */
630 skb
= netdev_alloc_skb_ip_align(net
, packet
->total_data_buflen
);
631 if (unlikely(!skb
)) {
632 ++net
->stats
.rx_dropped
;
633 return NVSP_STAT_FAIL
;
637 * Copy to skb. This copy is needed here since the memory pointed by
638 * hv_netvsc_packet cannot be deallocated
640 memcpy(skb_put(skb
, packet
->total_data_buflen
), *data
,
641 packet
->total_data_buflen
);
643 skb
->protocol
= eth_type_trans(skb
, net
);
645 /* We only look at the IP checksum here.
646 * Should we be dropping the packet if checksum
647 * failed? How do we deal with other checksums - TCP/UDP?
649 if (csum_info
->receive
.ip_checksum_succeeded
)
650 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
652 skb
->ip_summed
= CHECKSUM_NONE
;
655 if (vlan_tci
& VLAN_TAG_PRESENT
)
656 __vlan_hwaccel_put_tag(skb
, htons(ETH_P_8021Q
),
659 skb_record_rx_queue(skb
, channel
->
660 offermsg
.offer
.sub_channel_index
);
662 u64_stats_update_begin(&rx_stats
->syncp
);
664 rx_stats
->bytes
+= packet
->total_data_buflen
;
665 u64_stats_update_end(&rx_stats
->syncp
);
668 * Pass the skb back up. Network stack will deallocate the skb when it
677 static void netvsc_get_drvinfo(struct net_device
*net
,
678 struct ethtool_drvinfo
*info
)
680 strlcpy(info
->driver
, KBUILD_MODNAME
, sizeof(info
->driver
));
681 strlcpy(info
->fw_version
, "N/A", sizeof(info
->fw_version
));
684 static void netvsc_get_channels(struct net_device
*net
,
685 struct ethtool_channels
*channel
)
687 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
688 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
689 struct netvsc_device
*nvdev
= hv_get_drvdata(dev
);
692 channel
->max_combined
= nvdev
->max_chn
;
693 channel
->combined_count
= nvdev
->num_chn
;
697 static int netvsc_set_channels(struct net_device
*net
,
698 struct ethtool_channels
*channels
)
700 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
701 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
702 struct netvsc_device
*nvdev
= hv_get_drvdata(dev
);
703 struct netvsc_device_info device_info
;
707 bool recovering
= false;
709 if (!nvdev
|| nvdev
->destroy
)
712 num_chn
= nvdev
->num_chn
;
713 max_chn
= min_t(u32
, nvdev
->max_chn
, num_online_cpus());
715 if (nvdev
->nvsp_version
< NVSP_PROTOCOL_VERSION_5
) {
716 pr_info("vRSS unsupported before NVSP Version 5\n");
720 /* We do not support rx, tx, or other */
722 channels
->rx_count
||
723 channels
->tx_count
||
724 channels
->other_count
||
725 (channels
->combined_count
< 1))
728 if (channels
->combined_count
> max_chn
) {
729 pr_info("combined channels too high, using %d\n", max_chn
);
730 channels
->combined_count
= max_chn
;
733 ret
= netvsc_close(net
);
738 nvdev
->start_remove
= true;
739 rndis_filter_device_remove(dev
);
741 nvdev
->num_chn
= channels
->combined_count
;
743 net_device_ctx
->device_ctx
= dev
;
744 hv_set_drvdata(dev
, net
);
746 memset(&device_info
, 0, sizeof(device_info
));
747 device_info
.num_chn
= nvdev
->num_chn
; /* passed to RNDIS */
748 device_info
.ring_size
= ring_size
;
749 device_info
.max_num_vrss_chns
= max_num_vrss_chns
;
751 ret
= rndis_filter_device_add(dev
, &device_info
);
754 netdev_err(net
, "unable to add netvsc device (ret %d)\n", ret
);
760 nvdev
= hv_get_drvdata(dev
);
762 ret
= netif_set_real_num_tx_queues(net
, nvdev
->num_chn
);
765 netdev_err(net
, "could not set tx queue count (ret %d)\n", ret
);
771 ret
= netif_set_real_num_rx_queues(net
, nvdev
->num_chn
);
774 netdev_err(net
, "could not set rx queue count (ret %d)\n", ret
);
786 /* If the above failed, we attempt to recover through the same
787 * process but with the original number of channels.
789 netdev_err(net
, "could not set channels, recovering\n");
791 channels
->combined_count
= num_chn
;
795 static int netvsc_change_mtu(struct net_device
*ndev
, int mtu
)
797 struct net_device_context
*ndevctx
= netdev_priv(ndev
);
798 struct hv_device
*hdev
= ndevctx
->device_ctx
;
799 struct netvsc_device
*nvdev
= hv_get_drvdata(hdev
);
800 struct netvsc_device_info device_info
;
801 int limit
= ETH_DATA_LEN
;
804 if (nvdev
== NULL
|| nvdev
->destroy
)
807 if (nvdev
->nvsp_version
>= NVSP_PROTOCOL_VERSION_2
)
808 limit
= NETVSC_MTU
- ETH_HLEN
;
810 if (mtu
< NETVSC_MTU_MIN
|| mtu
> limit
)
813 ret
= netvsc_close(ndev
);
817 nvdev
->start_remove
= true;
818 rndis_filter_device_remove(hdev
);
822 ndevctx
->device_ctx
= hdev
;
823 hv_set_drvdata(hdev
, ndev
);
825 memset(&device_info
, 0, sizeof(device_info
));
826 device_info
.ring_size
= ring_size
;
827 device_info
.num_chn
= nvdev
->num_chn
;
828 device_info
.max_num_vrss_chns
= max_num_vrss_chns
;
829 rndis_filter_device_add(hdev
, &device_info
);
837 static struct rtnl_link_stats64
*netvsc_get_stats64(struct net_device
*net
,
838 struct rtnl_link_stats64
*t
)
840 struct net_device_context
*ndev_ctx
= netdev_priv(net
);
843 for_each_possible_cpu(cpu
) {
844 struct netvsc_stats
*tx_stats
= per_cpu_ptr(ndev_ctx
->tx_stats
,
846 struct netvsc_stats
*rx_stats
= per_cpu_ptr(ndev_ctx
->rx_stats
,
848 u64 tx_packets
, tx_bytes
, rx_packets
, rx_bytes
;
852 start
= u64_stats_fetch_begin_irq(&tx_stats
->syncp
);
853 tx_packets
= tx_stats
->packets
;
854 tx_bytes
= tx_stats
->bytes
;
855 } while (u64_stats_fetch_retry_irq(&tx_stats
->syncp
, start
));
858 start
= u64_stats_fetch_begin_irq(&rx_stats
->syncp
);
859 rx_packets
= rx_stats
->packets
;
860 rx_bytes
= rx_stats
->bytes
;
861 } while (u64_stats_fetch_retry_irq(&rx_stats
->syncp
, start
));
863 t
->tx_bytes
+= tx_bytes
;
864 t
->tx_packets
+= tx_packets
;
865 t
->rx_bytes
+= rx_bytes
;
866 t
->rx_packets
+= rx_packets
;
869 t
->tx_dropped
= net
->stats
.tx_dropped
;
870 t
->tx_errors
= net
->stats
.tx_dropped
;
872 t
->rx_dropped
= net
->stats
.rx_dropped
;
873 t
->rx_errors
= net
->stats
.rx_errors
;
878 static int netvsc_set_mac_addr(struct net_device
*ndev
, void *p
)
880 struct net_device_context
*ndevctx
= netdev_priv(ndev
);
881 struct hv_device
*hdev
= ndevctx
->device_ctx
;
882 struct sockaddr
*addr
= p
;
883 char save_adr
[ETH_ALEN
];
884 unsigned char save_aatype
;
887 memcpy(save_adr
, ndev
->dev_addr
, ETH_ALEN
);
888 save_aatype
= ndev
->addr_assign_type
;
890 err
= eth_mac_addr(ndev
, p
);
894 err
= rndis_filter_set_device_mac(hdev
, addr
->sa_data
);
896 /* roll back to saved MAC */
897 memcpy(ndev
->dev_addr
, save_adr
, ETH_ALEN
);
898 ndev
->addr_assign_type
= save_aatype
;
#ifdef CONFIG_NET_POLL_CONTROLLER
/* ndo_poll_controller hook; intentionally empty. */
static void netvsc_poll_controller(struct net_device *net)
{
	/* As netvsc_start_xmit() works synchronous we don't have to
	 * trigger anything here.
	 */
}
#endif
913 static const struct ethtool_ops ethtool_ops
= {
914 .get_drvinfo
= netvsc_get_drvinfo
,
915 .get_link
= ethtool_op_get_link
,
916 .get_channels
= netvsc_get_channels
,
917 .set_channels
= netvsc_set_channels
,
920 static const struct net_device_ops device_ops
= {
921 .ndo_open
= netvsc_open
,
922 .ndo_stop
= netvsc_close
,
923 .ndo_start_xmit
= netvsc_start_xmit
,
924 .ndo_set_rx_mode
= netvsc_set_multicast_list
,
925 .ndo_change_mtu
= netvsc_change_mtu
,
926 .ndo_validate_addr
= eth_validate_addr
,
927 .ndo_set_mac_address
= netvsc_set_mac_addr
,
928 .ndo_select_queue
= netvsc_select_queue
,
929 .ndo_get_stats64
= netvsc_get_stats64
,
930 #ifdef CONFIG_NET_POLL_CONTROLLER
931 .ndo_poll_controller
= netvsc_poll_controller
,
936 * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link
937 * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is
938 * present send GARP packet to network peers with netif_notify_peers().
940 static void netvsc_link_change(struct work_struct
*w
)
942 struct net_device_context
*ndev_ctx
;
943 struct net_device
*net
;
944 struct netvsc_device
*net_device
;
945 struct rndis_device
*rdev
;
946 struct netvsc_reconfig
*event
= NULL
;
947 bool notify
= false, reschedule
= false;
948 unsigned long flags
, next_reconfig
, delay
;
950 ndev_ctx
= container_of(w
, struct net_device_context
, dwork
.work
);
951 net_device
= hv_get_drvdata(ndev_ctx
->device_ctx
);
952 rdev
= net_device
->extension
;
953 net
= net_device
->ndev
;
955 next_reconfig
= ndev_ctx
->last_reconfig
+ LINKCHANGE_INT
;
956 if (time_is_after_jiffies(next_reconfig
)) {
957 /* link_watch only sends one notification with current state
958 * per second, avoid doing reconfig more frequently. Handle
961 delay
= next_reconfig
- jiffies
;
962 delay
= delay
< LINKCHANGE_INT
? delay
: LINKCHANGE_INT
;
963 schedule_delayed_work(&ndev_ctx
->dwork
, delay
);
966 ndev_ctx
->last_reconfig
= jiffies
;
968 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
969 if (!list_empty(&ndev_ctx
->reconfig_events
)) {
970 event
= list_first_entry(&ndev_ctx
->reconfig_events
,
971 struct netvsc_reconfig
, list
);
972 list_del(&event
->list
);
973 reschedule
= !list_empty(&ndev_ctx
->reconfig_events
);
975 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
982 switch (event
->event
) {
983 /* Only the following events are possible due to the check in
984 * netvsc_linkstatus_callback()
986 case RNDIS_STATUS_MEDIA_CONNECT
:
987 if (rdev
->link_state
) {
988 rdev
->link_state
= false;
989 netif_carrier_on(net
);
990 netif_tx_wake_all_queues(net
);
996 case RNDIS_STATUS_MEDIA_DISCONNECT
:
997 if (!rdev
->link_state
) {
998 rdev
->link_state
= true;
999 netif_carrier_off(net
);
1000 netif_tx_stop_all_queues(net
);
1004 case RNDIS_STATUS_NETWORK_CHANGE
:
1005 /* Only makes sense if carrier is present */
1006 if (!rdev
->link_state
) {
1007 rdev
->link_state
= true;
1008 netif_carrier_off(net
);
1009 netif_tx_stop_all_queues(net
);
1010 event
->event
= RNDIS_STATUS_MEDIA_CONNECT
;
1011 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
1012 list_add_tail(&event
->list
, &ndev_ctx
->reconfig_events
);
1013 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
1022 netdev_notify_peers(net
);
1024 /* link_watch only sends one notification with current state per
1025 * second, handle next reconfig event in 2 seconds.
1028 schedule_delayed_work(&ndev_ctx
->dwork
, LINKCHANGE_INT
);
1031 static void netvsc_free_netdev(struct net_device
*netdev
)
1033 struct net_device_context
*net_device_ctx
= netdev_priv(netdev
);
1035 free_percpu(net_device_ctx
->tx_stats
);
1036 free_percpu(net_device_ctx
->rx_stats
);
1037 free_netdev(netdev
);
1040 static int netvsc_probe(struct hv_device
*dev
,
1041 const struct hv_vmbus_device_id
*dev_id
)
1043 struct net_device
*net
= NULL
;
1044 struct net_device_context
*net_device_ctx
;
1045 struct netvsc_device_info device_info
;
1046 struct netvsc_device
*nvdev
;
1049 net
= alloc_etherdev_mq(sizeof(struct net_device_context
),
1054 netif_carrier_off(net
);
1056 net_device_ctx
= netdev_priv(net
);
1057 net_device_ctx
->device_ctx
= dev
;
1058 net_device_ctx
->msg_enable
= netif_msg_init(debug
, default_msg
);
1059 if (netif_msg_probe(net_device_ctx
))
1060 netdev_dbg(net
, "netvsc msg_enable: %d\n",
1061 net_device_ctx
->msg_enable
);
1063 net_device_ctx
->tx_stats
= netdev_alloc_pcpu_stats(struct netvsc_stats
);
1064 if (!net_device_ctx
->tx_stats
) {
1068 net_device_ctx
->rx_stats
= netdev_alloc_pcpu_stats(struct netvsc_stats
);
1069 if (!net_device_ctx
->rx_stats
) {
1070 free_percpu(net_device_ctx
->tx_stats
);
1075 hv_set_drvdata(dev
, net
);
1076 INIT_DELAYED_WORK(&net_device_ctx
->dwork
, netvsc_link_change
);
1077 INIT_WORK(&net_device_ctx
->work
, do_set_multicast
);
1079 spin_lock_init(&net_device_ctx
->lock
);
1080 INIT_LIST_HEAD(&net_device_ctx
->reconfig_events
);
1082 net
->netdev_ops
= &device_ops
;
1084 net
->hw_features
= NETIF_F_RXCSUM
| NETIF_F_SG
| NETIF_F_IP_CSUM
|
1086 net
->features
= NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_SG
| NETIF_F_RXCSUM
|
1087 NETIF_F_IP_CSUM
| NETIF_F_TSO
;
1089 net
->ethtool_ops
= ðtool_ops
;
1090 SET_NETDEV_DEV(net
, &dev
->device
);
1092 /* We always need headroom for rndis header */
1093 net
->needed_headroom
= RNDIS_AND_PPI_SIZE
;
1095 /* Notify the netvsc driver of the new device */
1096 memset(&device_info
, 0, sizeof(device_info
));
1097 device_info
.ring_size
= ring_size
;
1098 device_info
.max_num_vrss_chns
= max_num_vrss_chns
;
1099 ret
= rndis_filter_device_add(dev
, &device_info
);
1101 netdev_err(net
, "unable to add netvsc device (ret %d)\n", ret
);
1102 netvsc_free_netdev(net
);
1103 hv_set_drvdata(dev
, NULL
);
1106 memcpy(net
->dev_addr
, device_info
.mac_adr
, ETH_ALEN
);
1108 nvdev
= hv_get_drvdata(dev
);
1109 netif_set_real_num_tx_queues(net
, nvdev
->num_chn
);
1110 netif_set_real_num_rx_queues(net
, nvdev
->num_chn
);
1112 ret
= register_netdev(net
);
1114 pr_err("Unable to register netdev.\n");
1115 rndis_filter_device_remove(dev
);
1116 netvsc_free_netdev(net
);
1122 static int netvsc_remove(struct hv_device
*dev
)
1124 struct net_device
*net
;
1125 struct net_device_context
*ndev_ctx
;
1126 struct netvsc_device
*net_device
;
1128 net_device
= hv_get_drvdata(dev
);
1129 net
= net_device
->ndev
;
1132 dev_err(&dev
->device
, "No net device to remove\n");
1136 net_device
->start_remove
= true;
1138 ndev_ctx
= netdev_priv(net
);
1139 cancel_delayed_work_sync(&ndev_ctx
->dwork
);
1140 cancel_work_sync(&ndev_ctx
->work
);
1142 /* Stop outbound asap */
1143 netif_tx_disable(net
);
1145 unregister_netdev(net
);
1148 * Call to the vsc driver to let it know that the device is being
1151 rndis_filter_device_remove(dev
);
1153 netvsc_free_netdev(net
);
1157 static const struct hv_vmbus_device_id id_table
[] = {
1163 MODULE_DEVICE_TABLE(vmbus
, id_table
);
1165 /* The one and only one */
1166 static struct hv_driver netvsc_drv
= {
1167 .name
= KBUILD_MODNAME
,
1168 .id_table
= id_table
,
1169 .probe
= netvsc_probe
,
1170 .remove
= netvsc_remove
,
1173 static void __exit
netvsc_drv_exit(void)
1175 vmbus_driver_unregister(&netvsc_drv
);
1178 static int __init
netvsc_drv_init(void)
1180 if (ring_size
< RING_SIZE_MIN
) {
1181 ring_size
= RING_SIZE_MIN
;
1182 pr_info("Increased ring_size to %d (min allowed)\n",
1185 return vmbus_driver_register(&netvsc_drv
);
1188 MODULE_LICENSE("GPL");
1189 MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
1191 module_init(netvsc_drv_init
);
1192 module_exit(netvsc_drv_exit
);