/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet.h"

#define DRV_MODULE_NAME		"sunvnet"
#define DRV_MODULE_VERSION	"1.0"
#define DRV_MODULE_RELDATE	"June 25, 2007"

static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);

#define VNET_MAX_TXQS		16

/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
#define	VNET_MAX_RETRIES	10

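/* The retry loops below double the delay from 1us up to a 128us cap
 * between attempts, so a full VNET_MAX_RETRIES cycle waits roughly
 * 1+2+4+...+128 us plus a few more 128us slots -- on the order of
 * half a millisecond before the send is abandoned.
 */
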
static int __vnet_tx_trigger(struct vnet_port *port, u32 start);

/* Ordered from largest major to lowest */
static struct vio_version vnet_versions[] = {
	{ .major = 1, .minor = 8 },
	{ .major = 1, .minor = 7 },
	{ .major = 1, .minor = 6 },
	{ .major = 1, .minor = 0 },
};

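/* The VIO handshake walks this table from the top, so listing the
 * largest version first means the highest version both endpoints
 * support is the one negotiated.
 */
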
static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port);

static int vnet_send_attr(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = port->vp->dev;
	struct vio_net_attr_info pkt;
	int framelen = ETH_FRAME_LEN;
	int i, err;

	err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
	if (err)
		return err;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	if (vio_version_before(vio, 1, 2))
		pkt.xfer_mode = VIO_DRING_MODE;
	else
		pkt.xfer_mode = VIO_NEW_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	if (vio_version_after(vio, 1, 3)) {
		if (port->rmtu) {
			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
			pkt.mtu = port->rmtu;
		} else {
			port->rmtu = VNET_MAXPACKET;
			pkt.mtu = port->rmtu;
		}
		if (vio_version_after_eq(vio, 1, 6))
			pkt.options = VIO_TX_DRING;
	} else if (vio_version_before(vio, 1, 3)) {
		pkt.mtu = framelen;
	} else { /* v1.3 */
		pkt.mtu = framelen + VLAN_HLEN;
	}

	if (vio_version_after_eq(vio, 1, 7) && port->tso) {
		pkt.cflags |= VNET_LSO_IPV4_CAPAB;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		pkt.ipv4_lso_maxlen = port->tsolen;
	}

	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       "cflags[0x%04x] lso_max[%u]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long)pkt.addr,
	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}

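/* handle_attr_info() is the receive side of the exchange above: it
 * validates the peer's attributes, folds the peer's MTU and LSO limits
 * into port->rmtu/port->tsolen, and answers with an ACK (possibly with
 * adjusted values) or a NACK that tears the connection down.
 */
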
static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	struct vnet_port *port = to_vnet_port(vio);
	u64 localmtu;
	u8 xfer_mode;

	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.sid = vio_send_sid(vio);

	xfer_mode = pkt->xfer_mode;
	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
		xfer_mode = VIO_NEW_DRING_MODE;

	/* MTU negotiation:
	 *	< v1.3 - ETH_FRAME_LEN exactly
	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
	 *		 pkt->mtu for ACK
	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
	 */
	if (vio_version_before(vio, 1, 3)) {
		localmtu = ETH_FRAME_LEN;
	} else if (vio_version_after(vio, 1, 3)) {
		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
		localmtu = min(pkt->mtu, localmtu);
		pkt->mtu = localmtu;
	} else { /* v1.3 */
		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
	}
	port->rmtu = localmtu;

	/* LSO negotiation */
	if (vio_version_after_eq(vio, 1, 7))
		port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
	else
		port->tso = false;
	if (port->tso) {
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
		if (port->tsolen < VNET_MINTSO) {
			port->tso = false;
			port->tsolen = 0;
			pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		}
		pkt->ipv4_lso_maxlen = port->tsolen;
	} else {
		pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		pkt->ipv4_lso_maxlen = 0;
	}

	/* for version >= 1.6, ACK packet mode we support */
	if (vio_version_after_eq(vio, 1, 6)) {
		pkt->xfer_mode = VIO_NEW_DRING_MODE;
		pkt->options = VIO_TX_DRING;
	}

	if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != localmtu) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void) vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	} else {
		viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
		       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
		       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
		       pkt->xfer_mode, pkt->addr_type,
		       (unsigned long long)pkt->addr,
		       pkt->ack_freq, pkt->plnk_updt, pkt->options,
		       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
		       pkt->ipv4_lso_maxlen);

		pkt->tag.stype = VIO_SUBTYPE_ACK;

		return vio_ldc_send(vio, pkt, sizeof(*pkt));
	}
}

static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}

static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}

static void vnet_handshake_complete(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;
}

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
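/* For example, with VNET_PACKET_SKIP == 6, a 1500-byte frame is copied
 * starting 6 bytes into the receive buffer, and the copy length is
 * rounded up from 1506 to 1512 bytes ((len + 6 + 7) & ~7), keeping both
 * the base address and the length 8-byte aligned for the hypervisor.
 */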
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
	unsigned long addr, off;

	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long) skb->data;
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}

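/* Compute a full TCP/UDP checksum in software for an IPv4 frame; used
 * both when a v1.8 peer flags a received frame as needing a full
 * checksum and on transmit when the stack hands us CHECKSUM_PARTIAL.
 */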
static inline void vnet_fullcsum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int offset = skb_transport_offset(skb);

	if (skb->protocol != htons(ETH_P_IP))
		return;
	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return;
	skb->ip_summed = CHECKSUM_NONE;
	skb->csum_level = 1;
	skb->csum = 0;
	if (iph->protocol == IPPROTO_TCP) {
		struct tcphdr *ptcp = tcp_hdr(skb);

		ptcp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_TCP,
						skb->csum);
	} else if (iph->protocol == IPPROTO_UDP) {
		struct udphdr *pudp = udp_hdr(skb);

		pudp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_UDP,
						skb->csum);
	}
}

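/* Receive one descriptor's worth of data: validate the length against
 * the negotiated MTU, pull the (8-byte rounded) payload in with
 * ldc_copy(), apply the v1.8 checksum-offload flags from the extended
 * descriptor, and hand the skb to GRO.
 */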
static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
	struct net_device *dev = port->vp->dev;
	unsigned int len = desc->size;
	unsigned int copy_len;
	struct sk_buff *skb;
	int err;

	err = -EMSGSIZE;
	if (unlikely(len < ETH_ZLEN || len > port->rmtu)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       desc->cookies, desc->ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (vio_version_after_eq(&port->vio, 1, 8)) {
		struct vio_net_dext *dext = vio_net_ext(desc);

		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
			if (skb->protocol == ETH_P_IP) {
				struct iphdr *iph = (struct iphdr *)skb->data;

				iph->check = 0;
				ip_send_check(iph);
			}
		}
		if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
		    skb->ip_summed == CHECKSUM_NONE)
			vnet_fullcsum(skb);
		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_level = 0;
			if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
				skb->csum_level = 1;
		}
	}

	skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
	napi_gro_receive(&port->napi, skb);
	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}

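/* ACK a range of RX descriptors back to the transmitter. A
 * VIO_DRING_STOPPED ack tells the peer we have stopped processing;
 * if that send fails, stop_rx/stop_rx_idx record the ack so that
 * __vnet_tx_trigger() can re-issue it before the next TX trigger.
 */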
static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_ACK,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= start,
		.end_idx		= end,
		.state			= vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
				port->raddr[0], port->raddr[1],
				port->raddr[2], port->raddr[3],
				port->raddr[4], port->raddr[5]);
			break;
		}
	} while (err == -EAGAIN);

	if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
		port->stop_rx_idx = end;
		port->stop_rx = true;
	} else {
		port->stop_rx_idx = 0;
		port->stop_rx = false;
	}

	return err;
}

static u32 next_idx(u32 idx, struct vio_dring_state *dr)
{
	if (++idx == dr->num_entries)
		idx = 0;
	return idx;
}

static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
{
	if (idx == 0)
		idx = dr->num_entries - 1;
	else
		idx--;

	return idx;
}

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}

static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}

static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	BUG_ON(desc == NULL);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;

	dma_rmb();

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	err = vnet_rx_one(port, desc);
	if (err == -ECONNRESET)
		return err;
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}

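/* Walk the RX descriptors from start to end under the NAPI budget.
 * ACKs are batched: an ACTIVE ack is sent mid-walk only when the
 * producer requested one, and a final STOPPED ack ends the walk unless
 * the budget ran out, in which case napi_stop_idx remembers where to
 * resume on the next poll.
 */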
static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end, int *npkts, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;
	bool send_ack = true;

	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);

	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		(*npkts)++;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = next_idx(start, dr);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
		if ((*npkts) >= budget) {
			send_ack = false;
			break;
		}
	}
	if (unlikely(ack_start == -1))
		ack_start = ack_end = prev_idx(start, dr);
	if (send_ack) {
		port->napi_resume = false;
		return vnet_send_ack(port, dr, ack_start, ack_end,
				     VIO_DRING_STOPPED);
	} else {
		port->napi_resume = true;
		port->napi_stop_idx = ack_end;
		return 1;
	}
}

static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
		   int budget)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	if (!port->napi_resume)
		dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
			    npkts, budget);
}

static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = next_idx(idx, dr);
	}
	return found;
}

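/* TX-completion path: the peer acked up to pkt->end_idx, so advance
 * dr->cons past it, send the "start" trigger on xmit's behalf if it
 * raced with us (see the comment in vnet_start_xmit()), and report
 * whether the stopped TX queue has enough room to be woken.
 */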
static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	struct vnet *vp;
	u32 end;
	struct vio_net_desc *desc;
	struct netdev_queue *txq;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	vp = port->vp;
	dev = vp->dev;
	netif_tx_lock(dev);
	if (unlikely(!idx_is_pending(dr, end))) {
		netif_tx_unlock(dev);
		return 0;
	}

	/* sync for race conditions with vnet_start_xmit() and tell xmit it
	 * is time to send a trigger.
	 */
	dr->cons = next_idx(end, dr);
	desc = vio_dring_entry(dr, dr->cons);
	if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
		/* vnet_start_xmit() just populated this dring but missed
		 * sending the "start" LDC message to the consumer.
		 * Send a "start" trigger on its behalf.
		 */
		if (__vnet_tx_trigger(port, dr->cons) > 0)
			port->start_cons = false;
		else
			port->start_cons = true;
	} else {
		port->start_cons = true;
	}
	netif_tx_unlock(dev);

	txq = netdev_get_tx_queue(dev, port->q_index);
	if (unlikely(netif_tx_queue_stopped(txq) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       port->vp->dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

/* Got back a STOPPED LDC message on port. If the queue is stopped,
 * wake it up so that we'll send out another START message at the
 * next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(port->vp->dev, port->q_index);
	__netif_tx_lock(txq, smp_processor_id());
	if (likely(netif_tx_queue_stopped(txq))) {
		struct vio_dring_state *dr;

		dr = &port->vio.drings[VIO_DRIVER_TX_RING];
		netif_tx_wake_queue(txq);
	}
	__netif_tx_unlock(txq);
}

static inline bool port_is_up(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}

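/* NAPI worker: handle any RESET/UP control events first, then drain
 * LDC messages (or resume a budget-limited dring walk) until the
 * budget is consumed, waking the TX queue if a dring ACK freed space.
 */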
static int vnet_event_napi(struct vnet_port *port, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int tx_wakeup, err;
	int npkts = 0;
	int event = (port->rx_event & LDC_EVENT_RESET);

ldc_ctrl:
	if (unlikely(event == LDC_EVENT_RESET ||
		     event == LDC_EVENT_UP)) {
		vio_link_state_change(vio, event);

		if (event == LDC_EVENT_RESET) {
			port->rmtu = 0;
			vio_port_up(vio);
		}
		port->rx_event = 0;
		return 0;
	}
	/* We may have multiple LDC events in rx_event. Unroll send_events() */
	event = (port->rx_event & LDC_EVENT_UP);
	port->rx_event &= ~(LDC_EVENT_RESET|LDC_EVENT_UP);
	if (event == LDC_EVENT_UP)
		goto ldc_ctrl;
	event = port->rx_event;
	if (!(event & LDC_EVENT_DATA_READY))
		return 0;

	/* we dont expect any other bits than RESET, UP, DATA_READY */
	BUG_ON(event != LDC_EVENT_DATA_READY);

	tx_wakeup = err = 0;

	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		if (port->napi_resume) {
			struct vio_dring_data *pkt =
				(struct vio_dring_data *)&msgbuf;
			struct vio_dring_state *dr =
				&port->vio.drings[VIO_DRIVER_RX_RING];

			pkt->tag.type = VIO_TYPE_DATA;
			pkt->tag.stype = VIO_SUBTYPE_INFO;
			pkt->tag.stype_env = VIO_DRING_DATA;
			pkt->seq = dr->rcv_nxt;
			pkt->start_idx = next_idx(port->napi_stop_idx, dr);
			pkt->end_idx = -1;
		} else {
			err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
			if (unlikely(err < 0)) {
				if (err == -ECONNRESET)
					vio_conn_reset(vio);
				break;
			}
			if (err == 0)
				break;
			viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
			       msgbuf.tag.type,
			       msgbuf.tag.stype,
			       msgbuf.tag.stype_env,
			       msgbuf.tag.sid);
			err = vio_validate_sid(vio, &msgbuf.tag);
			if (err < 0)
				break;
		}

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				if (!port_is_up(port)) {
					/* failures like handshake_failure()
					 * may have cleaned up dring, but
					 * NAPI polling may bring us here.
					 */
					err = -ECONNRESET;
					break;
				}
				err = vnet_rx(port, &msgbuf, &npkts, budget);
				if (npkts >= budget)
					break;
				if (npkts == 0)
					break;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		maybe_tx_wakeup(port);
	return npkts;
}

static int vnet_poll(struct napi_struct *napi, int budget)
{
	struct vnet_port *port = container_of(napi, struct vnet_port, napi);
	struct vio_driver_state *vio = &port->vio;
	int processed = vnet_event_napi(port, budget);

	if (processed < budget) {
		napi_complete(napi);
		port->rx_event &= ~LDC_EVENT_DATA_READY;
		vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
	}
	return processed;
}

static void vnet_event(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;

	port->rx_event |= event;
	vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
	napi_schedule(&port->napi);
}

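/* Send the "start" trigger for the descriptors beginning at dr->cons,
 * first flushing any STOPPED ack that vnet_send_ack() failed to
 * deliver, and retrying with the same capped exponential backoff used
 * there.
 */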
static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_INFO,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= start,
		.end_idx		= (u32) -1,
	};
	int err, delay;
	int retries = 0;

	if (port->stop_rx) {
		err = vnet_send_ack(port,
				    &port->vio.drings[VIO_DRIVER_RX_RING],
				    port->stop_rx_idx, -1,
				    VIO_DRING_STOPPED);
		if (err <= 0)
			return err;
	}

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);

	return err;
}

static struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
{
	unsigned int hash = vnet_hashfn(skb->data);
	struct hlist_head *hp = &vp->port_hash[hash];
	struct vnet_port *port;

	hlist_for_each_entry_rcu(port, hp, hash) {
		if (!port_is_up(port))
			continue;
		if (ether_addr_equal(port->raddr, skb->data))
			return port;
	}
	list_for_each_entry_rcu(port, &vp->port_list, list) {
		if (!port->switch_port)
			continue;
		if (!port_is_up(port))
			continue;
		return port;
	}
	return NULL;
}

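/* Reap transmitted buffers, walking backwards from the most recently
 * produced entry: DONE descriptors are unmapped and their skbs chained
 * for the caller to free outside the TX lock, READY descriptors are
 * counted as still pending, and the first FREE one ends the scan.
 */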
static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
					  int *pending)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *skb = NULL;
	int i, txi;

	*pending = 0;

	txi = dr->prod-1;
	if (txi < 0)
		txi = VNET_TX_RING_SIZE-1;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, txi);

		if (d->hdr.state == VIO_DESC_DONE) {
			if (port->tx_bufs[txi].skb) {
				BUG_ON(port->tx_bufs[txi].skb->next);

				port->tx_bufs[txi].skb->next = skb;
				skb = port->tx_bufs[txi].skb;
				port->tx_bufs[txi].skb = NULL;

				ldc_unmap(port->vio.lp,
					  port->tx_bufs[txi].cookies,
					  port->tx_bufs[txi].ncookies);
			}
			d->hdr.state = VIO_DESC_FREE;
		} else if (d->hdr.state == VIO_DESC_READY) {
			(*pending)++;
		} else if (d->hdr.state == VIO_DESC_FREE) {
			break;
		}
		--txi;
		if (txi < 0)
			txi = VNET_TX_RING_SIZE-1;
	}
	return skb;
}

static inline void vnet_free_skbs(struct sk_buff *skb)
{
	struct sk_buff *next;

	while (skb) {
		next = skb->next;
		skb->next = NULL;
		dev_kfree_skb(skb);
		skb = next;
	}
}

static void vnet_clean_timer_expire(unsigned long port0)
{
	struct vnet_port *port = (struct vnet_port *)port0;
	struct sk_buff *freeskbs;
	int pending;

	netif_tx_lock(port->vp->dev);
	freeskbs = vnet_clean_tx_ring(port, &pending);
	netif_tx_unlock(port->vp->dev);

	vnet_free_skbs(freeskbs);

	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else
		del_timer(&port->clean_timer);
}

static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
			       struct ldc_trans_cookie *cookies, int ncookies,
			       unsigned int map_perm)
{
	int i, nc, err, blen;

	/* header */
	blen = skb_headlen(skb);
	if (blen < ETH_ZLEN)
		blen = ETH_ZLEN;
	blen += VNET_PACKET_SKIP;
	blen += 8 - (blen & 7);

	err = ldc_map_single(lp, skb->data-VNET_PACKET_SKIP, blen, cookies,
			     ncookies, map_perm);
	if (err < 0)
		return err;
	nc = err;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u8 *vaddr;

		if (nc < ncookies) {
			vaddr = kmap_atomic(skb_frag_page(f));
			blen = skb_frag_size(f);
			blen += 8 - (blen & 7);
			err = ldc_map_single(lp, vaddr + f->page_offset,
					     blen, cookies + nc, ncookies - nc,
					     map_perm);
			kunmap_atomic(vaddr);
		} else {
			/* out of cookies for this frag */
			err = -EMSGSIZE;
		}

		if (err < 0) {
			ldc_unmap(lp, cookies, nc);
			return err;
		}
		nc += err;
	}
	return nc;
}

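/* Ensure the skb satisfies the LDC mapping rules before it is mapped:
 * skb->data must sit at an (8n + VNET_PACKET_SKIP) address so that
 * data - VNET_PACKET_SKIP is 8-byte aligned, every frag must be 8-byte
 * aligned, and the frags must fit in the available cookies; otherwise
 * the packet is copied into a freshly aligned skb, recomputing any
 * pending partial checksum along the way.
 */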
static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
{
	struct sk_buff *nskb;
	int i, len, pad, docopy;

	len = skb->len;
	pad = 0;
	if (len < ETH_ZLEN) {
		pad += ETH_ZLEN - skb->len;
		len = ETH_ZLEN;
	}
	len += VNET_PACKET_SKIP;
	pad += 8 - (len & 7);

	/* make sure we have enough cookies and alignment in every frag */
	docopy = skb_shinfo(skb)->nr_frags >= ncookies;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		docopy |= f->page_offset & 7;
	}
	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
	    skb_tailroom(skb) < pad ||
	    skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
		int start = 0, offset;
		__wsum csum;

		len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
		nskb = alloc_and_align_skb(skb->dev, len);
		if (nskb == NULL) {
			dev_kfree_skb(skb);
			return NULL;
		}
		skb_reserve(nskb, VNET_PACKET_SKIP);

		nskb->protocol = skb->protocol;
		offset = skb_mac_header(skb) - skb->data;
		skb_set_mac_header(nskb, offset);
		offset = skb_network_header(skb) - skb->data;
		skb_set_network_header(nskb, offset);
		offset = skb_transport_header(skb) - skb->data;
		skb_set_transport_header(nskb, offset);

		nskb->csum_offset = skb->csum_offset;
		nskb->ip_summed = skb->ip_summed;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			start = skb_checksum_start_offset(skb);
		if (start) {
			struct iphdr *iph = ip_hdr(nskb);
			int offset = start + nskb->csum_offset;

			if (skb_copy_bits(skb, 0, nskb->data, start)) {
				dev_kfree_skb(nskb);
				dev_kfree_skb(skb);
				return NULL;
			}
			*(__sum16 *)(skb->data + offset) = 0;
			csum = skb_copy_and_csum_bits(skb, start,
						      nskb->data + start,
						      skb->len - start, 0);
			if (iph->protocol == IPPROTO_TCP ||
			    iph->protocol == IPPROTO_UDP) {
				csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - start,
							 iph->protocol, csum);
			}
			*(__sum16 *)(nskb->data + offset) = csum;

			nskb->ip_summed = CHECKSUM_NONE;
		} else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
			dev_kfree_skb(nskb);
			dev_kfree_skb(skb);
			return NULL;
		}
		(void)skb_put(nskb, skb->len);
		if (skb_is_gso(skb)) {
			skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
			skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
		}
		dev_kfree_skb(skb);
		skb = nskb;
	}
	return skb;
}

static u16
vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
		  void *accel_priv, select_queue_fallback_t fallback)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port = __tx_port_find(vp, skb);

	if (port == NULL)
		return 0;

	return port->q_index;
}

static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev);

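/* TSO helper: a GSO skb larger than the peer's negotiated limit is
 * re-segmented into port->tsolen sized chunks via skb_gso_segment();
 * chunks still larger than the device MTU keep their gso metadata so
 * the peer performs the final segmentation, and each chunk is sent
 * back through vnet_start_xmit().
 */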
static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb)
{
	struct net_device *dev = port->vp->dev;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *segs;
	int maclen, datalen;
	int status;
	int gso_size, gso_type, gso_segs;
	int hlen = skb_transport_header(skb) - skb_mac_header(skb);
	int proto = IPPROTO_IP;

	if (skb->protocol == htons(ETH_P_IP))
		proto = ip_hdr(skb)->protocol;
	else if (skb->protocol == htons(ETH_P_IPV6))
		proto = ipv6_hdr(skb)->nexthdr;

	if (proto == IPPROTO_TCP)
		hlen += tcp_hdr(skb)->doff * 4;
	else if (proto == IPPROTO_UDP)
		hlen += sizeof(struct udphdr);
	else {
		pr_err("vnet_handle_offloads GSO with unknown transport "
		       "protocol %d tproto %d\n", skb->protocol, proto);
		hlen = 128; /* XXX */
	}
	datalen = port->tsolen - hlen;

	gso_size = skb_shinfo(skb)->gso_size;
	gso_type = skb_shinfo(skb)->gso_type;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (port->tso && gso_size < datalen)
		gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);

	if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(dev, port->q_index);
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
			return NETDEV_TX_BUSY;
		netif_tx_wake_queue(txq);
	}

	maclen = skb_network_header(skb) - skb_mac_header(skb);
	skb_pull(skb, maclen);

	if (port->tso && gso_size < datalen) {
		/* segment to TSO size */
		skb_shinfo(skb)->gso_size = datalen;
		skb_shinfo(skb)->gso_segs = gso_segs;

		segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);

		/* restore gso_size & gso_segs */
		skb_shinfo(skb)->gso_size = gso_size;
		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len - hlen,
							 gso_size);
	} else
		segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
	if (IS_ERR(segs)) {
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	skb_push(skb, maclen);
	skb_reset_mac_header(skb);

	status = 0;
	while (segs) {
		struct sk_buff *curr = segs;

		segs = segs->next;
		curr->next = NULL;
		if (port->tso && curr->len > dev->mtu) {
			skb_shinfo(curr)->gso_size = gso_size;
			skb_shinfo(curr)->gso_type = gso_type;
			skb_shinfo(curr)->gso_segs =
				DIV_ROUND_UP(curr->len - hlen, gso_size);
		} else
			skb_shinfo(curr)->gso_size = 0;

		skb_push(curr, maclen);
		skb_reset_mac_header(curr);
		memcpy(skb_mac_header(curr), skb_mac_header(skb),
		       maclen);
		curr->csum_start = skb_transport_header(curr) - curr->head;
		if (ip_hdr(curr)->protocol == IPPROTO_TCP)
			curr->csum_offset = offsetof(struct tcphdr, check);
		else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
			curr->csum_offset = offsetof(struct udphdr, check);

		if (!(status & NETDEV_TX_MASK))
			status = vnet_start_xmit(curr, dev);
		if (status & NETDEV_TX_MASK)
			dev_kfree_skb_any(curr);
	}

	if (!(status & NETDEV_TX_MASK))
		dev_kfree_skb_any(skb);
	return status;
}

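/* Main transmit path: find the destination port by MAC (or the switch
 * port), bounce oversized frames with ICMP FRAG_NEEDED/PKT_TOOBIG,
 * shape and map the skb, fill the next TX descriptor, and send the
 * "start" trigger only when no previous trigger is still outstanding.
 */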
static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port = NULL;
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned int len;
	struct sk_buff *freeskbs = NULL;
	int i, err, txi;
	unsigned pending = 0;
	struct netdev_queue *txq;

	rcu_read_lock();
	port = __tx_port_find(vp, skb);
	if (unlikely(!port)) {
		rcu_read_unlock();
		goto out_dropped;
	}

	if (skb_is_gso(skb) && skb->len > port->tsolen) {
		err = vnet_handle_offloads(port, skb);
		rcu_read_unlock();
		return err;
	}

	if (!skb_is_gso(skb) && skb->len > port->rmtu) {
		unsigned long localmtu = port->rmtu - ETH_HLEN;

		if (vio_version_after_eq(&port->vio, 1, 3))
			localmtu -= VLAN_HLEN;

		if (skb->protocol == htons(ETH_P_IP)) {
			struct flowi4 fl4;
			struct rtable *rt = NULL;

			memset(&fl4, 0, sizeof(fl4));
			fl4.flowi4_oif = dev->ifindex;
			fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
			fl4.daddr = ip_hdr(skb)->daddr;
			fl4.saddr = ip_hdr(skb)->saddr;

			rt = ip_route_output_key(dev_net(dev), &fl4);
			if (!IS_ERR(rt)) {
				skb_dst_set(skb, &rt->dst);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_FRAG_NEEDED,
					  htonl(localmtu));
			}
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
#endif
		goto out_dropped;
	}

	skb = vnet_skb_shape(skb, 2);

	if (unlikely(!skb))
		goto out_dropped;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		vnet_fullcsum(skb);

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	i = skb_get_queue_mapping(skb);
	txq = netdev_get_tx_queue(dev, i);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		if (!netif_tx_queue_stopped(txq)) {
			netif_tx_stop_queue(txq);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		rcu_read_unlock();
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	txi = dr->prod;

	freeskbs = vnet_clean_tx_ring(port, &pending);

	BUG_ON(port->tx_bufs[txi].skb);

	len = skb->len;
	if (len < ETH_ZLEN)
		len = ETH_ZLEN;

	err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
			   (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
	if (err < 0) {
		netdev_info(dev, "tx buffer map error %d\n", err);
		goto out_dropped;
	}

	port->tx_bufs[txi].skb = skb;
	skb = NULL;
	port->tx_bufs[txi].ncookies = err;

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
					     ->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer. The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed. We send a "start" trigger here
	 * if, and only if, start_cons is true (reset it afterward). Conversely,
	 * vnet_ack() should check if the dring corresponding to cons
	 * is marked READY, but start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler. The consumer will
	 * not proceed until the VIO_DESC_READY is visible assuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order. The HV trap from the LDC start trigger is the
	 * producer to consumer announcement that work is available to the
	 * consumer.
	 */
	if (!port->start_cons)
		goto ldc_start_done; /* previous trigger suffices */

	err = __vnet_tx_trigger(port, dr->cons);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		dev->stats.tx_carrier_errors++;
		goto out_dropped;
	}

ldc_start_done:
	port->start_cons = false;

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_tx_wake_queue(txq);
	}

	(void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
	rcu_read_unlock();

	vnet_free_skbs(freeskbs);

	return NETDEV_TX_OK;

out_dropped:
	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else if (port)
		del_timer(&port->clean_timer);
	if (port)
		rcu_read_unlock();
	if (skb)
		dev_kfree_skb(skb);
	vnet_free_skbs(freeskbs);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void vnet_tx_timeout(struct net_device *dev)
{
	/* XXX Implement me XXX */
}

static int vnet_open(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_tx_start_all_queues(dev);

	return 0;
}

static int vnet_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);

	return 0;
}

static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}

static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		if (!m) {
			m = kzalloc(sizeof(*m), GFP_ATOMIC);
			if (!m)
				continue;
			memcpy(m->addr, ha->addr, ETH_ALEN);
			m->hit = 1;

			m->next = vp->mcast_list;
			vp->mcast_list = m;
		}
	}
}

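/* Push the multicast list to the switch port in two passes: first send
 * newly added addresses (info.set = 1), then sweep entries whose hit
 * flag was not refreshed by __update_mc_list() and send them as
 * removals (info.set = 0), batching VNET_NUM_MCAST addresses per
 * message.
 */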
static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
		n_addrs = 0;
	}

	info.set = 0;

	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}

static void vnet_set_rx_mode(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;

	rcu_read_lock();
	list_for_each_entry_rcu(port, &vp->port_list, list) {

		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
			break;
		}
	}
	rcu_read_unlock();
}

static int vnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 65535)
		return -EINVAL;

	dev->mtu = new_mtu;
	return 0;
}

static int vnet_set_mac_addr(struct net_device *dev, void *p)
{
	return -EINVAL;
}

static void vnet_get_drvinfo(struct net_device *dev,
			     struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
}

static u32 vnet_get_msglevel(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);

	return vp->msg_enable;
}

static void vnet_set_msglevel(struct net_device *dev, u32 value)
{
	struct vnet *vp = netdev_priv(dev);

	vp->msg_enable = value;
}

static const struct ethtool_ops vnet_ethtool_ops = {
	.get_drvinfo		= vnet_get_drvinfo,
	.get_msglevel		= vnet_get_msglevel,
	.set_msglevel		= vnet_set_msglevel,
	.get_link		= ethtool_op_get_link,
};

static void vnet_port_free_tx_bufs(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	if (dr->base) {
		ldc_free_exp_dring(port->vio.lp, dr->base,
				   (dr->entry_size * dr->num_entries),
				   dr->cookies, dr->ncookies);
		dr->base = NULL;
		dr->entry_size = 0;
		dr->num_entries = 0;
		dr->pending = 0;
		dr->ncookies = 0;
	}

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		struct vio_net_desc *d;
		void *skb = port->tx_bufs[i].skb;

		if (!skb)
			continue;

		d = vio_dring_entry(dr, i);
		if (d->hdr.state == VIO_DESC_READY)
			pr_warn("active transmit buffers freed\n");

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);
		dev_kfree_skb(skb);
		port->tx_bufs[i].skb = NULL;
		d->hdr.state = VIO_DESC_FREE;
	}
}

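/* Each TX ring entry is laid out as a vio_net_desc followed by room
 * for two ldc_trans_cookies (head plus one frag) and, for v1.7+, the
 * vio_net_dext extension; the whole ring is exported to the peer as a
 * single LDC shared-memory region.
 */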
static int vnet_port_alloc_tx_ring(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len, elen;
	int i, err, ncookies;
	void *dring;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	elen = sizeof(struct vio_net_desc) +
	       sizeof(struct ldc_trans_cookie) * 2;
	if (vio_version_after_eq(&port->vio, 1, 7))
		elen += sizeof(struct vio_net_dext);
	len = VNET_TX_RING_SIZE * elen;

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = elen;
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = dr->cons = 0;
	port->start_cons = true; /* need an initial trigger */
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, i);
		d->hdr.state = VIO_DESC_FREE;
	}
	return 0;

err_out:
	vnet_port_free_tx_bufs(port);

	return err;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void vnet_poll_controller(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
#endif

static LIST_HEAD(vnet_list);
static DEFINE_MUTEX(vnet_list_mutex);

static const struct net_device_ops vnet_ops = {
	.ndo_open		= vnet_open,
	.ndo_stop		= vnet_close,
	.ndo_set_rx_mode	= vnet_set_rx_mode,
	.ndo_set_mac_address	= vnet_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_tx_timeout		= vnet_tx_timeout,
	.ndo_change_mtu		= vnet_change_mtu,
	.ndo_start_xmit		= vnet_start_xmit,
	.ndo_select_queue	= vnet_select_queue,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= vnet_poll_controller,
#endif
};

*vnet_new(const u64
*local_mac
)
1764 struct net_device
*dev
;
1768 dev
= alloc_etherdev_mqs(sizeof(*vp
), VNET_MAX_TXQS
, 1);
1770 return ERR_PTR(-ENOMEM
);
1771 dev
->needed_headroom
= VNET_PACKET_SKIP
+ 8;
1772 dev
->needed_tailroom
= 8;
1774 for (i
= 0; i
< ETH_ALEN
; i
++)
1775 dev
->dev_addr
[i
] = (*local_mac
>> (5 - i
) * 8) & 0xff;
1777 vp
= netdev_priv(dev
);
1779 spin_lock_init(&vp
->lock
);
1782 INIT_LIST_HEAD(&vp
->port_list
);
1783 for (i
= 0; i
< VNET_PORT_HASH_SIZE
; i
++)
1784 INIT_HLIST_HEAD(&vp
->port_hash
[i
]);
1785 INIT_LIST_HEAD(&vp
->list
);
1786 vp
->local_mac
= *local_mac
;
1788 dev
->netdev_ops
= &vnet_ops
;
1789 dev
->ethtool_ops
= &vnet_ethtool_ops
;
1790 dev
->watchdog_timeo
= VNET_TX_TIMEOUT
;
1792 dev
->hw_features
= NETIF_F_TSO
| NETIF_F_GSO
| NETIF_F_GSO_SOFTWARE
|
1793 NETIF_F_HW_CSUM
| NETIF_F_SG
;
1794 dev
->features
= dev
->hw_features
;
1796 err
= register_netdev(dev
);
1798 pr_err("Cannot register net device, aborting\n");
1799 goto err_out_free_dev
;
1802 netdev_info(dev
, "Sun LDOM vnet %pM\n", dev
->dev_addr
);
1804 list_add(&vp
->list
, &vnet_list
);
1811 return ERR_PTR(err
);
static struct vnet *vnet_find_or_create(const u64 *local_mac)
{
	struct vnet *iter, *vp;

	mutex_lock(&vnet_list_mutex);
	vp = NULL;
	list_for_each_entry(iter, &vnet_list, list) {
		if (iter->local_mac == *local_mac) {
			vp = iter;
			break;
		}
	}
	if (!vp)
		vp = vnet_new(local_mac);
	mutex_unlock(&vnet_list_mutex);

	return vp;
}

static void vnet_cleanup(void)
{
	struct vnet *vp;
	struct net_device *dev;

	mutex_lock(&vnet_list_mutex);
	while (!list_empty(&vnet_list)) {
		vp = list_first_entry(&vnet_list, struct vnet, list);
		list_del(&vp->list);
		dev = vp->dev;
		/* vio_unregister_driver() should have cleaned up port_list */
		BUG_ON(!list_empty(&vp->port_list));
		unregister_netdev(dev);
		free_netdev(dev);
	}
	mutex_unlock(&vnet_list_mutex);
}

static const char *local_mac_prop = "local-mac-address";

static struct vnet *vnet_find_parent(struct mdesc_handle *hp,
				     u64 port_node)
{
	const u64 *local_mac = NULL;
	u64 a;

	mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) {
		u64 target = mdesc_arc_target(hp, a);
		const char *name;

		name = mdesc_get_property(hp, target, "name", NULL);
		if (!name || strcmp(name, "network"))
			continue;

		local_mac = mdesc_get_property(hp, target,
					       local_mac_prop, NULL);
		if (local_mac)
			break;
	}
	if (!local_mac)
		return ERR_PTR(-ENODEV);

	return vnet_find_or_create(local_mac);
}

static struct ldc_channel_config vnet_ldc_cfg = {
	.event		= vnet_event,
	.mtu		= 64,
	.mode		= LDC_MODE_UNRELIABLE,
};

static struct vio_driver_ops vnet_vio_ops = {
	.send_attr		= vnet_send_attr,
	.handle_attr		= vnet_handle_attr,
	.handshake_complete	= vnet_handshake_complete,
};

static void print_version(void)
{
	printk_once(KERN_INFO "%s", version);
}

const char *remote_macaddr_prop = "remote-mac-address";

static void
vnet_port_add_txq(struct vnet_port *port)
{
	struct vnet *vp = port->vp;
	int n;

	n = vp->nports++;
	n = n & (VNET_MAX_TXQS - 1);
	port->q_index = n;
	netif_tx_wake_queue(netdev_get_tx_queue(vp->dev, port->q_index));
}

static void
vnet_port_rm_txq(struct vnet_port *port)
{
	port->vp->nports--;
	netif_tx_stop_queue(netdev_get_tx_queue(port->vp->dev, port->q_index));
}

static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	struct mdesc_handle *hp;
	struct vnet_port *port;
	unsigned long flags;
	struct vnet *vp;
	const u64 *rmac;
	int len, i, err, switch_port;

	print_version();

	hp = mdesc_grab();

	vp = vnet_find_parent(hp, vdev->mp);
	if (IS_ERR(vp)) {
		pr_err("Cannot find port parent vnet\n");
		err = PTR_ERR(vp);
		goto err_out_put_mdesc;
	}

	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
	err = -ENODEV;
	if (!rmac) {
		pr_err("Port lacks %s property\n", remote_macaddr_prop);
		goto err_out_put_mdesc;
	}

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	err = -ENOMEM;
	if (!port)
		goto err_out_put_mdesc;

	for (i = 0; i < ETH_ALEN; i++)
		port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;

	port->vp = vp;

	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
			      vnet_versions, ARRAY_SIZE(vnet_versions),
			      &vnet_vio_ops, vp->dev->name);
	if (err)
		goto err_out_free_port;

	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
	if (err)
		goto err_out_free_port;

	netif_napi_add(port->vp->dev, &port->napi, vnet_poll, NAPI_POLL_WEIGHT);

	INIT_HLIST_NODE(&port->hash);
	INIT_LIST_HEAD(&port->list);

	switch_port = 0;
	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
		switch_port = 1;
	port->switch_port = switch_port;
	port->tso = true;
	port->tsolen = 0;

	spin_lock_irqsave(&vp->lock, flags);
	if (switch_port)
		list_add_rcu(&port->list, &vp->port_list);
	else
		list_add_tail_rcu(&port->list, &vp->port_list);
	hlist_add_head_rcu(&port->hash,
			   &vp->port_hash[vnet_hashfn(port->raddr)]);
	vnet_port_add_txq(port);
	spin_unlock_irqrestore(&vp->lock, flags);

	dev_set_drvdata(&vdev->dev, port);

	pr_info("%s: PORT ( remote-mac %pM%s )\n",
		vp->dev->name, port->raddr, switch_port ? " switch-port" : "");

	setup_timer(&port->clean_timer, vnet_clean_timer_expire,
		    (unsigned long)port);

	napi_enable(&port->napi);
	vio_port_up(&port->vio);

	mdesc_release(hp);

	return 0;

err_out_free_port:
	kfree(port);

err_out_put_mdesc:
	mdesc_release(hp);
	return err;
}

static int vnet_port_remove(struct vio_dev *vdev)
{
	struct vnet_port *port = dev_get_drvdata(&vdev->dev);

	if (port) {

		del_timer_sync(&port->vio.timer);

		napi_disable(&port->napi);

		list_del_rcu(&port->list);
		hlist_del_rcu(&port->hash);

		synchronize_rcu();
		del_timer_sync(&port->clean_timer);
		vnet_port_rm_txq(port);
		netif_napi_del(&port->napi);
		vnet_port_free_tx_bufs(port);
		vio_ldc_free(&port->vio);

		dev_set_drvdata(&vdev->dev, NULL);

		kfree(port);

	}
	return 0;
}

static const struct vio_device_id vnet_port_match[] = {
	{
		.type = "vnet-port",
	},
	{},
};
MODULE_DEVICE_TABLE(vio, vnet_port_match);

static struct vio_driver vnet_port_driver = {
	.id_table	= vnet_port_match,
	.probe		= vnet_port_probe,
	.remove		= vnet_port_remove,
	.name		= "vnet_port",
};

static int __init vnet_init(void)
{
	return vio_register_driver(&vnet_port_driver);
}

static void __exit vnet_exit(void)
{
	vio_unregister_driver(&vnet_port_driver);
	vnet_cleanup();
}

module_init(vnet_init);
module_exit(vnet_exit);