/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <net/tcp.h>
#include <net/ip6_checksum.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
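/* Illustrative usage (not part of the original source): the parameter can be
 * given at load time, e.g. "modprobe xen-netback separate_tx_rx_irq=0", and
 * is also exposed as /sys/module/xen_netback/parameters/separate_tx_rx_irq
 * because of the 0644 permissions above.
 */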
/*
 * This is the maximum number of slots a skb can have. If a guest sends a
 * skb which exceeds this limit, it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
/*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
 * the maximum slots a valid packet can use. Now this value is defined
 * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
 * all backends.
 */
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
/*
 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
 * one or more merged tx requests, otherwise it is the continuation of
 * previous tx request.
 */
static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
{
	return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
}
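/* Illustration (not from the original source): when three tx requests are
 * merged into a single packet, only the first pending_tx_info entry keeps a
 * valid head value; the continuation entries keep INVALID_PENDING_RING_IDX,
 * so pending_tx_is_head() is true only for the slot that starts the packet.
 */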
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8       st);

static inline int tx_work_todo(struct xenvif *vif);
static inline int rx_work_todo(struct xenvif *vif);

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags);
static inline unsigned long idx_to_pfn(struct xenvif *vif,
				       u16 idx)
{
	return page_to_pfn(vif->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif *vif,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN 128
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}
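/* While a packet is still being assembled, the skb_frag_t page_offset field
 * is borrowed to carry the pending ring index; it is only replaced by a real
 * page/offset/size in xenvif_fill_frags().
 */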
static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}

static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
{
	return MAX_PENDING_REQS -
		vif->pending_prod + vif->pending_cons;
}
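/* pending_index() relies on MAX_PENDING_REQS being a power of two, so that
 * masking with (MAX_PENDING_REQS - 1) behaves like a modulo. nr_pending_reqs()
 * is the number of slots currently in flight: with illustrative values
 * MAX_PENDING_REQS == 256, pending_prod == 300 and pending_cons == 50,
 * 256 - 300 + 50 == 6 requests are outstanding.
 */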
static int max_required_rx_slots(struct xenvif *vif)
{
	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);

	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
	if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

	return max;
}
int xenvif_rx_ring_full(struct xenvif *vif)
{
	RING_IDX peek   = vif->rx_req_cons_peek;
	RING_IDX needed = max_required_rx_slots(vif);

	return ((vif->rx.sring->req_prod - peek) < needed) ||
	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}
int xenvif_must_stop_queue(struct xenvif *vif)
{
	if (!xenvif_rx_ring_full(vif))
		return 0;

	vif->rx.sring->req_event = vif->rx_req_cons_peek +
		max_required_rx_slots(vif);
	mb(); /* request notification /then/ check the queue */

	return xenvif_rx_ring_full(vif);
}
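/* The caller only stops the transmit queue when this returns true. Writing
 * req_event first asks the frontend for an event once enough rx requests are
 * available; re-checking after the barrier catches requests posted before
 * the frontend could see the new req_event, so the queue is not stopped only
 * to then miss the wake-up.
 */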
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
	/* simple case: we have completely filled the current buffer. */
	if (offset == MAX_BUFFER_OFFSET)
		return true;

	/*
	 * complex case: start a fresh buffer if the current frag
	 * would overflow the current buffer but only if:
	 *     (i)   this frag would fit completely in the next buffer
	 * and (ii)  there is already some data in the current buffer
	 * and (iii) this is not the head buffer.
	 *
	 * Where:
	 * - (i) stops us splitting a frag into two copies
	 *   unless the frag is too large for a single buffer.
	 * - (ii) stops us from leaving a buffer pointlessly empty.
	 * - (iii) stops us leaving the first buffer
	 *   empty. Strictly speaking this is already covered
	 *   by (ii) but is explicitly checked because
	 *   netfront relies on the first buffer being
	 *   non-empty and can crash otherwise.
	 *
	 * This means we will effectively linearise small
	 * frags but do not needlessly split large buffers
	 * into multiple copies, tending to give large frags
	 * their own buffers as before.
	 */
	if ((offset + size > MAX_BUFFER_OFFSET) &&
	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
		return true;

	return false;
}
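/* Worked example (assuming MAX_BUFFER_OFFSET == PAGE_SIZE == 4096, which is
 * how the driver is normally configured): with offset == 2048 and
 * size == 3000 the data would overflow the current buffer but would fit in
 * an empty one, so for a non-head buffer that already holds data we start a
 * fresh buffer instead of splitting the frag across two copy operations.
 */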
struct xenvif_count_slot_state {
	unsigned long copy_off;
	bool head;
};

unsigned int xenvif_count_frag_slots(struct xenvif *vif,
				     unsigned long offset, unsigned long size,
				     struct xenvif_count_slot_state *state)
{
	unsigned count = 0;

	offset &= ~PAGE_MASK;

	while (size > 0) {
		unsigned long bytes;

		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
			count++;
			state->copy_off = 0;
		}

		if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - state->copy_off;

		state->copy_off += bytes;

		offset += bytes;
		size -= bytes;

		if (offset == PAGE_SIZE)
			offset = 0;

		state->head = false;
	}

	return count;
}
/*
 * Figure out how many ring slots we're going to need to send @skb to
 * the guest. This function is essentially a dry run of
 * xenvif_gop_frag_copy.
 */
unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
	struct xenvif_count_slot_state state;
	unsigned int count;
	unsigned char *data;
	unsigned i;

	state.head = true;
	state.copy_off = 0;

	/* Slot for the first (partial) page of data. */
	count = 1;

	/* Need a slot for the GSO prefix for GSO extra data? */
	if (skb_shinfo(skb)->gso_size)
		count++;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned long offset = offset_in_page(data);
		unsigned long size = PAGE_SIZE - offset;

		if (data + size > skb_tail_pointer(skb))
			size = skb_tail_pointer(skb) - data;

		count += xenvif_count_frag_slots(vif, offset, size, &state);

		data += size;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;

		count += xenvif_count_frag_slots(vif, offset, size, &state);
	}

	return count;
}
struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct xenvif_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};
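/* copy_prod/meta_prod count the grant-copy and meta entries queued while the
 * responses are being built; copy_cons/meta_cons advance as the results are
 * checked and turned into ring responses, so both arrays are consumed in the
 * order they were produced.
 */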
static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
						 struct netrx_pending_operations *npo)
{
	struct xenvif_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
				 unsigned long offset, int *head)
{
	struct gnttab_copy *copy_gop;
	struct xenvif_rx_meta *meta;
	unsigned long bytes;
	int gso_type;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*head);

			meta = get_next_rx_buffer(vif, npo);
		}

		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		copy_gop->len = bytes;

		copy_gop->source.domid = DOMID_SELF;
		copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
		copy_gop->source.offset = offset;

		copy_gop->dest.domid = vif->domid;
		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Next frame */
		if (offset == PAGE_SIZE && size) {
			BUG_ON(!PageCompound(page));
			page++;
			offset = 0;
		}

		/* Leave a gap for the GSO descriptor. */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
		else
			gso_type = XEN_NETIF_GSO_TYPE_NONE;

		if (*head && ((1 << gso_type) & vif->gso_mask))
			vif->rx.req_cons++;

		*head = 0; /* There must be something in this buffer now. */
	}
}
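/* Each gnttab_copy built above copies out of this domain's own memory
 * (source.domid == DOMID_SELF, addressed by machine frame) into a page the
 * frontend granted for its rx ring (GNTCOPY_dest_gref, dest.u.ref taken from
 * the rx request), so the hypervisor performs the backend-to-frontend data
 * transfer.
 */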
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;
	int gso_type;
	int gso_size;

	old_meta_prod = npo->meta_prod;

	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		gso_size = skb_shinfo(skb)->gso_size;
	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
		gso_size = skb_shinfo(skb)->gso_size;
	} else {
		gso_type = XEN_NETIF_GSO_TYPE_NONE;
		gso_size = 0;
	}

	/* Set up a GSO prefix descriptor, if necessary */
	if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_type = gso_type;
		meta->gso_size = gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if ((1 << gso_type) & vif->gso_mask) {
		meta->gso_type = gso_type;
		meta->gso_size = gso_size;
	} else {
		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
		meta->gso_size = 0;
	}

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(vif, skb, npo,
				     virt_to_page(data), len, offset, &head);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		xenvif_gop_frag_copy(vif, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     &head);
	}

	return npo->meta_prod - old_meta_prod;
}
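/* Example of the accounting described above (illustrative): a GSO packet
 * whose data spans three buffers and whose frontend requested the extra_info
 * descriptor consumes three meta slots plus one GSO descriptor, i.e. four rx
 * ring slots; the same packet without GSO would use three meta slots and
 * three ring slots.
 */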
/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}
static void xenvif_add_frag_responses(struct xenvif *vif, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(vif, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}
struct skb_cb_overlay {
	int meta_slots_used;
};

static void xenvif_kick_thread(struct xenvif *vif)
{
	wake_up(&vif->wq);
}
void xenvif_rx_action(struct xenvif *vif)
{
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	int ret;
	int nr_frags;
	int count;
	unsigned long offset;
	struct skb_cb_overlay *sco;
	int need_to_notify = 0;

	struct netrx_pending_operations npo = {
		.copy  = vif->grant_copy_op,
		.meta  = vif->meta,
	};

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
		vif = netdev_priv(skb->dev);
		nr_frags = skb_shinfo(skb)->nr_frags;

		sco = (struct skb_cb_overlay *)skb->cb;
		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);

		count += nr_frags + 1;

		__skb_queue_tail(&rxq, skb);

		/* Filled the batch queue? */
		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
			break;
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

	if (!npo.copy_prod)
		return;

	BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		sco = (struct skb_cb_overlay *)skb->cb;

		vif = netdev_priv(skb->dev);

		if ((1 << vif->meta[npo.meta_cons].gso_type) &
		    vif->gso_prefix_mask) {
			resp = RING_GET_RESPONSE(&vif->rx,
						 vif->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = vif->meta[npo.meta_cons].gso_size;
			resp->id = vif->meta[npo.meta_cons].id;
			resp->status = sco->meta_slots_used;

			npo.meta_cons++;
			sco->meta_slots_used--;
		}

		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;

		status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);

		if (sco->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
					status, offset,
					vif->meta[npo.meta_cons].size,
					flags);

		if ((1 << vif->meta[npo.meta_cons].gso_type) &
		    vif->gso_mask) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&vif->rx,
						  vif->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		xenvif_add_frag_responses(vif, status,
					  vif->meta + npo.meta_cons + 1,
					  sco->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

		if (ret)
			need_to_notify = 1;

		xenvif_notify_tx_completion(vif);

		npo.meta_cons += sco->meta_slots_used;
		dev_kfree_skb(skb);
	}

	if (need_to_notify)
		notify_remote_via_irq(vif->rx_irq);

	/* More work to do? */
	if (!skb_queue_empty(&vif->rx_queue))
		xenvif_kick_thread(vif);
}
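/* From the vif's point of view delivering a packet to the guest is a
 * transmit, which is why the rx path above bumps dev->stats.tx_bytes and
 * tx_packets; the mirror-image accounting is done in xenvif_tx_submit(),
 * where guest transmissions are counted as rx on the backend device.
 */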
void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
	skb_queue_tail(&vif->rx_queue, skb);

	xenvif_kick_thread(vif);
}
void xenvif_check_rx_xenvif(struct xenvif *vif)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&vif->napi);
}
static void tx_add_credit(struct xenvif *vif)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, vif->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = vif->remaining_credit + vif->credit_bytes;
	if (max_credit < vif->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	vif->remaining_credit = min(max_credit, max_burst);
}
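/* Worked example (illustrative numbers): with credit_bytes == 100000,
 * remaining_credit == 20000 and a pending 60000-byte request,
 * max_burst = max(min(60000, 131072), 100000) = 100000 and
 * max_credit = 20000 + 100000 = 120000, so remaining_credit becomes
 * min(120000, 100000) = 100000; the burst cap stops unused credit from
 * accumulating without bound across idle periods.
 */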
static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;
	tx_add_credit(vif);
	xenvif_check_rx_xenvif(vif);
}
static void xenvif_tx_err(struct xenvif *vif,
			  struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = vif->tx.req_cons;

	do {
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&vif->tx, cons++);
	} while (1);
	vif->tx.req_cons = cons;
}
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	xenvif_carrier_off(vif);
}
static int xenvif_count_requests(struct xenvif *vif,
				 struct xen_netif_tx_request *first,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = vif->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and is
		 * considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(vif);
			return -E2BIG;
		}

		/* Xen network protocol had implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be a fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
				   txp->offset, txp->size);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(vif, first, cons + slots);
		return drop_err;
	}

	return slots;
}
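/* Summary of the limits enforced above: exceeding work_to_do or
 * fatal_skb_slots is treated as a protocol violation and disables the vif
 * via xenvif_fatal_tx_err(), while a packet that stays under fatal_skb_slots
 * but needs more than XEN_NETBK_LEGACY_SLOTS_MAX slots is merely consumed
 * and dropped, with error responses sent back to the frontend.
 */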
static struct page *xenvif_alloc_page(struct xenvif *vif,
				      u16 pending_idx)
{
	struct page *page;

	page = alloc_page(GFP_ATOMIC|__GFP_COLD);
	if (!page)
		return NULL;
	vif->mmap_pages[pending_idx] = page;

	return page;
}
static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
					       struct sk_buff *skb,
					       struct xen_netif_tx_request *txp,
					       struct gnttab_copy *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = *((u16 *)skb->data);
	u16 head_idx = 0;
	int slot, start;
	struct page *page;
	pending_ring_idx_t index, start_idx = 0;
	uint16_t dst_offset;
	unsigned int nr_slots;
	struct pending_tx_info *first = NULL;

	/* At this point shinfo->nr_frags is in fact the number of
	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	/* Coalesce tx requests, at this point the packet passed in
	 * should be <= 64K. Any packets larger than 64K have been
	 * handled in xenvif_count_requests().
	 */
	for (shinfo->nr_frags = slot = start; slot < nr_slots;
	     shinfo->nr_frags++) {
		struct pending_tx_info *pending_tx_info =
			vif->pending_tx_info;

		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
		if (!page)
			goto err;

		dst_offset = 0;
		first = NULL;
		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
			gop->flags = GNTCOPY_source_gref;

			gop->source.u.ref = txp->gref;
			gop->source.domid = vif->domid;
			gop->source.offset = txp->offset;

			gop->dest.domid = DOMID_SELF;

			gop->dest.offset = dst_offset;
			gop->dest.u.gmfn = virt_to_mfn(page_address(page));

			if (dst_offset + txp->size > PAGE_SIZE) {
				/* This page can only merge a portion
				 * of tx request. Do not increment any
				 * pointer / counter here. The txp
				 * will be dealt with in future
				 * rounds, eventually hitting the
				 * `else` branch.
				 */
				gop->len = PAGE_SIZE - dst_offset;
				txp->offset += gop->len;
				txp->size -= gop->len;
				dst_offset += gop->len; /* quit loop */
			} else {
				/* This tx request can be merged in the page */
				gop->len = txp->size;
				dst_offset += gop->len;

				index = pending_index(vif->pending_cons++);

				pending_idx = vif->pending_ring[index];

				memcpy(&pending_tx_info[pending_idx].req, txp,
				       sizeof(*txp));

				/* Poison these fields, corresponding
				 * fields for head tx req will be set
				 * to correct values after the loop.
				 */
				vif->mmap_pages[pending_idx] = (void *)(~0UL);
				pending_tx_info[pending_idx].head =
					INVALID_PENDING_RING_IDX;

				if (!first) {
					first = &pending_tx_info[pending_idx];
					start_idx = index;
					head_idx = pending_idx;
				}

				txp++;
				slot++;
			}

			gop++;
		}

		first->req.offset = 0;
		first->req.size = dst_offset;
		first->head = start_idx;
		vif->mmap_pages[head_idx] = page;
		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
	}

	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);

	return gop;
err:
	/* Unwind, freeing all pages and sending error responses. */
	while (shinfo->nr_frags-- > start) {
		xenvif_idx_release(vif,
				   frag_get_pending_idx(&frags[shinfo->nr_frags]),
				   XEN_NETIF_RSP_ERROR);
	}
	/* The head too, if necessary. */
	if (start)
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

	return NULL;
}
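/* The loop above packs several small tx requests into one backend page: for
 * example (illustrative) four 1 KiB slots from the frontend become four
 * grant copies into a single freshly allocated 4 KiB page, with one head
 * pending_tx_info entry describing the merged region and the continuation
 * entries left with head == INVALID_PENDING_RING_IDX so that
 * xenvif_idx_release() can later walk the whole chain.
 */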
static int xenvif_tx_check_gop(struct xenvif *vif,
			       struct sk_buff *skb,
			       struct gnttab_copy **gopp)
{
	struct gnttab_copy *gop = *gopp;
	u16 pending_idx = *((u16 *)skb->data);
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct pending_tx_info *tx_info;
	int nr_frags = shinfo->nr_frags;
	int i, err, start;
	u16 peek; /* peek into next tx request */

	/* Check status of header. */
	err = gop->status;
	if (unlikely(err))
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;
		pending_ring_idx_t head;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
		tx_info = &vif->pending_tx_info[pending_idx];
		head = tx_info->head;

		/* Check error status: if okay then remember grant handle. */
		do {
			newerr = (++gop)->status;
			if (newerr)
				break;
			peek = vif->pending_ring[pending_index(++head)];
		} while (!pending_tx_is_head(vif, peek));

		if (likely(!newerr)) {
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				xenvif_idx_release(vif, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
		for (j = start; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*gopp = gop + 1;
	return err;
}
static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		txp = &vif->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset xenvif_idx_release */
		get_page(vif->mmap_pages[pending_idx]);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
	}
}
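/* The extra get_page() keeps the coalesced page alive after
 * xenvif_idx_release() drops the reference taken when it was allocated; the
 * page is finally freed when the skb (and therefore the frag) is released by
 * the network stack.
 */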
static int xenvif_get_extras(struct xenvif *vif,
			     struct xen_netif_extra_info *extras,
			     int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = vif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(vif);
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			vif->tx.req_cons = ++cons;
			netdev_err(vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		vif->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}
static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}
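/* SKB_GSO_DODGY marks the segmentation parameters as coming from an
 * untrusted source, so the core GSO code re-validates them before the packet
 * is segmented; gso_segs is left at zero to be recomputed at that point.
 */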
static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
{
	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
		/* If we need to pullup then pullup to the max, so we
		 * won't need to do it again.
		 */
		int target = min_t(int, skb->len, MAX_TCP_HEADER);
		__pskb_pull_tail(skb, target - skb_headlen(skb));
	}
}
static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
			     int recalculate_partial_csum)
{
	struct iphdr *iph = (void *)skb->data;
	unsigned int header_size;
	unsigned int off;
	int err = -EPROTO;

	off = sizeof(struct iphdr);

	header_size = skb->network_header + off + MAX_IPOPTLEN;
	maybe_pull_tail(skb, header_size);

	off = iph->ihl * 4;

	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct tcphdr);
			maybe_pull_tail(skb, header_size);

			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - off,
							 IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct udphdr);
			maybe_pull_tail(skb, header_size);

			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - off,
							 IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, "
				   "dropping a protocol %d packet\n",
				   iph->protocol);
		goto out;
	}

	err = 0;

out:
	return err;
}
static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
			       int recalculate_partial_csum)
{
	int err = -EPROTO;
	struct ipv6hdr *ipv6h = (void *)skb->data;
	u8 nexthdr;
	unsigned int header_size;
	unsigned int off;
	bool fragment = false;
	bool done = false;

	off = sizeof(struct ipv6hdr);

	header_size = skb->network_header + off;
	maybe_pull_tail(skb, header_size);

	nexthdr = ipv6h->nexthdr;

	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
	       !done) {
		switch (nexthdr) {
		case IPPROTO_DSTOPTS:
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING: {
			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);

			header_size = skb->network_header +
				off +
				sizeof(struct ipv6_opt_hdr);
			maybe_pull_tail(skb, header_size);

			nexthdr = hp->nexthdr;
			off += ipv6_optlen(hp);
			break;
		}
		case IPPROTO_AH: {
			struct ip_auth_hdr *hp = (void *)(skb->data + off);

			header_size = skb->network_header +
				off +
				sizeof(struct ip_auth_hdr);
			maybe_pull_tail(skb, header_size);

			nexthdr = hp->nexthdr;
			off += (hp->hdrlen+2)<<2;
			break;
		}
		case IPPROTO_FRAGMENT:
			fragment = true;
			/* fall through */
		default:
			done = true;
			break;
		}
	}

	if (!done) {
		if (net_ratelimit())
			netdev_err(vif->dev, "Failed to parse packet header\n");
		goto out;
	}

	if (fragment) {
		if (net_ratelimit())
			netdev_err(vif->dev, "Packet is a fragment!\n");
		goto out;
	}

	switch (nexthdr) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct tcphdr);
			maybe_pull_tail(skb, header_size);

			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
						       &ipv6h->daddr,
						       skb->len - off,
						       IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct udphdr);
			maybe_pull_tail(skb, header_size);

			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
						       &ipv6h->daddr,
						       skb->len - off,
						       IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, "
				   "dropping a protocol %d packet\n",
				   nexthdr);
		goto out;
	}

	err = 0;

out:
	return err;
}
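/* The extension-header walk above exists because the checksum field offset
 * depends on where the transport header finally starts: DSTOPTS, HOPOPTS,
 * ROUTING and AH headers must be skipped first, and fragments are rejected
 * because a non-initial fragment may not contain the transport header at
 * all.
 */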
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	int err = -EPROTO;
	int recalculate_partial_csum = 0;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = 1;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->protocol == htons(ETH_P_IP))
		err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
	else if (skb->protocol == htons(ETH_P_IPV6))
		err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);

	return err;
}
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = vif->credit_window_start +
		msecs_to_jiffies(vif->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&vif->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		vif->credit_window_start = now;
		tx_add_credit(vif);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > vif->remaining_credit) {
		vif->credit_timeout.data     =
			(unsigned long)vif;
		vif->credit_timeout.function =
			tx_credit_callback;
		mod_timer(&vif->credit_timeout,
			  next_credit);
		vif->credit_window_start = next_credit;

		return true;
	}

	return false;
}
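/* Credit scheduling example (illustrative): with credit_bytes == 1000000 and
 * credit_usec == 100000 the guest may send roughly 1 MB per 100 ms window; a
 * request that would exceed the remaining allowance arms credit_timeout so
 * that tx_credit_callback() tops the credit up and reschedules NAPI when the
 * current window ends.
 */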
static unsigned xenvif_tx_build_gops(struct xenvif *vif)
{
	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
		< MAX_PENDING_REQS)) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct page *page;
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (vif->tx.sring->req_prod - vif->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   vif->tx.sring->req_prod, vif->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(vif);
			continue;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
		if (!work_to_do)
			break;

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > vif->remaining_credit &&
		    tx_credit_exceeded(vif, txreq.size))
			break;

		vif->remaining_credit -= txreq.size;

		work_to_do--;
		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(vif, extras,
						       work_to_do);
			idx = vif->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_err(vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset&~PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(vif);
			break;
		}

		index = pending_index(vif->pending_cons);
		pending_idx = vif->pending_ring[index];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			PKT_PROT_LEN : txreq.size;

		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
				GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(skb == NULL)) {
			netdev_dbg(vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		/* Packets passed to netif_rx() must have some headroom. */
		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				break;
			}
		}

		/* XXX could copy straight to head */
		page = xenvif_alloc_page(vif, pending_idx);
		if (!page) {
			kfree_skb(skb);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}

		gop->source.u.ref = txreq.gref;
		gop->source.domid = vif->domid;
		gop->source.offset = txreq.offset;

		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
		gop->dest.domid = DOMID_SELF;
		gop->dest.offset = txreq.offset;

		gop->len = txreq.size;
		gop->flags = GNTCOPY_source_gref;

		gop++;

		memcpy(&vif->pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		vif->pending_tx_info[pending_idx].head = index;
		*((u16 *)skb->data) = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
		}

		vif->pending_cons++;

		request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
		if (request_gop == NULL) {
			kfree_skb(skb);
			xenvif_tx_err(vif, &txreq, idx);
			break;
		}
		gop = request_gop;

		__skb_queue_tail(&vif->tx_queue, skb);

		vif->tx.req_cons = idx;

		if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
			break;
	}

	return gop - vif->tx_copy_ops;
}
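/* xenvif_tx_build_gops() only queues grant-copy descriptors and skbs; no
 * data moves until xenvif_tx_action() hands the whole batch to
 * gnttab_batch_copy(), after which xenvif_tx_submit() checks the results and
 * pushes the completed skbs into the network stack.
 */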
static int xenvif_tx_submit(struct xenvif *vif, int budget)
{
	struct gnttab_copy *gop = vif->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while (work_done < budget &&
	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = *((u16 *)skb->data);
		txp = &vif->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
			netdev_dbg(vif->dev, "netback grant failed.\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(vif, skb);

		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		work_done++;

		netif_receive_skb(skb);
	}

	return work_done;
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
	unsigned nr_gops;
	int work_done;

	if (unlikely(!tx_work_todo(vif)))
		return 0;

	nr_gops = xenvif_tx_build_gops(vif);

	if (nr_gops == 0)
		return 0;

	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);

	work_done = xenvif_tx_submit(vif, nr_gops);

	return work_done;
}
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t head;
	u16 peek; /* peek into next tx request */

	BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));

	/* Already complete? */
	if (vif->mmap_pages[pending_idx] == NULL)
		return;

	pending_tx_info = &vif->pending_tx_info[pending_idx];

	head = pending_tx_info->head;

	BUG_ON(!pending_tx_is_head(vif, head));
	BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);

	do {
		pending_ring_idx_t index;
		pending_ring_idx_t idx = pending_index(head);
		u16 info_idx = vif->pending_ring[idx];

		pending_tx_info = &vif->pending_tx_info[info_idx];
		make_tx_response(vif, &pending_tx_info->req, status);

		/* Setting any number other than
		 * INVALID_PENDING_RING_IDX indicates this slot is
		 * starting a new packet / ending a previous packet.
		 */
		pending_tx_info->head = 0;

		index = pending_index(vif->pending_prod++);
		vif->pending_ring[index] = vif->pending_ring[info_idx];

		peek = vif->pending_ring[pending_index(++head)];

	} while (!pending_tx_is_head(vif, peek));

	put_page(vif->mmap_pages[pending_idx]);
	vif->mmap_pages[pending_idx] = NULL;
}
static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8       st)
{
	RING_IDX i = vif->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;
	int notify;

	resp = RING_GET_RESPONSE(&vif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	vif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
	if (notify)
		notify_remote_via_irq(vif->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags)
{
	RING_IDX i = vif->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&vif->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
	resp->id         = id;
	resp->status     = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	vif->rx.rsp_prod_pvt = ++i;

	return resp;
}
static inline int rx_work_todo(struct xenvif *vif)
{
	return !skb_queue_empty(&vif->rx_queue);
}

static inline int tx_work_todo(struct xenvif *vif)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
	    (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
	     < MAX_PENDING_REQS))
		return 1;

	return 0;
}
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
	if (vif->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->tx.sring);
	if (vif->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
					vif->rx.sring);
}
int xenvif_map_frontend_rings(struct xenvif *vif,
			      grant_ref_t tx_ring_ref,
			      grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     tx_ring_ref, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     rx_ring_ref, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

	vif->rx_req_cons_peek = 0;

	return 0;

err:
	xenvif_unmap_frontend_rings(vif);
	return err;
}
int xenvif_kthread(void *data)
{
	struct xenvif *vif = data;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->wq,
					 rx_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		if (rx_work_todo(vif))
			xenvif_rx_action(vif);

		cond_resched();
	}

	return 0;
}
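/* Threading model: guest-bound (rx) traffic is pushed out of this per-vif
 * kernel thread, woken via xenvif_kick_thread(), while guest transmissions
 * are processed in NAPI context (see the napi_schedule() call in
 * xenvif_check_rx_xenvif() and the budget handling in xenvif_tx_action()).
 */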
static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);
static void __exit netback_fini(void)
{
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");