/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
/*
 * This is the maximum number of slots a skb can have. If a guest sends
 * a skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
/*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
 * the maximum number of slots a valid packet can use. Currently this
 * value is defined to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to
 * be supported by all backends.
 */
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
/*
 * If head != INVALID_PENDING_RING_IDX, it means this tx request is the
 * head of one or more merged tx requests; otherwise it is the
 * continuation of a previous tx request.
 */
static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
{
        return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
}
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
                               u8 status);

static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st);

static inline int tx_work_todo(struct xenvif *vif);
static inline int rx_work_todo(struct xenvif *vif);

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                       u16 id,
                                                       s8 st,
                                                       u16 offset,
                                                       u16 size,
                                                       u16 flags);
static inline unsigned long idx_to_pfn(struct xenvif *vif,
                                       u16 idx)
{
        return page_to_pfn(vif->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif *vif,
                                         u16 idx)
{
        return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN 128
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
        return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
        frag->page_offset = pending_idx;
}
static inline pending_ring_idx_t pending_index(unsigned i)
{
        return i & (MAX_PENDING_REQS-1);
}

static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
{
        return MAX_PENDING_REQS -
                vif->pending_prod + vif->pending_cons;
}
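
/*
 * Note: pending_index() assumes MAX_PENDING_REQS is a power of two, so
 * the mask is equivalent to "i % MAX_PENDING_REQS". nr_pending_reqs()
 * is the number of in-flight requests: the ring size minus the free
 * slots (pending_prod - pending_cons). As an illustrative example, if
 * MAX_PENDING_REQS were 256 with pending_prod = 300 and
 * pending_cons = 290, then 256 - 300 + 290 = 246 requests would still
 * be pending.
 */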
static int max_required_rx_slots(struct xenvif *vif)
{
        int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);

        /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
        if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
                max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

        return max;
}
int xenvif_rx_ring_full(struct xenvif *vif)
{
        RING_IDX peek   = vif->rx_req_cons_peek;
        RING_IDX needed = max_required_rx_slots(vif);

        return ((vif->rx.sring->req_prod - peek) < needed) ||
               ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}
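
/*
 * Note: the ring counts as full when either the frontend has not posted
 * enough unconsumed requests beyond the peek point
 * (req_prod - peek < needed), or, roughly, consuming that many more
 * requests would put us more than a full ring ahead of the responses we
 * have produced so far (rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek <
 * needed).
 */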
int xenvif_must_stop_queue(struct xenvif *vif)
{
        if (!xenvif_rx_ring_full(vif))
                return 0;

        vif->rx.sring->req_event = vif->rx_req_cons_peek +
                                   max_required_rx_slots(vif);
        mb(); /* request notification /then/ check the queue */

        return xenvif_rx_ring_full(vif);
}
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
        /* simple case: we have completely filled the current buffer. */
        if (offset == MAX_BUFFER_OFFSET)
                return true;

        /*
         * complex case: start a fresh buffer if the current frag
         * would overflow the current buffer but only if:
         *     (i)   this frag would fit completely in the next buffer
         * and (ii)  there is already some data in the current buffer
         * and (iii) this is not the head buffer.
         *
         * Where:
         * - (i) stops us splitting a frag into two copies
         *   unless the frag is too large for a single buffer.
         * - (ii) stops us from leaving a buffer pointlessly empty.
         * - (iii) stops us leaving the first buffer empty.
         *   Strictly speaking this is already covered by (ii) but is
         *   explicitly checked because netfront relies on the first
         *   buffer being non-empty and can crash otherwise.
         *
         * This means we will effectively linearise small frags but do
         * not needlessly split large buffers into multiple copies;
         * this tends to give large frags their own buffers, as before.
         */
        if ((offset + size > MAX_BUFFER_OFFSET) &&
            (size <= MAX_BUFFER_OFFSET) && offset && !head)
                return true;

        return false;
}
struct xenvif_count_slot_state {
        unsigned long copy_off;
        bool head;
};

unsigned int xenvif_count_frag_slots(struct xenvif *vif,
                                     unsigned long offset, unsigned long size,
                                     struct xenvif_count_slot_state *state)
{
        unsigned count = 0;

        offset &= ~PAGE_MASK;

        while (size > 0) {
                unsigned long bytes;

                bytes = PAGE_SIZE - offset;

                if (bytes > size)
                        bytes = size;

                if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
                        count++;
                        state->copy_off = 0;
                        state->head = false;
                }

                if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
                        bytes = MAX_BUFFER_OFFSET - state->copy_off;

                state->copy_off += bytes;

                offset += bytes;
                size -= bytes;

                if (offset == PAGE_SIZE)
                        offset = 0;
        }

        return count;
}
/*
 * Figure out how many ring slots we're going to need to send @skb to
 * the guest. This function is essentially a dry run of
 * xenvif_gop_frag_copy.
 */
unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
        struct xenvif_count_slot_state state;
        unsigned int count;
        unsigned char *data;
        unsigned int i;

        state.head = true;
        state.copy_off = 0;

        /* Slot for the first (partial) page of data. */
        count = 1;

        /* Need a slot for the GSO prefix for GSO extra data? */
        if (skb_shinfo(skb)->gso_size)
                count++;

        data = skb->data;
        while (data < skb_tail_pointer(skb)) {
                unsigned long offset = offset_in_page(data);
                unsigned long size = PAGE_SIZE - offset;

                if (data + size > skb_tail_pointer(skb))
                        size = skb_tail_pointer(skb) - data;

                count += xenvif_count_frag_slots(vif, offset, size, &state);

                data += size;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
                unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;

                count += xenvif_count_frag_slots(vif, offset, size, &state);
        }

        return count;
}
struct netrx_pending_operations {
        unsigned copy_prod, copy_cons;
        unsigned meta_prod, meta_cons;
        struct gnttab_copy *copy;
        struct xenvif_rx_meta *meta;
        int copy_off;
        grant_ref_t copy_gref;
};
static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
                                                 struct netrx_pending_operations *npo)
{
        struct xenvif_rx_meta *meta;
        struct xen_netif_rx_request *req;

        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

        meta = npo->meta + npo->meta_prod++;
        meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
        meta->gso_size = 0;
        meta->size = 0;
        meta->id = req->id;

        npo->copy_off = 0;
        npo->copy_gref = req->gref;

        return meta;
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
                                 struct netrx_pending_operations *npo,
                                 struct page *page, unsigned long size,
                                 unsigned long offset, int *head)
{
        struct gnttab_copy *copy_gop;
        struct xenvif_rx_meta *meta;
        unsigned long bytes;
        int gso_type;

        /* Data must not cross a page boundary. */
        BUG_ON(size + offset > PAGE_SIZE << compound_order(page));

        meta = npo->meta + npo->meta_prod - 1;

        /* Skip unused frames from start of page */
        page += offset >> PAGE_SHIFT;
        offset &= ~PAGE_MASK;

        while (size > 0) {
                BUG_ON(offset >= PAGE_SIZE);
                BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

                bytes = PAGE_SIZE - offset;
                if (bytes > size)
                        bytes = size;

                if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
                        /*
                         * Netfront requires there to be some data in the head
                         * buffer.
                         */
                        BUG_ON(*head);

                        meta = get_next_rx_buffer(vif, npo);
                }

                if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
                        bytes = MAX_BUFFER_OFFSET - npo->copy_off;

                copy_gop = npo->copy + npo->copy_prod++;
                copy_gop->flags = GNTCOPY_dest_gref;
                copy_gop->len = bytes;

                copy_gop->source.domid = DOMID_SELF;
                copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
                copy_gop->source.offset = offset;

                copy_gop->dest.domid = vif->domid;
                copy_gop->dest.offset = npo->copy_off;
                copy_gop->dest.u.ref = npo->copy_gref;

                npo->copy_off += bytes;
                meta->size += bytes;

                offset += bytes;
                size -= bytes;

                /* Next frame */
                if (offset == PAGE_SIZE && size) {
                        BUG_ON(!PageCompound(page));
                        page++;
                        offset = 0;
                }

                /* Leave a gap for the GSO descriptor. */
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
                        gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
                else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
                        gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
                else
                        gso_type = XEN_NETIF_GSO_TYPE_NONE;

                if (*head && ((1 << gso_type) & vif->gso_mask))
                        vif->rx.req_cons++;

                *head = 0; /* There must be something in this buffer now. */
        }
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
                          struct netrx_pending_operations *npo)
{
        struct xenvif *vif = netdev_priv(skb->dev);
        int nr_frags = skb_shinfo(skb)->nr_frags;
        int i;
        struct xen_netif_rx_request *req;
        struct xenvif_rx_meta *meta;
        unsigned char *data;
        int head = 1;
        int old_meta_prod;
        int gso_type;
        int gso_size;

        old_meta_prod = npo->meta_prod;

        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
                gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
                gso_size = skb_shinfo(skb)->gso_size;
        } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
                gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
                gso_size = skb_shinfo(skb)->gso_size;
        } else {
                gso_type = XEN_NETIF_GSO_TYPE_NONE;
                gso_size = 0;
        }

        /* Set up a GSO prefix descriptor, if necessary */
        if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
                req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
                meta = npo->meta + npo->meta_prod++;
                meta->gso_type = gso_type;
                meta->gso_size = gso_size;
        }

        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
        meta = npo->meta + npo->meta_prod++;

        if ((1 << gso_type) & vif->gso_mask) {
                meta->gso_type = gso_type;
                meta->gso_size = gso_size;
        } else {
                meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
                meta->gso_size = 0;
        }

        npo->copy_gref = req->gref;

        data = skb->data;
        while (data < skb_tail_pointer(skb)) {
                unsigned int offset = offset_in_page(data);
                unsigned int len = PAGE_SIZE - offset;

                if (data + len > skb_tail_pointer(skb))
                        len = skb_tail_pointer(skb) - data;

                xenvif_gop_frag_copy(vif, skb, npo,
                                     virt_to_page(data), len, offset, &head);
                data += len;
        }

        for (i = 0; i < nr_frags; i++) {
                xenvif_gop_frag_copy(vif, skb, npo,
                                     skb_frag_page(&skb_shinfo(skb)->frags[i]),
                                     skb_frag_size(&skb_shinfo(skb)->frags[i]),
                                     skb_shinfo(skb)->frags[i].page_offset,
                                     &head);
        }

        return npo->meta_prod - old_meta_prod;
}
/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
                            struct netrx_pending_operations *npo)
{
        struct gnttab_copy *copy_op;
        int status = XEN_NETIF_RSP_OKAY;
        int i;

        for (i = 0; i < nr_meta_slots; i++) {
                copy_op = npo->copy + npo->copy_cons++;
                if (copy_op->status != GNTST_okay) {
                        netdev_dbg(vif->dev,
                                   "Bad status %d from copy to DOM%d.\n",
                                   copy_op->status, vif->domid);
                        status = XEN_NETIF_RSP_ERROR;
                }
        }

        return status;
}
static void xenvif_add_frag_responses(struct xenvif *vif, int status,
                                      struct xenvif_rx_meta *meta,
                                      int nr_meta_slots)
{
        int i;
        unsigned long offset;

        /* No fragments used */
        if (nr_meta_slots <= 1)
                return;

        nr_meta_slots--;

        for (i = 0; i < nr_meta_slots; i++) {
                int flags;
                if (i == nr_meta_slots - 1)
                        flags = 0;
                else
                        flags = XEN_NETRXF_more_data;

                offset = 0;
                make_rx_response(vif, meta[i].id, status, offset,
                                 meta[i].size, flags);
        }
}
struct skb_cb_overlay {
        int meta_slots_used;
};

static void xenvif_kick_thread(struct xenvif *vif)
{
        wake_up(&vif->wq);
}
void xenvif_rx_action(struct xenvif *vif)
{
        s8 status;
        u16 flags;
        struct xen_netif_rx_response *resp;
        struct sk_buff_head rxq;
        struct sk_buff *skb;
        int ret;
        int nr_frags;
        int count = 0;
        unsigned long offset;
        struct skb_cb_overlay *sco;
        int need_to_notify = 0;

        struct netrx_pending_operations npo = {
                .copy  = vif->grant_copy_op,
                .meta  = vif->meta,
        };

        skb_queue_head_init(&rxq);

        while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
                vif = netdev_priv(skb->dev);
                nr_frags = skb_shinfo(skb)->nr_frags;

                sco = (struct skb_cb_overlay *)skb->cb;
                sco->meta_slots_used = xenvif_gop_skb(skb, &npo);

                count += nr_frags + 1;

                __skb_queue_tail(&rxq, skb);

                /* Filled the batch queue? */
                /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
                if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
                        break;
        }

        BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

        BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
        gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
        while ((skb = __skb_dequeue(&rxq)) != NULL) {
                sco = (struct skb_cb_overlay *)skb->cb;

                vif = netdev_priv(skb->dev);

                if ((1 << vif->meta[npo.meta_cons].gso_type) &
                    vif->gso_prefix_mask) {
                        resp = RING_GET_RESPONSE(&vif->rx,
                                                 vif->rx.rsp_prod_pvt++);

                        resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

                        resp->offset = vif->meta[npo.meta_cons].gso_size;
                        resp->id = vif->meta[npo.meta_cons].id;
                        resp->status = sco->meta_slots_used;

                        npo.meta_cons++;
                        sco->meta_slots_used--;
                }

                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;

                status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);

                if (sco->meta_slots_used == 1)
                        flags = 0;
                else
                        flags = XEN_NETRXF_more_data;

                if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
                        flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
                else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                        /* remote but checksummed. */
                        flags |= XEN_NETRXF_data_validated;

                offset = 0;
                resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
                                        status, offset,
                                        vif->meta[npo.meta_cons].size,
                                        flags);

                if ((1 << vif->meta[npo.meta_cons].gso_type) &
                    vif->gso_mask) {
                        struct xen_netif_extra_info *gso =
                                (struct xen_netif_extra_info *)
                                RING_GET_RESPONSE(&vif->rx,
                                                  vif->rx.rsp_prod_pvt++);

                        resp->flags |= XEN_NETRXF_extra_info;

                        gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
                        gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
                        gso->u.gso.pad = 0;
                        gso->u.gso.features = 0;

                        gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
                        gso->flags = 0;
                }

                xenvif_add_frag_responses(vif, status,
                                          vif->meta + npo.meta_cons + 1,
                                          sco->meta_slots_used);

                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

                if (ret)
                        need_to_notify = 1;

                xenvif_notify_tx_completion(vif);

                npo.meta_cons += sco->meta_slots_used;
                dev_kfree_skb(skb);
        }

        if (need_to_notify)
                notify_remote_via_irq(vif->rx_irq);

        /* More work to do? */
        if (!skb_queue_empty(&vif->rx_queue))
                xenvif_kick_thread(vif);
}
void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
        skb_queue_tail(&vif->rx_queue, skb);
        xenvif_kick_thread(vif);
}

void xenvif_check_rx_xenvif(struct xenvif *vif)
{
        int more_to_do;

        RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

        if (more_to_do)
                napi_schedule(&vif->napi);
}
static void tx_add_credit(struct xenvif *vif)
{
        unsigned long max_burst, max_credit;

        /*
         * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
         * Otherwise the interface can seize up due to insufficient credit.
         */
        max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
        max_burst = min(max_burst, 131072UL);
        max_burst = max(max_burst, vif->credit_bytes);

        /* Take care that adding a new chunk of credit doesn't wrap to zero. */
        max_credit = vif->remaining_credit + vif->credit_bytes;
        if (max_credit < vif->remaining_credit)
                max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

        vif->remaining_credit = min(max_credit, max_burst);
}
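
/*
 * Illustrative numbers (not taken from the code above): with
 * credit_bytes = 10000, remaining_credit = 2000 and a 60000-byte
 * request at the ring head, max_burst = max(min(60000, 131072), 10000)
 * = 60000 and max_credit = 2000 + 10000 = 12000, so remaining_credit
 * becomes min(12000, 60000) = 12000. The burst cap only bites when the
 * accumulated credit would exceed what the head packet (or one
 * configured credit window) needs.
 */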
static void tx_credit_callback(unsigned long data)
{
        struct xenvif *vif = (struct xenvif *)data;
        tx_add_credit(vif);
        xenvif_check_rx_xenvif(vif);
}
static void xenvif_tx_err(struct xenvif *vif,
                          struct xen_netif_tx_request *txp, RING_IDX end)
{
        RING_IDX cons = vif->tx.req_cons;

        do {
                make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
                if (cons == end)
                        break;
                txp = RING_GET_REQUEST(&vif->tx, cons++);
        } while (1);
        vif->tx.req_cons = cons;
}
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
        netdev_err(vif->dev, "fatal error; disabling device\n");
        xenvif_carrier_off(vif);
}
static int xenvif_count_requests(struct xenvif *vif,
                                 struct xen_netif_tx_request *first,
                                 struct xen_netif_tx_request *txp,
                                 int work_to_do)
{
        RING_IDX cons = vif->tx.req_cons;
        int slots = 0;
        int drop_err = 0;
        int more_data;

        if (!(first->flags & XEN_NETTXF_more_data))
                return 0;

        do {
                struct xen_netif_tx_request dropped_tx = { 0 };

                if (slots >= work_to_do) {
                        netdev_err(vif->dev,
                                   "Asked for %d slots but exceeds this limit\n",
                                   work_to_do);
                        xenvif_fatal_tx_err(vif);
                        return -ENODATA;
                }

                /* This guest is really using too many slots and
                 * considered malicious.
                 */
                if (unlikely(slots >= fatal_skb_slots)) {
                        netdev_err(vif->dev,
                                   "Malicious frontend using %d slots, threshold %u\n",
                                   slots, fatal_skb_slots);
                        xenvif_fatal_tx_err(vif);
                        return -E2BIG;
                }

                /* The Xen network protocol had an implicit dependency on
                 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
                 * the historical MAX_SKB_FRAGS value 18 to honor the
                 * same behavior as before. Any packet using more than
                 * 18 slots but less than fatal_skb_slots slots is
                 * dropped.
                 */
                if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
                        if (net_ratelimit())
                                netdev_dbg(vif->dev,
                                           "Too many slots (%d) exceeding limit (%d), dropping packet\n",
                                           slots, XEN_NETBK_LEGACY_SLOTS_MAX);
                        drop_err = -E2BIG;
                }

                if (drop_err)
                        txp = &dropped_tx;

                memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
                       sizeof(*txp));

                /* If the guest submitted a frame >= 64 KiB then
                 * first->size overflowed and following slots will
                 * appear to be larger than the frame.
                 *
                 * This cannot be a fatal error as there are buggy
                 * frontends that do this.
                 *
                 * Consume all slots and drop the packet.
                 */
                if (!drop_err && txp->size > first->size) {
                        if (net_ratelimit())
                                netdev_dbg(vif->dev,
                                           "Invalid tx request, slot size %u > remaining size %u\n",
                                           txp->size, first->size);
                        drop_err = -EIO;
                }

                first->size -= txp->size;
                slots++;

                if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
                        netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
                                   txp->offset, txp->size);
                        xenvif_fatal_tx_err(vif);
                        return -EINVAL;
                }

                more_data = txp->flags & XEN_NETTXF_more_data;

                if (!drop_err)
                        txp++;

        } while (more_data);

        if (drop_err) {
                xenvif_tx_err(vif, first, cons + slots);
                return drop_err;
        }

        return slots;
}
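
/*
 * Note on the three limits used above: work_to_do bounds how many
 * unconsumed requests the ring can legally hold right now,
 * XEN_NETBK_LEGACY_SLOTS_MAX (historically 18) is the largest slot
 * count a valid packet may use (larger packets are dropped but the
 * frontend stays connected), and fatal_skb_slots is the point at which
 * the frontend is treated as malicious and disabled via
 * xenvif_fatal_tx_err().
 */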
static struct page *xenvif_alloc_page(struct xenvif *vif,
                                      u16 pending_idx)
{
        struct page *page;

        page = alloc_page(GFP_ATOMIC|__GFP_COLD);
        if (!page)
                return NULL;
        vif->mmap_pages[pending_idx] = page;

        return page;
}
static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
                                               struct sk_buff *skb,
                                               struct xen_netif_tx_request *txp,
                                               struct gnttab_copy *gop)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        skb_frag_t *frags = shinfo->frags;
        u16 pending_idx = *((u16 *)skb->data);
        u16 head_idx = 0;
        int slot, start;
        struct page *page;
        pending_ring_idx_t index, start_idx = 0;
        uint16_t dst_offset;
        unsigned int nr_slots;
        struct pending_tx_info *first = NULL;

        /* At this point shinfo->nr_frags is in fact the number of
         * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
         */
        nr_slots = shinfo->nr_frags;

        /* Skip first skb fragment if it is on same page as header fragment. */
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

        /* Coalesce tx requests, at this point the packet passed in
         * should be <= 64K. Any packets larger than 64K have been
         * handled in xenvif_count_requests().
         */
        for (shinfo->nr_frags = slot = start; slot < nr_slots;
             shinfo->nr_frags++) {
                struct pending_tx_info *pending_tx_info =
                        vif->pending_tx_info;

                page = alloc_page(GFP_ATOMIC|__GFP_COLD);
                if (!page)
                        goto err;

                dst_offset = 0;
                first = NULL;
                while (dst_offset < PAGE_SIZE && slot < nr_slots) {
                        gop->flags = GNTCOPY_source_gref;

                        gop->source.u.ref = txp->gref;
                        gop->source.domid = vif->domid;
                        gop->source.offset = txp->offset;

                        gop->dest.domid = DOMID_SELF;

                        gop->dest.offset = dst_offset;
                        gop->dest.u.gmfn = virt_to_mfn(page_address(page));

                        if (dst_offset + txp->size > PAGE_SIZE) {
                                /* This page can only merge a portion
                                 * of tx request. Do not increment any
                                 * pointer / counter here. The txp
                                 * will be dealt with in future
                                 * rounds, eventually hitting the
                                 * `else` branch.
                                 */
                                gop->len = PAGE_SIZE - dst_offset;
                                txp->offset += gop->len;
                                txp->size -= gop->len;
                                dst_offset += gop->len; /* quit loop */
                        } else {
                                /* This tx request can be merged in the page */
                                gop->len = txp->size;
                                dst_offset += gop->len;

                                index = pending_index(vif->pending_cons++);

                                pending_idx = vif->pending_ring[index];

                                memcpy(&pending_tx_info[pending_idx].req, txp,
                                       sizeof(*txp));

                                /* Poison these fields, corresponding
                                 * fields for head tx req will be set
                                 * to correct values after the loop.
                                 */
                                vif->mmap_pages[pending_idx] = (void *)(~0UL);
                                pending_tx_info[pending_idx].head =
                                        INVALID_PENDING_RING_IDX;

                                if (!first) {
                                        first = &pending_tx_info[pending_idx];
                                        start_idx = index;
                                        head_idx = pending_idx;
                                }

                                txp++;
                                slot++;
                        }

                        gop++;
                }

                first->req.offset = 0;
                first->req.size = dst_offset;
                first->head = start_idx;
                vif->mmap_pages[head_idx] = page;
                frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
        }

        BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);

        return gop;
err:
        /* Unwind, freeing all pages and sending error responses. */
        while (shinfo->nr_frags-- > start) {
                xenvif_idx_release(vif,
                                   frag_get_pending_idx(&frags[shinfo->nr_frags]),
                                   XEN_NETIF_RSP_ERROR);
        }
        /* The head too, if necessary. */
        if (start)
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

        return NULL;
}
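
/*
 * The loop above coalesces several small tx requests into a single
 * backend-allocated page per skb fragment, so a guest packet spread
 * over up to XEN_NETBK_LEGACY_SLOTS_MAX slots still fits within
 * MAX_SKB_FRAGS frags (hence the BUG_ON on shinfo->nr_frags above).
 */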
static int xenvif_tx_check_gop(struct xenvif *vif,
                               struct sk_buff *skb,
                               struct gnttab_copy **gopp)
{
        struct gnttab_copy *gop = *gopp;
        u16 pending_idx = *((u16 *)skb->data);
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        struct pending_tx_info *tx_info;
        int nr_frags = shinfo->nr_frags;
        int i, err, start;
        u16 peek; /* peek into next tx request */

        /* Check status of header. */
        err = gop->status;
        if (unlikely(err))
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

        /* Skip first skb fragment if it is on same page as header fragment. */
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

        for (i = start; i < nr_frags; i++) {
                int j, newerr;
                pending_ring_idx_t head;

                pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
                tx_info = &vif->pending_tx_info[pending_idx];
                head = tx_info->head;

                /* Check error status: if okay then remember grant handle. */
                do {
                        newerr = (++gop)->status;
                        if (newerr)
                                break;
                        peek = vif->pending_ring[pending_index(++head)];
                } while (!pending_tx_is_head(vif, peek));

                if (likely(!newerr)) {
                        /* Had a previous error? Invalidate this fragment. */
                        if (unlikely(err))
                                xenvif_idx_release(vif, pending_idx,
                                                   XEN_NETIF_RSP_OKAY);
                        continue;
                }

                /* Error on this fragment: respond to client with an error. */
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

                /* Not the first error? Preceding frags already invalidated. */
                if (err)
                        continue;

                /* First error: invalidate header and preceding fragments. */
                pending_idx = *((u16 *)skb->data);
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
                for (j = start; j < i; j++) {
                        pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
                        xenvif_idx_release(vif, pending_idx,
                                           XEN_NETIF_RSP_OKAY);
                }

                /* Remember the error: invalidate all subsequent fragments. */
                err = newerr;
        }

        *gopp = gop + 1;
        return err;
}
static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        int nr_frags = shinfo->nr_frags;
        int i;

        for (i = 0; i < nr_frags; i++) {
                skb_frag_t *frag = shinfo->frags + i;
                struct xen_netif_tx_request *txp;
                struct page *page;
                u16 pending_idx;

                pending_idx = frag_get_pending_idx(frag);

                txp = &vif->pending_tx_info[pending_idx].req;
                page = virt_to_page(idx_to_kaddr(vif, pending_idx));
                __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
                skb->len += txp->size;
                skb->data_len += txp->size;
                skb->truesize += txp->size;

                /* Take an extra reference to offset xenvif_idx_release */
                get_page(vif->mmap_pages[pending_idx]);
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
        }
}
static int xenvif_get_extras(struct xenvif *vif,
                             struct xen_netif_extra_info *extras,
                             int work_to_do)
{
        struct xen_netif_extra_info extra;
        RING_IDX cons = vif->tx.req_cons;

        do {
                if (unlikely(work_to_do-- <= 0)) {
                        netdev_err(vif->dev, "Missing extra info\n");
                        xenvif_fatal_tx_err(vif);
                        return -EBADR;
                }

                memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
                       sizeof(extra));
                if (unlikely(!extra.type ||
                             extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
                        vif->tx.req_cons = ++cons;
                        netdev_err(vif->dev,
                                   "Invalid extra type: %d\n", extra.type);
                        xenvif_fatal_tx_err(vif);
                        return -EINVAL;
                }

                memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
                vif->tx.req_cons = ++cons;
        } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

        return work_to_do;
}
static int xenvif_set_skb_gso(struct xenvif *vif,
                              struct sk_buff *skb,
                              struct xen_netif_extra_info *gso)
{
        if (!gso->u.gso.size) {
                netdev_err(vif->dev, "GSO size must not be zero.\n");
                xenvif_fatal_tx_err(vif);
                return -EINVAL;
        }

        switch (gso->u.gso.type) {
        case XEN_NETIF_GSO_TYPE_TCPV4:
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
                break;
        case XEN_NETIF_GSO_TYPE_TCPV6:
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
                break;
        default:
                netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
                xenvif_fatal_tx_err(vif);
                return -EINVAL;
        }

        skb_shinfo(skb)->gso_size = gso->u.gso.size;

        /* Header must be checked, and gso_segs computed. */
        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
        skb_shinfo(skb)->gso_segs = 0;

        return 0;
}
static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
{
        if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
                /* If we need to pullup then pullup to the max, so we
                 * won't need to do it again.
                 */
                int target = min_t(int, skb->len, MAX_TCP_HEADER);
                __pskb_pull_tail(skb, target - skb_headlen(skb));
        }
}
static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
                             int recalculate_partial_csum)
{
        struct iphdr *iph = (void *)skb->data;
        unsigned int header_size;
        unsigned int off;
        int err = -EPROTO;

        off = sizeof(struct iphdr);

        header_size = skb->network_header + off + MAX_IPOPTLEN;
        maybe_pull_tail(skb, header_size);

        off = iph->ihl * 4;

        switch (iph->protocol) {
        case IPPROTO_TCP:
                if (!skb_partial_csum_set(skb, off,
                                          offsetof(struct tcphdr, check)))
                        goto out;

                if (recalculate_partial_csum) {
                        struct tcphdr *tcph = tcp_hdr(skb);

                        header_size = skb->network_header +
                                off +
                                sizeof(struct tcphdr);
                        maybe_pull_tail(skb, header_size);

                        tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
                                                         skb->len - off,
                                                         IPPROTO_TCP, 0);
                }
                break;
        case IPPROTO_UDP:
                if (!skb_partial_csum_set(skb, off,
                                          offsetof(struct udphdr, check)))
                        goto out;

                if (recalculate_partial_csum) {
                        struct udphdr *udph = udp_hdr(skb);

                        header_size = skb->network_header +
                                off +
                                sizeof(struct udphdr);
                        maybe_pull_tail(skb, header_size);

                        udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
                                                         skb->len - off,
                                                         IPPROTO_UDP, 0);
                }
                break;
        default:
                if (net_ratelimit())
                        netdev_err(vif->dev,
                                   "Attempting to checksum a non-TCP/UDP packet, "
                                   "dropping a protocol %d packet\n",
                                   iph->protocol);
                goto out;
        }

        err = 0;

out:
        return err;
}
static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
                               int recalculate_partial_csum)
{
        int err = -EPROTO;
        struct ipv6hdr *ipv6h = (void *)skb->data;
        u8 nexthdr;
        unsigned int header_size;
        unsigned int off;
        bool fragment;
        bool done;

        fragment = false;
        done = false;

        off = sizeof(struct ipv6hdr);

        header_size = skb->network_header + off;
        maybe_pull_tail(skb, header_size);

        nexthdr = ipv6h->nexthdr;

        while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
               !done) {
                switch (nexthdr) {
                case IPPROTO_DSTOPTS:
                case IPPROTO_HOPOPTS:
                case IPPROTO_ROUTING: {
                        struct ipv6_opt_hdr *hp = (void *)(skb->data + off);

                        header_size = skb->network_header +
                                off +
                                sizeof(struct ipv6_opt_hdr);
                        maybe_pull_tail(skb, header_size);

                        nexthdr = hp->nexthdr;
                        off += ipv6_optlen(hp);
                        break;
                }
                case IPPROTO_AH: {
                        struct ip_auth_hdr *hp = (void *)(skb->data + off);

                        header_size = skb->network_header +
                                off +
                                sizeof(struct ip_auth_hdr);
                        maybe_pull_tail(skb, header_size);

                        nexthdr = hp->nexthdr;
                        off += (hp->hdrlen + 2) << 2;
                        break;
                }
                case IPPROTO_FRAGMENT:
                        fragment = true;
                        /* fall through */
                default:
                        done = true;
                        break;
                }
        }

        if (!done) {
                if (net_ratelimit())
                        netdev_err(vif->dev, "Failed to parse packet header\n");
                goto out;
        }

        if (fragment) {
                if (net_ratelimit())
                        netdev_err(vif->dev, "Packet is a fragment!\n");
                goto out;
        }

        switch (nexthdr) {
        case IPPROTO_TCP:
                if (!skb_partial_csum_set(skb, off,
                                          offsetof(struct tcphdr, check)))
                        goto out;

                if (recalculate_partial_csum) {
                        struct tcphdr *tcph = tcp_hdr(skb);

                        header_size = skb->network_header +
                                off +
                                sizeof(struct tcphdr);
                        maybe_pull_tail(skb, header_size);

                        tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
                                                       &ipv6h->daddr,
                                                       skb->len - off,
                                                       IPPROTO_TCP, 0);
                }
                break;
        case IPPROTO_UDP:
                if (!skb_partial_csum_set(skb, off,
                                          offsetof(struct udphdr, check)))
                        goto out;

                if (recalculate_partial_csum) {
                        struct udphdr *udph = udp_hdr(skb);

                        header_size = skb->network_header +
                                off +
                                sizeof(struct udphdr);
                        maybe_pull_tail(skb, header_size);

                        udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
                                                       &ipv6h->daddr,
                                                       skb->len - off,
                                                       IPPROTO_UDP, 0);
                }
                break;
        default:
                if (net_ratelimit())
                        netdev_err(vif->dev,
                                   "Attempting to checksum a non-TCP/UDP packet, "
                                   "dropping a protocol %d packet\n",
                                   nexthdr);
                goto out;
        }

        err = 0;

out:
        return err;
}
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
        int err = -EPROTO;
        int recalculate_partial_csum = 0;

        /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
         * peers can fail to set NETRXF_csum_blank when sending a GSO
         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
         * recalculate the partial checksum.
         */
        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
                vif->rx_gso_checksum_fixup++;
                skb->ip_summed = CHECKSUM_PARTIAL;
                recalculate_partial_csum = 1;
        }

        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return 0;

        if (skb->protocol == htons(ETH_P_IP))
                err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
        else if (skb->protocol == htons(ETH_P_IPV6))
                err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);

        return err;
}
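
/*
 * Note: only CHECKSUM_PARTIAL skbs reach the per-protocol helpers, and
 * anything that is neither IPv4 nor IPv6 (or that the helpers cannot
 * parse) leaves err negative, which makes the caller in
 * xenvif_tx_submit() drop the packet.
 */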
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
        u64 now = get_jiffies_64();
        u64 next_credit = vif->credit_window_start +
                msecs_to_jiffies(vif->credit_usec / 1000);

        /* Timer could already be pending in rare cases. */
        if (timer_pending(&vif->credit_timeout))
                return true;

        /* Passed the point where we can replenish credit? */
        if (time_after_eq64(now, next_credit)) {
                vif->credit_window_start = now;
                tx_add_credit(vif);
        }

        /* Still too big to send right now? Set a callback. */
        if (size > vif->remaining_credit) {
                vif->credit_timeout.data =
                        (unsigned long)vif;
                vif->credit_timeout.function =
                        tx_credit_callback;
                mod_timer(&vif->credit_timeout,
                          next_credit);
                vif->credit_window_start = next_credit;

                return true;
        }

        return false;
}
static unsigned xenvif_tx_build_gops(struct xenvif *vif)
{
        struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
        struct sk_buff *skb;
        int ret;

        while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
                < MAX_PENDING_REQS)) {
                struct xen_netif_tx_request txreq;
                struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
                struct page *page;
                struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
                u16 pending_idx;
                RING_IDX idx;
                int work_to_do;
                unsigned int data_len;
                pending_ring_idx_t index;

                if (vif->tx.sring->req_prod - vif->tx.req_cons >
                    XEN_NETIF_TX_RING_SIZE) {
                        netdev_err(vif->dev,
                                   "Impossible number of requests. "
                                   "req_prod %d, req_cons %d, size %ld\n",
                                   vif->tx.sring->req_prod, vif->tx.req_cons,
                                   XEN_NETIF_TX_RING_SIZE);
                        xenvif_fatal_tx_err(vif);
                        continue;
                }

                RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
                if (!work_to_do)
                        break;

                idx = vif->tx.req_cons;
                rmb(); /* Ensure that we see the request before we copy it. */
                memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

                /* Credit-based scheduling. */
                if (txreq.size > vif->remaining_credit &&
                    tx_credit_exceeded(vif, txreq.size))
                        break;

                vif->remaining_credit -= txreq.size;

                work_to_do--;
                vif->tx.req_cons = ++idx;

                memset(extras, 0, sizeof(extras));
                if (txreq.flags & XEN_NETTXF_extra_info) {
                        work_to_do = xenvif_get_extras(vif, extras,
                                                       work_to_do);
                        idx = vif->tx.req_cons;
                        if (unlikely(work_to_do < 0))
                                break;
                }

                ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
                if (unlikely(ret < 0))
                        break;

                idx += ret;

                if (unlikely(txreq.size < ETH_HLEN)) {
                        netdev_dbg(vif->dev,
                                   "Bad packet size: %d\n", txreq.size);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                /* No crossing a page as the payload mustn't fragment. */
                if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                        netdev_err(vif->dev,
                                   "txreq.offset: %x, size: %u, end: %lu\n",
                                   txreq.offset, txreq.size,
                                   (txreq.offset&~PAGE_MASK) + txreq.size);
                        xenvif_fatal_tx_err(vif);
                        break;
                }

                index = pending_index(vif->pending_cons);
                pending_idx = vif->pending_ring[index];

                data_len = (txreq.size > PKT_PROT_LEN &&
                            ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
                        PKT_PROT_LEN : txreq.size;

                skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
                                GFP_ATOMIC | __GFP_NOWARN);
                if (unlikely(skb == NULL)) {
                        netdev_dbg(vif->dev,
                                   "Can't allocate a skb in start_xmit.\n");
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                /* Packets passed to netif_rx() must have some headroom. */
                skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
                        struct xen_netif_extra_info *gso;
                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

                        if (xenvif_set_skb_gso(vif, skb, gso)) {
                                /* Failure in xenvif_set_skb_gso is fatal. */
                                kfree_skb(skb);
                                break;
                        }
                }

                /* XXX could copy straight to head */
                page = xenvif_alloc_page(vif, pending_idx);
                if (!page) {
                        kfree_skb(skb);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                gop->source.u.ref = txreq.gref;
                gop->source.domid = vif->domid;
                gop->source.offset = txreq.offset;

                gop->dest.u.gmfn = virt_to_mfn(page_address(page));
                gop->dest.domid = DOMID_SELF;
                gop->dest.offset = txreq.offset;

                gop->len = txreq.size;
                gop->flags = GNTCOPY_source_gref;

                gop++;

                memcpy(&vif->pending_tx_info[pending_idx].req,
                       &txreq, sizeof(txreq));
                vif->pending_tx_info[pending_idx].head = index;
                *((u16 *)skb->data) = pending_idx;

                __skb_put(skb, data_len);

                skb_shinfo(skb)->nr_frags = ret;
                if (data_len < txreq.size) {
                        skb_shinfo(skb)->nr_frags++;
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                             pending_idx);
                } else {
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                             INVALID_PENDING_IDX);
                }

                vif->pending_cons++;

                request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
                if (request_gop == NULL) {
                        kfree_skb(skb);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }
                gop = request_gop;

                __skb_queue_tail(&vif->tx_queue, skb);

                vif->tx.req_cons = idx;

                if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
                        break;
        }

        return gop - vif->tx_copy_ops;
}
static int xenvif_tx_submit(struct xenvif *vif, int budget)
{
        struct gnttab_copy *gop = vif->tx_copy_ops;
        struct sk_buff *skb;
        int work_done = 0;

        while (work_done < budget &&
               (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
                struct xen_netif_tx_request *txp;
                u16 pending_idx;
                unsigned data_len;

                pending_idx = *((u16 *)skb->data);
                txp = &vif->pending_tx_info[pending_idx].req;

                /* Check the remap error code. */
                if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
                        netdev_dbg(vif->dev, "netback grant failed.\n");
                        skb_shinfo(skb)->nr_frags = 0;
                        kfree_skb(skb);
                        continue;
                }

                data_len = skb->len;
                memcpy(skb->data,
                       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
                       data_len);
                if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
                        txp->offset += data_len;
                        txp->size -= data_len;
                } else {
                        /* Schedule a response immediately. */
                        xenvif_idx_release(vif, pending_idx,
                                           XEN_NETIF_RSP_OKAY);
                }

                if (txp->flags & XEN_NETTXF_csum_blank)
                        skb->ip_summed = CHECKSUM_PARTIAL;
                else if (txp->flags & XEN_NETTXF_data_validated)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;

                xenvif_fill_frags(vif, skb);

                if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
                        int target = min_t(int, skb->len, PKT_PROT_LEN);
                        __pskb_pull_tail(skb, target - skb_headlen(skb));
                }

                skb->dev      = vif->dev;
                skb->protocol = eth_type_trans(skb, skb->dev);
                skb_reset_network_header(skb);

                if (checksum_setup(vif, skb)) {
                        netdev_dbg(vif->dev,
                                   "Can't setup checksum in net_tx_action\n");
                        kfree_skb(skb);
                        continue;
                }

                skb_probe_transport_header(skb, 0);

                vif->dev->stats.rx_bytes += skb->len;
                vif->dev->stats.rx_packets++;

                work_done++;

                netif_receive_skb(skb);
        }

        return work_done;
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
        unsigned nr_gops;
        int work_done;

        if (unlikely(!tx_work_todo(vif)))
                return 0;

        nr_gops = xenvif_tx_build_gops(vif);

        if (nr_gops == 0)
                return 0;

        gnttab_batch_copy(vif->tx_copy_ops, nr_gops);

        work_done = xenvif_tx_submit(vif, nr_gops);

        return work_done;
}
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
                               u8 status)
{
        struct pending_tx_info *pending_tx_info;
        pending_ring_idx_t head;
        u16 peek; /* peek into next tx request */

        BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));

        /* Already complete? */
        if (vif->mmap_pages[pending_idx] == NULL)
                return;

        pending_tx_info = &vif->pending_tx_info[pending_idx];

        head = pending_tx_info->head;

        BUG_ON(!pending_tx_is_head(vif, head));
        BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);

        do {
                pending_ring_idx_t index;
                pending_ring_idx_t idx = pending_index(head);
                u16 info_idx = vif->pending_ring[idx];

                pending_tx_info = &vif->pending_tx_info[info_idx];
                make_tx_response(vif, &pending_tx_info->req, status);

                /* Setting any number other than
                 * INVALID_PENDING_RING_IDX indicates this slot is
                 * starting a new packet / ending a previous packet.
                 */
                pending_tx_info->head = 0;

                index = pending_index(vif->pending_prod++);
                vif->pending_ring[index] = vif->pending_ring[info_idx];

                peek = vif->pending_ring[pending_index(++head)];

        } while (!pending_tx_is_head(vif, peek));

        put_page(vif->mmap_pages[pending_idx]);
        vif->mmap_pages[pending_idx] = NULL;
}
static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st)
{
        RING_IDX i = vif->tx.rsp_prod_pvt;
        struct xen_netif_tx_response *resp;
        int notify;

        resp = RING_GET_RESPONSE(&vif->tx, i);
        resp->id     = txp->id;
        resp->status = st;

        if (txp->flags & XEN_NETTXF_extra_info)
                RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

        vif->tx.rsp_prod_pvt = ++i;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
        if (notify)
                notify_remote_via_irq(vif->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                       u16 id,
                                                       s8 st,
                                                       u16 offset,
                                                       u16 size,
                                                       u16 flags)
{
        RING_IDX i = vif->rx.rsp_prod_pvt;
        struct xen_netif_rx_response *resp;

        resp = RING_GET_RESPONSE(&vif->rx, i);
        resp->offset     = offset;
        resp->flags      = flags;
        resp->id         = id;
        resp->status     = (s16)size;
        if (st < 0)
                resp->status = (s16)st;

        vif->rx.rsp_prod_pvt = ++i;

        return resp;
}
static inline int rx_work_todo(struct xenvif *vif)
{
        return !skb_queue_empty(&vif->rx_queue);
}

static inline int tx_work_todo(struct xenvif *vif)
{
        if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
            (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
             < MAX_PENDING_REQS))
                return 1;

        return 0;
}
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
        if (vif->tx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                        vif->tx.sring);
        if (vif->rx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                        vif->rx.sring);
}
int xenvif_map_frontend_rings(struct xenvif *vif,
                              grant_ref_t tx_ring_ref,
                              grant_ref_t rx_ring_ref)
{
        void *addr;
        struct xen_netif_tx_sring *txs;
        struct xen_netif_rx_sring *rxs;

        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     tx_ring_ref, &addr);
        if (err)
                goto err;

        txs = (struct xen_netif_tx_sring *)addr;
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     rx_ring_ref, &addr);
        if (err)
                goto err;

        rxs = (struct xen_netif_rx_sring *)addr;
        BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

        vif->rx_req_cons_peek = 0;

        return 0;

err:
        xenvif_unmap_frontend_rings(vif);
        return err;
}
int xenvif_kthread(void *data)
{
        struct xenvif *vif = data;

        while (!kthread_should_stop()) {
                wait_event_interruptible(vif->wq,
                                         rx_work_todo(vif) ||
                                         kthread_should_stop());
                if (kthread_should_stop())
                        break;

                if (rx_work_todo(vif))
                        xenvif_rx_action(vif);

                cond_resched();
        }

        return 0;
}
static int __init netback_init(void)
{
        int rc = 0;

        if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
                pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
                        fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
                fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
        }

        rc = xenvif_xenbus_init();
        if (rc)
                return rc;

        return 0;
}

module_init(netback_init);
static void __exit netback_fini(void)
{
        xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");