2 * Back-end of the driver for virtual network devices. This portion of the
3 * driver exports a 'unified' network-device interface that can be accessed
4 * by any operating system that implements a compatible front end. A
5 * reference front-end implementation can be found in:
6 * drivers/net/xen-netfront.c
8 * Copyright (c) 2002-2005, K A Fraser
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
37 #include <linux/kthread.h>
38 #include <linux/if_vlan.h>
39 #include <linux/udp.h>
40 #include <linux/highmem.h>
45 #include <xen/events.h>
46 #include <xen/interface/memory.h>
48 #include <asm/xen/hypercall.h>
49 #include <asm/xen/page.h>
51 /* Provide an option to disable split event channels at load time as
52 * event channels are limited resource. Split event channels are
55 bool separate_tx_rx_irq
= 1;
56 module_param(separate_tx_rx_irq
, bool, 0644);
58 /* When guest ring is filled up, qdisc queues the packets for us, but we have
59 * to timeout them, otherwise other guests' packets can get stuck there
61 unsigned int rx_drain_timeout_msecs
= 10000;
62 module_param(rx_drain_timeout_msecs
, uint
, 0444);
63 unsigned int rx_drain_timeout_jiffies
;
66 * This is the maximum slots a skb can have. If a guest sends a skb
67 * which exceeds this limit it is considered malicious.
69 #define FATAL_SKB_SLOTS_DEFAULT 20
70 static unsigned int fatal_skb_slots
= FATAL_SKB_SLOTS_DEFAULT
;
71 module_param(fatal_skb_slots
, uint
, 0444);
73 static void xenvif_idx_release(struct xenvif
*vif
, u16 pending_idx
,
76 static void make_tx_response(struct xenvif
*vif
,
77 struct xen_netif_tx_request
*txp
,
80 static inline int tx_work_todo(struct xenvif
*vif
);
81 static inline int rx_work_todo(struct xenvif
*vif
);
83 static struct xen_netif_rx_response
*make_rx_response(struct xenvif
*vif
,
90 static inline unsigned long idx_to_pfn(struct xenvif
*vif
,
93 return page_to_pfn(vif
->mmap_pages
[idx
]);
96 static inline unsigned long idx_to_kaddr(struct xenvif
*vif
,
99 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif
, idx
));
102 #define callback_param(vif, pending_idx) \
103 (vif->pending_tx_info[pending_idx].callback_struct)
105 /* Find the containing VIF's structure from a pointer in pending_tx_info array
107 static inline struct xenvif
* ubuf_to_vif(struct ubuf_info
*ubuf
)
109 u16 pending_idx
= ubuf
->desc
;
110 struct pending_tx_info
*temp
=
111 container_of(ubuf
, struct pending_tx_info
, callback_struct
);
112 return container_of(temp
- pending_idx
,
117 /* This is a miniumum size for the linear area to avoid lots of
118 * calls to __pskb_pull_tail() as we set up checksum offsets. The
119 * value 128 was chosen as it covers all IPv4 and most likely
122 #define PKT_PROT_LEN 128
124 static u16
frag_get_pending_idx(skb_frag_t
*frag
)
126 return (u16
)frag
->page_offset
;
129 static void frag_set_pending_idx(skb_frag_t
*frag
, u16 pending_idx
)
131 frag
->page_offset
= pending_idx
;
134 static inline pending_ring_idx_t
pending_index(unsigned i
)
136 return i
& (MAX_PENDING_REQS
-1);
139 bool xenvif_rx_ring_slots_available(struct xenvif
*vif
, int needed
)
144 prod
= vif
->rx
.sring
->req_prod
;
145 cons
= vif
->rx
.req_cons
;
147 if (prod
- cons
>= needed
)
150 vif
->rx
.sring
->req_event
= prod
+ 1;
152 /* Make sure event is visible before we check prod
156 } while (vif
->rx
.sring
->req_prod
!= prod
);
162 * Returns true if we should start a new receive buffer instead of
163 * adding 'size' bytes to a buffer which currently contains 'offset'
166 static bool start_new_rx_buffer(int offset
, unsigned long size
, int head
)
168 /* simple case: we have completely filled the current buffer. */
169 if (offset
== MAX_BUFFER_OFFSET
)
173 * complex case: start a fresh buffer if the current frag
174 * would overflow the current buffer but only if:
175 * (i) this frag would fit completely in the next buffer
176 * and (ii) there is already some data in the current buffer
177 * and (iii) this is not the head buffer.
180 * - (i) stops us splitting a frag into two copies
181 * unless the frag is too large for a single buffer.
182 * - (ii) stops us from leaving a buffer pointlessly empty.
183 * - (iii) stops us leaving the first buffer
184 * empty. Strictly speaking this is already covered
185 * by (ii) but is explicitly checked because
186 * netfront relies on the first buffer being
187 * non-empty and can crash otherwise.
189 * This means we will effectively linearise small
190 * frags but do not needlessly split large buffers
191 * into multiple copies tend to give large frags their
192 * own buffers as before.
194 BUG_ON(size
> MAX_BUFFER_OFFSET
);
195 if ((offset
+ size
> MAX_BUFFER_OFFSET
) && offset
&& !head
)
201 struct netrx_pending_operations
{
202 unsigned copy_prod
, copy_cons
;
203 unsigned meta_prod
, meta_cons
;
204 struct gnttab_copy
*copy
;
205 struct xenvif_rx_meta
*meta
;
207 grant_ref_t copy_gref
;
210 static struct xenvif_rx_meta
*get_next_rx_buffer(struct xenvif
*vif
,
211 struct netrx_pending_operations
*npo
)
213 struct xenvif_rx_meta
*meta
;
214 struct xen_netif_rx_request
*req
;
216 req
= RING_GET_REQUEST(&vif
->rx
, vif
->rx
.req_cons
++);
218 meta
= npo
->meta
+ npo
->meta_prod
++;
219 meta
->gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
225 npo
->copy_gref
= req
->gref
;
231 * Set up the grant operations for this fragment. If it's a flipping
232 * interface, we also set up the unmap request from here.
234 static void xenvif_gop_frag_copy(struct xenvif
*vif
, struct sk_buff
*skb
,
235 struct netrx_pending_operations
*npo
,
236 struct page
*page
, unsigned long size
,
237 unsigned long offset
, int *head
,
238 struct xenvif
*foreign_vif
,
239 grant_ref_t foreign_gref
)
241 struct gnttab_copy
*copy_gop
;
242 struct xenvif_rx_meta
*meta
;
244 int gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
246 /* Data must not cross a page boundary. */
247 BUG_ON(size
+ offset
> PAGE_SIZE
<<compound_order(page
));
249 meta
= npo
->meta
+ npo
->meta_prod
- 1;
251 /* Skip unused frames from start of page */
252 page
+= offset
>> PAGE_SHIFT
;
253 offset
&= ~PAGE_MASK
;
256 BUG_ON(offset
>= PAGE_SIZE
);
257 BUG_ON(npo
->copy_off
> MAX_BUFFER_OFFSET
);
259 bytes
= PAGE_SIZE
- offset
;
264 if (start_new_rx_buffer(npo
->copy_off
, bytes
, *head
)) {
266 * Netfront requires there to be some data in the head
271 meta
= get_next_rx_buffer(vif
, npo
);
274 if (npo
->copy_off
+ bytes
> MAX_BUFFER_OFFSET
)
275 bytes
= MAX_BUFFER_OFFSET
- npo
->copy_off
;
277 copy_gop
= npo
->copy
+ npo
->copy_prod
++;
278 copy_gop
->flags
= GNTCOPY_dest_gref
;
279 copy_gop
->len
= bytes
;
282 copy_gop
->source
.domid
= foreign_vif
->domid
;
283 copy_gop
->source
.u
.ref
= foreign_gref
;
284 copy_gop
->flags
|= GNTCOPY_source_gref
;
286 copy_gop
->source
.domid
= DOMID_SELF
;
287 copy_gop
->source
.u
.gmfn
=
288 virt_to_mfn(page_address(page
));
290 copy_gop
->source
.offset
= offset
;
292 copy_gop
->dest
.domid
= vif
->domid
;
293 copy_gop
->dest
.offset
= npo
->copy_off
;
294 copy_gop
->dest
.u
.ref
= npo
->copy_gref
;
296 npo
->copy_off
+= bytes
;
303 if (offset
== PAGE_SIZE
&& size
) {
304 BUG_ON(!PageCompound(page
));
309 /* Leave a gap for the GSO descriptor. */
310 if (skb_is_gso(skb
)) {
311 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
)
312 gso_type
= XEN_NETIF_GSO_TYPE_TCPV4
;
313 else if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
)
314 gso_type
= XEN_NETIF_GSO_TYPE_TCPV6
;
317 if (*head
&& ((1 << gso_type
) & vif
->gso_mask
))
320 *head
= 0; /* There must be something in this buffer now. */
326 * Prepare an SKB to be transmitted to the frontend.
328 * This function is responsible for allocating grant operations, meta
331 * It returns the number of meta structures consumed. The number of
332 * ring slots used is always equal to the number of meta slots used
333 * plus the number of GSO descriptors used. Currently, we use either
334 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
335 * frontend-side LRO).
337 static int xenvif_gop_skb(struct sk_buff
*skb
,
338 struct netrx_pending_operations
*npo
)
340 struct xenvif
*vif
= netdev_priv(skb
->dev
);
341 int nr_frags
= skb_shinfo(skb
)->nr_frags
;
343 struct xen_netif_rx_request
*req
;
344 struct xenvif_rx_meta
*meta
;
349 struct ubuf_info
*ubuf
= skb_shinfo(skb
)->destructor_arg
;
350 grant_ref_t foreign_grefs
[MAX_SKB_FRAGS
];
351 struct xenvif
*foreign_vif
= NULL
;
353 old_meta_prod
= npo
->meta_prod
;
355 gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
356 if (skb_is_gso(skb
)) {
357 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
)
358 gso_type
= XEN_NETIF_GSO_TYPE_TCPV4
;
359 else if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
)
360 gso_type
= XEN_NETIF_GSO_TYPE_TCPV6
;
363 /* Set up a GSO prefix descriptor, if necessary */
364 if ((1 << gso_type
) & vif
->gso_prefix_mask
) {
365 req
= RING_GET_REQUEST(&vif
->rx
, vif
->rx
.req_cons
++);
366 meta
= npo
->meta
+ npo
->meta_prod
++;
367 meta
->gso_type
= gso_type
;
368 meta
->gso_size
= skb_shinfo(skb
)->gso_size
;
373 req
= RING_GET_REQUEST(&vif
->rx
, vif
->rx
.req_cons
++);
374 meta
= npo
->meta
+ npo
->meta_prod
++;
376 if ((1 << gso_type
) & vif
->gso_mask
) {
377 meta
->gso_type
= gso_type
;
378 meta
->gso_size
= skb_shinfo(skb
)->gso_size
;
380 meta
->gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
387 npo
->copy_gref
= req
->gref
;
389 if ((skb_shinfo(skb
)->tx_flags
& SKBTX_DEV_ZEROCOPY
) &&
390 (ubuf
->callback
== &xenvif_zerocopy_callback
)) {
392 foreign_vif
= ubuf_to_vif(ubuf
);
395 u16 pending_idx
= ubuf
->desc
;
397 foreign_vif
->pending_tx_info
[pending_idx
].req
.gref
;
398 ubuf
= (struct ubuf_info
*) ubuf
->ctx
;
403 while (data
< skb_tail_pointer(skb
)) {
404 unsigned int offset
= offset_in_page(data
);
405 unsigned int len
= PAGE_SIZE
- offset
;
407 if (data
+ len
> skb_tail_pointer(skb
))
408 len
= skb_tail_pointer(skb
) - data
;
410 xenvif_gop_frag_copy(vif
, skb
, npo
,
411 virt_to_page(data
), len
, offset
, &head
,
417 for (i
= 0; i
< nr_frags
; i
++) {
418 xenvif_gop_frag_copy(vif
, skb
, npo
,
419 skb_frag_page(&skb_shinfo(skb
)->frags
[i
]),
420 skb_frag_size(&skb_shinfo(skb
)->frags
[i
]),
421 skb_shinfo(skb
)->frags
[i
].page_offset
,
427 return npo
->meta_prod
- old_meta_prod
;
431 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
432 * used to set up the operations on the top of
433 * netrx_pending_operations, which have since been done. Check that
434 * they didn't give any errors and advance over them.
436 static int xenvif_check_gop(struct xenvif
*vif
, int nr_meta_slots
,
437 struct netrx_pending_operations
*npo
)
439 struct gnttab_copy
*copy_op
;
440 int status
= XEN_NETIF_RSP_OKAY
;
443 for (i
= 0; i
< nr_meta_slots
; i
++) {
444 copy_op
= npo
->copy
+ npo
->copy_cons
++;
445 if (copy_op
->status
!= GNTST_okay
) {
447 "Bad status %d from copy to DOM%d.\n",
448 copy_op
->status
, vif
->domid
);
449 status
= XEN_NETIF_RSP_ERROR
;
456 static void xenvif_add_frag_responses(struct xenvif
*vif
, int status
,
457 struct xenvif_rx_meta
*meta
,
461 unsigned long offset
;
463 /* No fragments used */
464 if (nr_meta_slots
<= 1)
469 for (i
= 0; i
< nr_meta_slots
; i
++) {
471 if (i
== nr_meta_slots
- 1)
474 flags
= XEN_NETRXF_more_data
;
477 make_rx_response(vif
, meta
[i
].id
, status
, offset
,
478 meta
[i
].size
, flags
);
482 struct xenvif_rx_cb
{
486 #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
488 void xenvif_kick_thread(struct xenvif
*vif
)
493 static void xenvif_rx_action(struct xenvif
*vif
)
497 struct xen_netif_rx_response
*resp
;
498 struct sk_buff_head rxq
;
502 unsigned long offset
;
503 bool need_to_notify
= false;
505 struct netrx_pending_operations npo
= {
506 .copy
= vif
->grant_copy_op
,
510 skb_queue_head_init(&rxq
);
512 while ((skb
= skb_dequeue(&vif
->rx_queue
)) != NULL
) {
513 RING_IDX max_slots_needed
;
514 RING_IDX old_req_cons
;
515 RING_IDX ring_slots_used
;
518 /* We need a cheap worse case estimate for the number of
522 max_slots_needed
= DIV_ROUND_UP(offset_in_page(skb
->data
) +
525 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
529 size
= skb_frag_size(&skb_shinfo(skb
)->frags
[i
]);
530 offset
= skb_shinfo(skb
)->frags
[i
].page_offset
;
532 /* For a worse-case estimate we need to factor in
533 * the fragment page offset as this will affect the
534 * number of times xenvif_gop_frag_copy() will
535 * call start_new_rx_buffer().
537 max_slots_needed
+= DIV_ROUND_UP(offset
+ size
,
541 /* To avoid the estimate becoming too pessimal for some
542 * frontends that limit posted rx requests, cap the estimate
545 if (max_slots_needed
> MAX_SKB_FRAGS
)
546 max_slots_needed
= MAX_SKB_FRAGS
;
548 /* We may need one more slot for GSO metadata */
549 if (skb_is_gso(skb
) &&
550 (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
||
551 skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
))
554 /* If the skb may not fit then bail out now */
555 if (!xenvif_rx_ring_slots_available(vif
, max_slots_needed
)) {
556 skb_queue_head(&vif
->rx_queue
, skb
);
557 need_to_notify
= true;
558 vif
->rx_last_skb_slots
= max_slots_needed
;
561 vif
->rx_last_skb_slots
= 0;
563 old_req_cons
= vif
->rx
.req_cons
;
564 XENVIF_RX_CB(skb
)->meta_slots_used
= xenvif_gop_skb(skb
, &npo
);
565 ring_slots_used
= vif
->rx
.req_cons
- old_req_cons
;
567 BUG_ON(ring_slots_used
> max_slots_needed
);
569 __skb_queue_tail(&rxq
, skb
);
572 BUG_ON(npo
.meta_prod
> ARRAY_SIZE(vif
->meta
));
577 BUG_ON(npo
.copy_prod
> MAX_GRANT_COPY_OPS
);
578 gnttab_batch_copy(vif
->grant_copy_op
, npo
.copy_prod
);
580 while ((skb
= __skb_dequeue(&rxq
)) != NULL
) {
582 if ((1 << vif
->meta
[npo
.meta_cons
].gso_type
) &
583 vif
->gso_prefix_mask
) {
584 resp
= RING_GET_RESPONSE(&vif
->rx
,
585 vif
->rx
.rsp_prod_pvt
++);
587 resp
->flags
= XEN_NETRXF_gso_prefix
| XEN_NETRXF_more_data
;
589 resp
->offset
= vif
->meta
[npo
.meta_cons
].gso_size
;
590 resp
->id
= vif
->meta
[npo
.meta_cons
].id
;
591 resp
->status
= XENVIF_RX_CB(skb
)->meta_slots_used
;
594 XENVIF_RX_CB(skb
)->meta_slots_used
--;
598 vif
->dev
->stats
.tx_bytes
+= skb
->len
;
599 vif
->dev
->stats
.tx_packets
++;
601 status
= xenvif_check_gop(vif
,
602 XENVIF_RX_CB(skb
)->meta_slots_used
,
605 if (XENVIF_RX_CB(skb
)->meta_slots_used
== 1)
608 flags
= XEN_NETRXF_more_data
;
610 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) /* local packet? */
611 flags
|= XEN_NETRXF_csum_blank
| XEN_NETRXF_data_validated
;
612 else if (skb
->ip_summed
== CHECKSUM_UNNECESSARY
)
613 /* remote but checksummed. */
614 flags
|= XEN_NETRXF_data_validated
;
617 resp
= make_rx_response(vif
, vif
->meta
[npo
.meta_cons
].id
,
619 vif
->meta
[npo
.meta_cons
].size
,
622 if ((1 << vif
->meta
[npo
.meta_cons
].gso_type
) &
624 struct xen_netif_extra_info
*gso
=
625 (struct xen_netif_extra_info
*)
626 RING_GET_RESPONSE(&vif
->rx
,
627 vif
->rx
.rsp_prod_pvt
++);
629 resp
->flags
|= XEN_NETRXF_extra_info
;
631 gso
->u
.gso
.type
= vif
->meta
[npo
.meta_cons
].gso_type
;
632 gso
->u
.gso
.size
= vif
->meta
[npo
.meta_cons
].gso_size
;
634 gso
->u
.gso
.features
= 0;
636 gso
->type
= XEN_NETIF_EXTRA_TYPE_GSO
;
640 xenvif_add_frag_responses(vif
, status
,
641 vif
->meta
+ npo
.meta_cons
+ 1,
642 XENVIF_RX_CB(skb
)->meta_slots_used
);
644 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif
->rx
, ret
);
646 need_to_notify
|= !!ret
;
648 npo
.meta_cons
+= XENVIF_RX_CB(skb
)->meta_slots_used
;
654 notify_remote_via_irq(vif
->rx_irq
);
657 void xenvif_check_rx_xenvif(struct xenvif
*vif
)
661 RING_FINAL_CHECK_FOR_REQUESTS(&vif
->tx
, more_to_do
);
664 napi_schedule(&vif
->napi
);
667 static void tx_add_credit(struct xenvif
*vif
)
669 unsigned long max_burst
, max_credit
;
672 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
673 * Otherwise the interface can seize up due to insufficient credit.
675 max_burst
= RING_GET_REQUEST(&vif
->tx
, vif
->tx
.req_cons
)->size
;
676 max_burst
= min(max_burst
, 131072UL);
677 max_burst
= max(max_burst
, vif
->credit_bytes
);
679 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
680 max_credit
= vif
->remaining_credit
+ vif
->credit_bytes
;
681 if (max_credit
< vif
->remaining_credit
)
682 max_credit
= ULONG_MAX
; /* wrapped: clamp to ULONG_MAX */
684 vif
->remaining_credit
= min(max_credit
, max_burst
);
687 static void tx_credit_callback(unsigned long data
)
689 struct xenvif
*vif
= (struct xenvif
*)data
;
691 xenvif_check_rx_xenvif(vif
);
694 static void xenvif_tx_err(struct xenvif
*vif
,
695 struct xen_netif_tx_request
*txp
, RING_IDX end
)
697 RING_IDX cons
= vif
->tx
.req_cons
;
701 spin_lock_irqsave(&vif
->response_lock
, flags
);
702 make_tx_response(vif
, txp
, XEN_NETIF_RSP_ERROR
);
703 spin_unlock_irqrestore(&vif
->response_lock
, flags
);
706 txp
= RING_GET_REQUEST(&vif
->tx
, cons
++);
708 vif
->tx
.req_cons
= cons
;
711 static void xenvif_fatal_tx_err(struct xenvif
*vif
)
713 netdev_err(vif
->dev
, "fatal error; disabling device\n");
714 xenvif_carrier_off(vif
);
717 static int xenvif_count_requests(struct xenvif
*vif
,
718 struct xen_netif_tx_request
*first
,
719 struct xen_netif_tx_request
*txp
,
722 RING_IDX cons
= vif
->tx
.req_cons
;
727 if (!(first
->flags
& XEN_NETTXF_more_data
))
731 struct xen_netif_tx_request dropped_tx
= { 0 };
733 if (slots
>= work_to_do
) {
735 "Asked for %d slots but exceeds this limit\n",
737 xenvif_fatal_tx_err(vif
);
741 /* This guest is really using too many slots and
742 * considered malicious.
744 if (unlikely(slots
>= fatal_skb_slots
)) {
746 "Malicious frontend using %d slots, threshold %u\n",
747 slots
, fatal_skb_slots
);
748 xenvif_fatal_tx_err(vif
);
752 /* Xen network protocol had implicit dependency on
753 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
754 * the historical MAX_SKB_FRAGS value 18 to honor the
755 * same behavior as before. Any packet using more than
756 * 18 slots but less than fatal_skb_slots slots is
759 if (!drop_err
&& slots
>= XEN_NETBK_LEGACY_SLOTS_MAX
) {
762 "Too many slots (%d) exceeding limit (%d), dropping packet\n",
763 slots
, XEN_NETBK_LEGACY_SLOTS_MAX
);
770 memcpy(txp
, RING_GET_REQUEST(&vif
->tx
, cons
+ slots
),
773 /* If the guest submitted a frame >= 64 KiB then
774 * first->size overflowed and following slots will
775 * appear to be larger than the frame.
777 * This cannot be fatal error as there are buggy
778 * frontends that do this.
780 * Consume all slots and drop the packet.
782 if (!drop_err
&& txp
->size
> first
->size
) {
785 "Invalid tx request, slot size %u > remaining size %u\n",
786 txp
->size
, first
->size
);
790 first
->size
-= txp
->size
;
793 if (unlikely((txp
->offset
+ txp
->size
) > PAGE_SIZE
)) {
794 netdev_err(vif
->dev
, "Cross page boundary, txp->offset: %x, size: %u\n",
795 txp
->offset
, txp
->size
);
796 xenvif_fatal_tx_err(vif
);
800 more_data
= txp
->flags
& XEN_NETTXF_more_data
;
808 xenvif_tx_err(vif
, first
, cons
+ slots
);
816 struct xenvif_tx_cb
{
820 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
822 static inline void xenvif_tx_create_gop(struct xenvif
*vif
,
824 struct xen_netif_tx_request
*txp
,
825 struct gnttab_map_grant_ref
*gop
)
827 vif
->pages_to_map
[gop
-vif
->tx_map_ops
] = vif
->mmap_pages
[pending_idx
];
828 gnttab_set_map_op(gop
, idx_to_kaddr(vif
, pending_idx
),
829 GNTMAP_host_map
| GNTMAP_readonly
,
830 txp
->gref
, vif
->domid
);
832 memcpy(&vif
->pending_tx_info
[pending_idx
].req
, txp
,
836 static inline struct sk_buff
*xenvif_alloc_skb(unsigned int size
)
838 struct sk_buff
*skb
=
839 alloc_skb(size
+ NET_SKB_PAD
+ NET_IP_ALIGN
,
840 GFP_ATOMIC
| __GFP_NOWARN
);
841 if (unlikely(skb
== NULL
))
844 /* Packets passed to netif_rx() must have some headroom. */
845 skb_reserve(skb
, NET_SKB_PAD
+ NET_IP_ALIGN
);
847 /* Initialize it here to avoid later surprises */
848 skb_shinfo(skb
)->destructor_arg
= NULL
;
853 static struct gnttab_map_grant_ref
*xenvif_get_requests(struct xenvif
*vif
,
855 struct xen_netif_tx_request
*txp
,
856 struct gnttab_map_grant_ref
*gop
)
858 struct skb_shared_info
*shinfo
= skb_shinfo(skb
);
859 skb_frag_t
*frags
= shinfo
->frags
;
860 u16 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
862 pending_ring_idx_t index
;
863 unsigned int nr_slots
, frag_overflow
= 0;
865 /* At this point shinfo->nr_frags is in fact the number of
866 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
868 if (shinfo
->nr_frags
> MAX_SKB_FRAGS
) {
869 frag_overflow
= shinfo
->nr_frags
- MAX_SKB_FRAGS
;
870 BUG_ON(frag_overflow
> MAX_SKB_FRAGS
);
871 shinfo
->nr_frags
= MAX_SKB_FRAGS
;
873 nr_slots
= shinfo
->nr_frags
;
875 /* Skip first skb fragment if it is on same page as header fragment. */
876 start
= (frag_get_pending_idx(&shinfo
->frags
[0]) == pending_idx
);
878 for (shinfo
->nr_frags
= start
; shinfo
->nr_frags
< nr_slots
;
879 shinfo
->nr_frags
++, txp
++, gop
++) {
880 index
= pending_index(vif
->pending_cons
++);
881 pending_idx
= vif
->pending_ring
[index
];
882 xenvif_tx_create_gop(vif
, pending_idx
, txp
, gop
);
883 frag_set_pending_idx(&frags
[shinfo
->nr_frags
], pending_idx
);
887 struct sk_buff
*nskb
= xenvif_alloc_skb(0);
888 if (unlikely(nskb
== NULL
)) {
891 "Can't allocate the frag_list skb.\n");
895 shinfo
= skb_shinfo(nskb
);
896 frags
= shinfo
->frags
;
898 for (shinfo
->nr_frags
= 0; shinfo
->nr_frags
< frag_overflow
;
899 shinfo
->nr_frags
++, txp
++, gop
++) {
900 index
= pending_index(vif
->pending_cons
++);
901 pending_idx
= vif
->pending_ring
[index
];
902 xenvif_tx_create_gop(vif
, pending_idx
, txp
, gop
);
903 frag_set_pending_idx(&frags
[shinfo
->nr_frags
],
907 skb_shinfo(skb
)->frag_list
= nskb
;
913 static inline void xenvif_grant_handle_set(struct xenvif
*vif
,
915 grant_handle_t handle
)
917 if (unlikely(vif
->grant_tx_handle
[pending_idx
] !=
918 NETBACK_INVALID_HANDLE
)) {
920 "Trying to overwrite active handle! pending_idx: %x\n",
924 vif
->grant_tx_handle
[pending_idx
] = handle
;
927 static inline void xenvif_grant_handle_reset(struct xenvif
*vif
,
930 if (unlikely(vif
->grant_tx_handle
[pending_idx
] ==
931 NETBACK_INVALID_HANDLE
)) {
933 "Trying to unmap invalid handle! pending_idx: %x\n",
937 vif
->grant_tx_handle
[pending_idx
] = NETBACK_INVALID_HANDLE
;
940 static int xenvif_tx_check_gop(struct xenvif
*vif
,
942 struct gnttab_map_grant_ref
**gopp
)
944 struct gnttab_map_grant_ref
*gop
= *gopp
;
945 u16 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
946 struct skb_shared_info
*shinfo
= skb_shinfo(skb
);
947 struct pending_tx_info
*tx_info
;
948 int nr_frags
= shinfo
->nr_frags
;
950 struct sk_buff
*first_skb
= NULL
;
952 /* Check status of header. */
955 xenvif_idx_release(vif
, pending_idx
, XEN_NETIF_RSP_ERROR
);
957 xenvif_grant_handle_set(vif
, pending_idx
, gop
->handle
);
959 /* Skip first skb fragment if it is on same page as header fragment. */
960 start
= (frag_get_pending_idx(&shinfo
->frags
[0]) == pending_idx
);
963 for (i
= start
; i
< nr_frags
; i
++) {
966 pending_idx
= frag_get_pending_idx(&shinfo
->frags
[i
]);
967 tx_info
= &vif
->pending_tx_info
[pending_idx
];
969 /* Check error status: if okay then remember grant handle. */
970 newerr
= (++gop
)->status
;
972 if (likely(!newerr
)) {
973 xenvif_grant_handle_set(vif
, pending_idx
, gop
->handle
);
974 /* Had a previous error? Invalidate this fragment. */
976 xenvif_idx_unmap(vif
, pending_idx
);
980 /* Error on this fragment: respond to client with an error. */
981 xenvif_idx_release(vif
, pending_idx
, XEN_NETIF_RSP_ERROR
);
983 /* Not the first error? Preceding frags already invalidated. */
986 /* First error: invalidate header and preceding fragments. */
988 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
990 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
991 xenvif_idx_unmap(vif
, pending_idx
);
992 for (j
= start
; j
< i
; j
++) {
993 pending_idx
= frag_get_pending_idx(&shinfo
->frags
[j
]);
994 xenvif_idx_unmap(vif
, pending_idx
);
997 /* Remember the error: invalidate all subsequent fragments. */
1001 if (skb_has_frag_list(skb
)) {
1003 skb
= shinfo
->frag_list
;
1004 shinfo
= skb_shinfo(skb
);
1005 nr_frags
= shinfo
->nr_frags
;
1011 /* There was a mapping error in the frag_list skb. We have to unmap
1012 * the first skb's frags
1014 if (first_skb
&& err
) {
1016 shinfo
= skb_shinfo(first_skb
);
1017 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
1018 start
= (frag_get_pending_idx(&shinfo
->frags
[0]) == pending_idx
);
1019 for (j
= start
; j
< shinfo
->nr_frags
; j
++) {
1020 pending_idx
= frag_get_pending_idx(&shinfo
->frags
[j
]);
1021 xenvif_idx_unmap(vif
, pending_idx
);
1029 static void xenvif_fill_frags(struct xenvif
*vif
, struct sk_buff
*skb
)
1031 struct skb_shared_info
*shinfo
= skb_shinfo(skb
);
1032 int nr_frags
= shinfo
->nr_frags
;
1034 u16 prev_pending_idx
= INVALID_PENDING_IDX
;
1036 if (skb_shinfo(skb
)->destructor_arg
)
1037 prev_pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
1039 for (i
= 0; i
< nr_frags
; i
++) {
1040 skb_frag_t
*frag
= shinfo
->frags
+ i
;
1041 struct xen_netif_tx_request
*txp
;
1045 pending_idx
= frag_get_pending_idx(frag
);
1047 /* If this is not the first frag, chain it to the previous*/
1048 if (unlikely(prev_pending_idx
== INVALID_PENDING_IDX
))
1049 skb_shinfo(skb
)->destructor_arg
=
1050 &callback_param(vif
, pending_idx
);
1051 else if (likely(pending_idx
!= prev_pending_idx
))
1052 callback_param(vif
, prev_pending_idx
).ctx
=
1053 &callback_param(vif
, pending_idx
);
1055 callback_param(vif
, pending_idx
).ctx
= NULL
;
1056 prev_pending_idx
= pending_idx
;
1058 txp
= &vif
->pending_tx_info
[pending_idx
].req
;
1059 page
= virt_to_page(idx_to_kaddr(vif
, pending_idx
));
1060 __skb_fill_page_desc(skb
, i
, page
, txp
->offset
, txp
->size
);
1061 skb
->len
+= txp
->size
;
1062 skb
->data_len
+= txp
->size
;
1063 skb
->truesize
+= txp
->size
;
1065 /* Take an extra reference to offset network stack's put_page */
1066 get_page(vif
->mmap_pages
[pending_idx
]);
1068 /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
1069 * overlaps with "index", and "mapping" is not set. I think mapping
1070 * should be set. If delivered to local stack, it would drop this
1071 * skb in sk_filter unless the socket has the right to use it.
1073 skb
->pfmemalloc
= false;
1076 static int xenvif_get_extras(struct xenvif
*vif
,
1077 struct xen_netif_extra_info
*extras
,
1080 struct xen_netif_extra_info extra
;
1081 RING_IDX cons
= vif
->tx
.req_cons
;
1084 if (unlikely(work_to_do
-- <= 0)) {
1085 netdev_err(vif
->dev
, "Missing extra info\n");
1086 xenvif_fatal_tx_err(vif
);
1090 memcpy(&extra
, RING_GET_REQUEST(&vif
->tx
, cons
),
1092 if (unlikely(!extra
.type
||
1093 extra
.type
>= XEN_NETIF_EXTRA_TYPE_MAX
)) {
1094 vif
->tx
.req_cons
= ++cons
;
1095 netdev_err(vif
->dev
,
1096 "Invalid extra type: %d\n", extra
.type
);
1097 xenvif_fatal_tx_err(vif
);
1101 memcpy(&extras
[extra
.type
- 1], &extra
, sizeof(extra
));
1102 vif
->tx
.req_cons
= ++cons
;
1103 } while (extra
.flags
& XEN_NETIF_EXTRA_FLAG_MORE
);
1108 static int xenvif_set_skb_gso(struct xenvif
*vif
,
1109 struct sk_buff
*skb
,
1110 struct xen_netif_extra_info
*gso
)
1112 if (!gso
->u
.gso
.size
) {
1113 netdev_err(vif
->dev
, "GSO size must not be zero.\n");
1114 xenvif_fatal_tx_err(vif
);
1118 switch (gso
->u
.gso
.type
) {
1119 case XEN_NETIF_GSO_TYPE_TCPV4
:
1120 skb_shinfo(skb
)->gso_type
= SKB_GSO_TCPV4
;
1122 case XEN_NETIF_GSO_TYPE_TCPV6
:
1123 skb_shinfo(skb
)->gso_type
= SKB_GSO_TCPV6
;
1126 netdev_err(vif
->dev
, "Bad GSO type %d.\n", gso
->u
.gso
.type
);
1127 xenvif_fatal_tx_err(vif
);
1131 skb_shinfo(skb
)->gso_size
= gso
->u
.gso
.size
;
1132 /* gso_segs will be calculated later */
1137 static int checksum_setup(struct xenvif
*vif
, struct sk_buff
*skb
)
1139 bool recalculate_partial_csum
= false;
1141 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1142 * peers can fail to set NETRXF_csum_blank when sending a GSO
1143 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1144 * recalculate the partial checksum.
1146 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
&& skb_is_gso(skb
)) {
1147 vif
->rx_gso_checksum_fixup
++;
1148 skb
->ip_summed
= CHECKSUM_PARTIAL
;
1149 recalculate_partial_csum
= true;
1152 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1153 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
)
1156 return skb_checksum_setup(skb
, recalculate_partial_csum
);
1159 static bool tx_credit_exceeded(struct xenvif
*vif
, unsigned size
)
1161 u64 now
= get_jiffies_64();
1162 u64 next_credit
= vif
->credit_window_start
+
1163 msecs_to_jiffies(vif
->credit_usec
/ 1000);
1165 /* Timer could already be pending in rare cases. */
1166 if (timer_pending(&vif
->credit_timeout
))
1169 /* Passed the point where we can replenish credit? */
1170 if (time_after_eq64(now
, next_credit
)) {
1171 vif
->credit_window_start
= now
;
1175 /* Still too big to send right now? Set a callback. */
1176 if (size
> vif
->remaining_credit
) {
1177 vif
->credit_timeout
.data
=
1179 vif
->credit_timeout
.function
=
1181 mod_timer(&vif
->credit_timeout
,
1183 vif
->credit_window_start
= next_credit
;
/*
 * Drain TX requests from the frontend's shared ring and convert them into
 * grant-map operations in vif->tx_map_ops, queueing the matching skbs on
 * vif->tx_queue for xenvif_tx_submit().  Returns the number of map ops built.
 *
 * NOTE(review): this excerpt is missing several original source lines
 * (function braces, error-path break/continue statements and some argument
 * lists), so a few fragments below are incomplete as shown — compare against
 * the upstream file before relying on exact control flow.
 */
static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
	struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
	struct sk_buff *skb;

	/* Build at most 'budget' packets per invocation. */
	while (skb_queue_len(&vif->tx_queue) < budget) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
		unsigned int data_len;
		pending_ring_idx_t index;

		/* A frontend advertising more outstanding requests than the
		 * ring can hold is buggy or malicious: fatal for this vif.
		 */
		if (vif->tx.sring->req_prod - vif->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   vif->tx.sring->req_prod, vif->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(vif);

		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > vif->remaining_credit &&
		    tx_credit_exceeded(vif, txreq.size))

		vif->remaining_credit -= txreq.size;

		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		/* Optional extra-info slots (e.g. GSO metadata) follow the
		 * first request on the ring.
		 */
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(vif, extras,
			idx = vif->tx.req_cons;
			if (unlikely(work_to_do < 0))

		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))

		/* Runt frames are dropped with an error response. */
		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(vif, &txreq, idx);

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_err(vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset &~PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(vif);

		index = pending_index(vif->pending_cons);
		pending_idx = vif->pending_ring[index];

		/* Linear area is capped at PKT_PROT_LEN when the payload
		 * continues in fragments; otherwise copy the whole request.
		 */
		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			PKT_PROT_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(vif, &txreq, idx);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */

		xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			/* Remainder of the first request becomes frag 0. */
			skb_shinfo(skb)->nr_frags++;
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);

		vif->pending_cons++;

		request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
		if (request_gop == NULL) {
			xenvif_tx_err(vif, &txreq, idx);

		__skb_queue_tail(&vif->tx_queue, skb);

		vif->tx.req_cons = idx;

		/* Stop early if the map-op array is full. */
		if ((gop - vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))

	return gop - vif->tx_map_ops;
/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 or -ENOMEM if can't allocate new pages.
 *
 * NOTE(review): several original lines (braces, loop internals, the page
 * allocation failure check and the memcpy source argument) are elided in
 * this excerpt; fragments below are incomplete as shown.
 */
static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	/* Both skb and nskb carried granted pages, hence the += 2. */
	vif->tx_zerocopy_sent += 2;
	vif->tx_frag_overflow++;

	xenvif_fill_frags(vif, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
			/* Allocation failed: roll back the pages copied so
			 * far and restore truesize before returning -ENOMEM.
			 */
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);

		/* Copy at most one page's worth per iteration. */
		if (offset + PAGE_SIZE < skb->len)
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))

		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);

	/* swap out with old one */
	memcpy(skb_shinfo(skb)->frags,
	       i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	/* remove traces of mapped pages and frag_list */
	skb_frag_list_init(skb);
	uarg = skb_shinfo(skb)->destructor_arg;
	/* Release the grant-mapped pages now that data lives in local pages. */
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
/*
 * Second half of the TX path: for each skb queued by
 * xenvif_tx_build_gops(), verify the grant-map results, copy/steal the
 * payload into place, fix up checksum and GSO metadata, and hand the
 * packet to the network stack via netif_receive_skb().
 * Returns the work done (accumulated packet count — accumulation lines
 * are elided in this excerpt).
 *
 * NOTE(review): braces, 'continue' statements, the memcpy of the linear
 * header and some declarations are missing from this excerpt.
 */
static int xenvif_tx_submit(struct xenvif *vif)
	struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &vif->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
			netdev_dbg(vif->dev, "netback grant failed.\n");
			skb_shinfo(skb)->nr_frags = 0;

		data_len = skb->len;
		/* Source of the linear-header copy: mapped grant page plus
		 * the request's offset within it.
		 */
		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
		callback_param(vif, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
			skb_shinfo(skb)->destructor_arg =
				&callback_param(vif, pending_idx);
			/* Schedule a response immediately. */
			xenvif_idx_unmap(vif, pending_idx);

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(vif, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(vif, skb)) {
				if (net_ratelimit())
					netdev_err(vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;

		/* Ensure at least PKT_PROT_LEN bytes are linear so protocol
		 * headers can be parsed without touching fragments.
		 */
		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);

		/* Guest TX counts as RX from the host's point of view. */
		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
			vif->tx_zerocopy_sent++;

		netif_receive_skb(skb);
/*
 * Zerocopy-completion callback: the network stack is done with the pages
 * of one (or a chain of) skb(s); push every pending index of the chain
 * onto the dealloc ring and wake the dealloc kthread.
 * Runs in whatever context the skb destructor fires, hence irqsave.
 *
 * NOTE(review): the do/while loop around the chain walk and the
 * MAX_PENDING_REQS operand of the BUG_ON are elided in this excerpt.
 */
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
	unsigned long flags;
	pending_ring_idx_t index;
	struct xenvif *vif = ubuf_to_vif(ubuf);

	/* This is the only place where we grab this lock, to protect callbacks
	 */
	spin_lock_irqsave(&vif->callback_lock, flags);
		u16 pending_idx = ubuf->desc;
		/* Follow the chain of ubufs linked through ->ctx. */
		ubuf = (struct ubuf_info *) ubuf->ctx;
		BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
		index = pending_index(vif->dealloc_prod);
		vif->dealloc_ring[index] = pending_idx;
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		vif->dealloc_prod++;
	wake_up(&vif->dealloc_wq);
	spin_unlock_irqrestore(&vif->callback_lock, flags);

	if (likely(zerocopy_success))
		vif->tx_zerocopy_success++;
		vif->tx_zerocopy_fail++;
/*
 * Consume the dealloc ring filled by xenvif_zerocopy_callback(): build
 * grant-unmap operations for every completed pending index, unmap them in
 * one gnttab_unmap_refs() batch, then release the indices back to the
 * pending ring with an OKAY response.
 *
 * NOTE(review): outer do/while, the smp_rmb() barrier, the pending_idx
 * assignment target and parts of the error reporting are elided in this
 * excerpt.
 */
static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];

	dc = vif->dealloc_cons;
	gop = vif->tx_unmap_ops;

	/* Free up any grants we have finished using */
		dp = vif->dealloc_prod;

		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback().
		 */

			BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
				vif->dealloc_ring[pending_index(dc++)];

			/* Remember which index each unmap op belongs to so it
			 * can be released after the batch completes.
			 */
			pending_idx_release[gop-vif->tx_unmap_ops] =
			vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
				vif->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(vif, pending_idx),
					    vif->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(vif, pending_idx);

	/* Re-check: the callback may have produced more while we worked. */
	} while (dp != vif->dealloc_prod);

	vif->dealloc_cons = dc;

	if (gop - vif->tx_unmap_ops > 0) {
		ret = gnttab_unmap_refs(vif->tx_unmap_ops,
					vif->pages_to_unmap,
					gop - vif->tx_unmap_ops);
			netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
				   gop - vif->tx_unmap_ops, ret);
			for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(vif->dev,
						   " host_addr: %llx handle: %x status: %d\n",

	for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
		xenvif_idx_release(vif, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
/* Called after netfront has transmitted */
/*
 * Top-level TX entry: build grant-map ops from the ring, batch-map them
 * with gnttab_map_refs(), then submit the packets to the stack.
 *
 * NOTE(review): early-return values, the map-op count guard, the
 * remaining gnttab_map_refs() arguments and the final return are elided
 * in this excerpt.
 */
int xenvif_tx_action(struct xenvif *vif, int budget)

	if (unlikely(!tx_work_todo(vif)))

	nr_gops = xenvif_tx_build_gops(vif, budget);

	ret = gnttab_map_refs(vif->tx_map_ops,

	work_done = xenvif_tx_submit(vif);
/*
 * Return a completed pending index to the frontend: send the TX response
 * for its stored request and push the index back on the pending ring.
 * Serialised against concurrent releasers by vif->response_lock.
 *
 * NOTE(review): the 'status' parameter declaration and an smp_wmb()
 * before the producer increment are elided in this excerpt.
 */
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;
	unsigned long flags;

	pending_tx_info = &vif->pending_tx_info[pending_idx];
	spin_lock_irqsave(&vif->response_lock, flags);
	make_tx_response(vif, &pending_tx_info->req, status);
	index = pending_index(vif->pending_prod);
	vif->pending_ring[index] = pending_idx;
	/* TX shouldn't use the index before we give it back here */
	vif->pending_prod++;
	spin_unlock_irqrestore(&vif->response_lock, flags);
/*
 * Write a TX response onto the shared ring for request 'txp', consuming a
 * second NULL response slot if the request carried extra info, then push
 * the responses and notify the frontend via event channel if required.
 *
 * NOTE(review): the status parameter, response id/status assignments and
 * the notify conditional are elided in this excerpt.
 */
static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
	RING_IDX i = vif->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;

	resp = RING_GET_RESPONSE(&vif->tx, i);
	/* An extra-info slot consumed a ring entry; answer it with NULL. */
	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	vif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
		notify_remote_via_irq(vif->tx_irq);
/*
 * Fill in the next RX response slot on the shared ring and advance the
 * private response producer.  Returns a pointer to the response
 * (the 'return resp;' line is elided in this excerpt).
 *
 * NOTE(review): the remaining parameters (id, st, size, offset, flags)
 * and the error conditional selecting between 'size' and 'st' for
 * resp->status are elided in this excerpt.
 */
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
	RING_IDX i = vif->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&vif->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
		resp->status = (s16)size;
		resp->status = (s16)st;

	vif->rx.rsp_prod_pvt = ++i;
/*
 * Synchronously unmap the grant behind a single pending index (used when
 * the response must be sent immediately rather than via the dealloc
 * ring), then release the index with an OKAY status.
 *
 * NOTE(review): the GNTMAP flags argument, the BUG_ON on unmap failure
 * and some netdev_err arguments are elided in this excerpt.
 */
void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
	struct gnttab_unmap_grant_ref tx_unmap_op;

	gnttab_set_unmap_op(&tx_unmap_op,
			    idx_to_kaddr(vif, pending_idx),
			    vif->grant_tx_handle[pending_idx]);
	xenvif_grant_handle_reset(vif, pending_idx);

	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				&vif->mmap_pages[pending_idx], 1);
		netdev_err(vif->dev,
			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
			   tx_unmap_op.host_addr,
			   tx_unmap_op.status);

	xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
1725 static inline int rx_work_todo(struct xenvif
*vif
)
1727 return (!skb_queue_empty(&vif
->rx_queue
) &&
1728 xenvif_rx_ring_slots_available(vif
, vif
->rx_last_skb_slots
)) ||
1729 vif
->rx_queue_purge
;
/*
 * Does the TX path have unconsumed requests on the shared ring?
 * NOTE(review): the 'return 1;' / 'return 0;' lines are elided in this
 * excerpt.
 */
static inline int tx_work_todo(struct xenvif *vif)

	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
1741 static inline bool tx_dealloc_work_todo(struct xenvif
*vif
)
1743 return vif
->dealloc_cons
!= vif
->dealloc_prod
;
/*
 * Tear down the mappings of the frontend's TX and RX shared rings,
 * mirroring xenvif_map_frontend_rings().
 * NOTE(review): the ring-pointer arguments to both unmap calls are
 * elided in this excerpt.
 */
void xenvif_unmap_frontend_rings(struct xenvif *vif)

		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
/*
 * Map the frontend's TX and RX shared-ring grants into backend address
 * space and initialise the back-ring structures over them.
 * Returns 0 on success; on failure unmaps whatever was mapped.
 *
 * NOTE(review): the 'addr'/'err' declarations, the error-check gotos
 * after each map and the return statements are elided in this excerpt.
 */
int xenvif_map_frontend_rings(struct xenvif *vif,
			      grant_ref_t tx_ring_ref,
			      grant_ref_t rx_ring_ref)
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     tx_ring_ref, &addr);

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
				     rx_ring_ref, &addr);

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

	/* Error path: undo any mapping that succeeded. */
	xenvif_unmap_frontend_rings(vif);
/*
 * Stop the netdev queue, but only for vifs configured to queue packets.
 * NOTE(review): the early 'return;' for the !can_queue case is elided in
 * this excerpt.
 */
void xenvif_stop_queue(struct xenvif *vif)

	if (!vif->can_queue)

	netif_stop_queue(vif->dev);
1797 static void xenvif_start_queue(struct xenvif
*vif
)
1799 if (xenvif_schedulable(vif
))
1800 netif_wake_queue(vif
->dev
);
/*
 * Per-vif kthread delivering backend-to-guest (RX) traffic: sleeps until
 * rx_work_todo(), honours purge requests, pushes queued skbs to the
 * shared ring via xenvif_rx_action(), and restarts the netdev queue once
 * the backlog drains.  On stop, drops any skbs still queued.
 *
 * NOTE(review): braces, the 'break' after kthread_should_stop(), a
 * cond_resched()/schedule point, the skb-free in the drain loop and the
 * final 'return 0;' are elided in this excerpt.
 */
int xenvif_kthread_guest_rx(void *data)
	struct xenvif *vif = data;
	struct sk_buff *skb;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->wq,
					 rx_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())

		if (vif->rx_queue_purge) {
			/* Discard everything queued, then clear the flag. */
			skb_queue_purge(&vif->rx_queue);
			vif->rx_queue_purge = false;

		if (!skb_queue_empty(&vif->rx_queue))
			xenvif_rx_action(vif);

		/* Backlog drained while the queue was stopped: cancel the
		 * wake timer and restart the queue ourselves.
		 */
		if (skb_queue_empty(&vif->rx_queue) &&
		    netif_queue_stopped(vif->dev)) {
			del_timer_sync(&vif->wake_queue);
			xenvif_start_queue(vif);

	/* Bin any remaining skbs */
	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
/*
 * Per-vif kthread that unmaps grants for completed zerocopy TX packets:
 * sleeps until tx_dealloc_work_todo(), then batches unmaps via
 * xenvif_tx_dealloc_action(); performs a final pass on shutdown.
 *
 * NOTE(review): the 'break' after kthread_should_stop() and the final
 * 'return 0;' are elided in this excerpt.
 */
int xenvif_dealloc_kthread(void *data)
	struct xenvif *vif = data;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->dealloc_wq,
					 tx_dealloc_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())

		xenvif_tx_dealloc_action(vif);

	/* Unmap anything remaining*/
	if (tx_dealloc_work_todo(vif))
		xenvif_tx_dealloc_action(vif);
/*
 * Module init: sanity-check the fatal_skb_slots module parameter (it must
 * be at least XEN_NETBK_LEGACY_SLOTS_MAX), register the xenbus backend,
 * and precompute the RX drain timeout in jiffies.
 *
 * NOTE(review): the Xen-domain check, 'rc' declaration, error handling
 * after xenvif_xenbus_init() and the return statements are elided in
 * this excerpt.
 */
static int __init netback_init(void)

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;

	rc = xenvif_xenbus_init();

	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);

module_init(netback_init);
/* Module exit: unregister the xenbus backend driver. */
static void __exit netback_fini(void)

	xenvif_xenbus_fini();

module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
/* Autoload when a "vif" backend device appears on xenbus. */
MODULE_ALIAS("xen-backend:vif");