/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
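
/* Illustrative usage (not part of this file): loading the module with
 * "modprobe xen-netback separate_tx_rx_irq=0" makes each queue share a
 * single event channel for TX and RX instead of using one per direction.
 */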
/* When the guest ring is filled up, qdisc queues the packets for us, but we
 * have to time them out, otherwise other guests' packets can get stuck
 * there.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");
/*
 * This is the maximum number of slots a skb can have. If a guest sends a
 * skb which exceeds this limit, it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     s8 st);

static inline int tx_work_todo(struct xenvif_queue *queue);
static inline int rx_work_todo(struct xenvif_queue *queue);

static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags);
static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
				       u16 idx)
{
	return page_to_pfn(queue->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}
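
/* Shorthand for the zerocopy callback bookkeeping embedded in each
 * pending_tx_info slot; the TX completion path below uses it to chain
 * the ubuf_info structures belonging to the same skb.
 */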
#define callback_param(vif, pending_idx) \
	(vif->pending_tx_info[pending_idx].callback_struct)
/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);
	return container_of(temp - pending_idx,
			    struct xenvif_queue,
			    pending_tx_info[0]);
}
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN 128
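
/* Worked example (assuming no header options): Ethernet + IPv4 + TCP is
 * 14 + 20 + 20 = 54 bytes and Ethernet + IPv6 + TCP is 14 + 40 + 20 = 74
 * bytes, so 128 bytes covers both with room to spare for typical options.
 */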
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}
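
/* Note: the masking in pending_index() is only a correct modulo because
 * MAX_PENDING_REQS is a power of two; the ring indices are free-running
 * and wrap naturally.
 */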
bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
{
	RING_IDX prod, cons;

	do {
		prod = queue->rx.sring->req_prod;
		cons = queue->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		queue->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
		 */
		mb();
	} while (queue->rx.sring->req_prod != prod);

	return false;
}
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head,
				bool full_coalesce)
{
	/* simple case: we have completely filled the current buffer. */
	if (offset == MAX_BUFFER_OFFSET)
		return true;

	/*
	 * complex case: start a fresh buffer if the current frag
	 * would overflow the current buffer but only if:
	 *     (i) this frag would fit completely in the next buffer
	 * and (ii) there is already some data in the current buffer
	 * and (iii) this is not the head buffer.
	 * and (iv) there is no need to fully utilize the buffers
	 *
	 * Where:
	 * - (i) stops us splitting a frag into two copies
	 *   unless the frag is too large for a single buffer.
	 * - (ii) stops us from leaving a buffer pointlessly empty.
	 * - (iii) stops us leaving the first buffer
	 *   empty. Strictly speaking this is already covered
	 *   by (ii) but is explicitly checked because
	 *   netfront relies on the first buffer being
	 *   non-empty and can crash otherwise.
	 * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
	 *   slots.
	 *
	 * This means we will effectively linearise small
	 * frags but do not needlessly split large buffers
	 * into multiple copies, tending to give large frags their
	 * own buffers as before.
	 */
	BUG_ON(size > MAX_BUFFER_OFFSET);
	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
	    !full_coalesce)
		return true;

	return false;
}
struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct xenvif_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};
static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
						 struct netrx_pending_operations *npo)
{
	struct xenvif_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}
struct xenvif_rx_cb {
	int meta_slots_used;
	bool full_coalesce;
};

#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
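
/* The control blocks stored this way live in skb->cb, which is only 48
 * bytes, so xenvif_rx_cb here and xenvif_tx_cb further down must both
 * stay within that limit.
 */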
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
				 unsigned long offset, int *head,
				 struct xenvif_queue *foreign_queue,
				 grant_ref_t foreign_gref)
{
	struct gnttab_copy *copy_gop;
	struct xenvif_rx_meta *meta;
	unsigned long bytes;
	int gso_type = XEN_NETIF_GSO_TYPE_NONE;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(npo->copy_off,
					bytes,
					*head,
					XENVIF_RX_CB(skb)->full_coalesce)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*head);

			meta = get_next_rx_buffer(queue, npo);
		}

		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		copy_gop->len = bytes;

		if (foreign_queue) {
			copy_gop->source.domid = foreign_queue->vif->domid;
			copy_gop->source.u.ref = foreign_gref;
			copy_gop->flags |= GNTCOPY_source_gref;
		} else {
			copy_gop->source.domid = DOMID_SELF;
			copy_gop->source.u.gmfn =
				virt_to_mfn(page_address(page));
		}
		copy_gop->source.offset = offset;

		copy_gop->dest.domid = queue->vif->domid;
		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Next frame */
		if (offset == PAGE_SIZE && size) {
			BUG_ON(!PageCompound(page));
			page++;
			offset = 0;
		}

		/* Leave a gap for the GSO descriptor. */
		if (skb_is_gso(skb)) {
			if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
				gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
			else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
				gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
		}

		if (*head && ((1 << gso_type) & queue->vif->gso_mask))
			queue->rx.req_cons++;

		*head = 0; /* There must be something in this buffer now. */
	}
}
/*
 * Find the grant ref for a given frag in a chain of struct ubuf_info's
 * skb: the skb itself
 * i: the frag's number
 * ubuf: a pointer to an element in the chain. It should not be NULL
 *
 * Returns a pointer to the element in the chain where the page was found. If
 * not found, returns NULL.
 * See the definition of callback_struct in common.h for more details about
 * the chain.
 */
static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb,
						const int i,
						const struct ubuf_info *ubuf)
{
	while (ubuf) {
		struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf);
		const u16 pending_idx = ubuf->desc;

		if (skb_shinfo(skb)->frags[i].page.p ==
		    foreign_queue->mmap_pages[pending_idx])
			break;
		ubuf = (struct ubuf_info *) ubuf->ctx;
	}

	return ubuf;
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo,
			  struct xenvif_queue *queue)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;
	int gso_type;
	const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
	const struct ubuf_info *const head_ubuf = ubuf;

	old_meta_prod = npo->meta_prod;

	gso_type = XEN_NETIF_GSO_TYPE_NONE;
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
	}

	/* Set up a GSO prefix descriptor, if necessary */
	if ((1 << gso_type) & vif->gso_prefix_mask) {
		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if ((1 << gso_type) & vif->gso_mask) {
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
	} else {
		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
		meta->gso_size = 0;
	}

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(queue, skb, npo,
				     virt_to_page(data), len, offset, &head,
				     NULL,
				     UINT_MAX);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		/* This variable also signals whether foreign_gref has a real
		 * value or not.
		 */
		struct xenvif_queue *foreign_queue = NULL;
		grant_ref_t foreign_gref;

		if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
			(ubuf->callback == &xenvif_zerocopy_callback)) {
			const struct ubuf_info *const startpoint = ubuf;

			/* Ideally ubuf points to the chain element which
			 * belongs to this frag. Or if frags were removed from
			 * the beginning, then shortly before it.
			 */
			ubuf = xenvif_find_gref(skb, i, ubuf);

			/* Try again from the beginning of the list, if we
			 * haven't tried from there. This only makes sense in
			 * the unlikely event of reordering the original frags.
			 * For injected local pages it's an unnecessary second
			 * search.
			 */
			if (unlikely(!ubuf) && startpoint != head_ubuf)
				ubuf = xenvif_find_gref(skb, i, head_ubuf);

			if (likely(ubuf)) {
				u16 pending_idx = ubuf->desc;

				foreign_queue = ubuf_to_queue(ubuf);
				foreign_gref =
					foreign_queue->pending_tx_info[pending_idx].req.gref;
				/* Just a safety measure. If this was the last
				 * element on the list, the for loop will
				 * iterate again if a local page was added to
				 * the end. Using head_ubuf here prevents the
				 * second search on the chain. Or the original
				 * frags changed order, but that's less likely.
				 * In any way, ubuf shouldn't be NULL.
				 */
				ubuf = ubuf->ctx ?
					(struct ubuf_info *) ubuf->ctx :
					head_ubuf;
			} else {
				/* This frag was a local page, added to the
				 * array after the skb left netback.
				 */
				ubuf = head_ubuf;
			}
		}
		xenvif_gop_frag_copy(queue, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     &head,
				     foreign_queue,
				     foreign_queue ? foreign_gref : UINT_MAX);
	}

	return npo->meta_prod - old_meta_prod;
}
/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}
static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(queue, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

void xenvif_kick_thread(struct xenvif_queue *queue)
{
	wake_up(&queue->wq);
}
static void xenvif_rx_action(struct xenvif_queue *queue)
{
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	int ret;
	unsigned long offset;
	bool need_to_notify = false;

	struct netrx_pending_operations npo = {
		.copy  = queue->grant_copy_op,
		.meta  = queue->meta,
	};

	skb_queue_head_init(&rxq);

	while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
		RING_IDX max_slots_needed;
		RING_IDX old_req_cons;
		RING_IDX ring_slots_used;
		int i;

		/* We need a cheap worst-case estimate for the number of
		 * slots we'll use.
		 */

		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
						skb_headlen(skb),
						PAGE_SIZE);
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			unsigned int size;
			unsigned int offset;

			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
			offset = skb_shinfo(skb)->frags[i].page_offset;

			/* For a worst-case estimate we need to factor in
			 * the fragment page offset as this will affect the
			 * number of times xenvif_gop_frag_copy() will
			 * call start_new_rx_buffer().
			 */
			max_slots_needed += DIV_ROUND_UP(offset + size,
							 PAGE_SIZE);
		}

		/* To avoid the estimate becoming too pessimal for some
		 * frontends that limit posted rx requests, cap the estimate
		 * at MAX_SKB_FRAGS. In this case netback will fully coalesce
		 * the skb into the provided slots.
		 */
		if (max_slots_needed > MAX_SKB_FRAGS) {
			max_slots_needed = MAX_SKB_FRAGS;
			XENVIF_RX_CB(skb)->full_coalesce = true;
		} else {
			XENVIF_RX_CB(skb)->full_coalesce = false;
		}

		/* We may need one more slot for GSO metadata */
		if (skb_is_gso(skb) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
		     skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
			max_slots_needed++;

		/* If the skb may not fit then bail out now */
		if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
			skb_queue_head(&queue->rx_queue, skb);
			need_to_notify = true;
			queue->rx_last_skb_slots = max_slots_needed;
			break;
		} else
			queue->rx_last_skb_slots = 0;

		old_req_cons = queue->rx.req_cons;
		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
		ring_slots_used = queue->rx.req_cons - old_req_cons;

		BUG_ON(ring_slots_used > max_slots_needed);

		__skb_queue_tail(&rxq, skb);
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));

	if (!npo.copy_prod)
		goto done;

	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {

		if ((1 << queue->meta[npo.meta_cons].gso_type) &
		    queue->vif->gso_prefix_mask) {
			resp = RING_GET_RESPONSE(&queue->rx,
						 queue->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = queue->meta[npo.meta_cons].gso_size;
			resp->id = queue->meta[npo.meta_cons].id;
			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;

			npo.meta_cons++;
			XENVIF_RX_CB(skb)->meta_slots_used--;
		}

		queue->stats.tx_bytes += skb->len;
		queue->stats.tx_packets++;

		status = xenvif_check_gop(queue->vif,
					  XENVIF_RX_CB(skb)->meta_slots_used,
					  &npo);

		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
					status, offset,
					queue->meta[npo.meta_cons].size,
					flags);

		if ((1 << queue->meta[npo.meta_cons].gso_type) &
		    queue->vif->gso_mask) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&queue->rx,
						  queue->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
			gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		xenvif_add_frag_responses(queue, status,
					  queue->meta + npo.meta_cons + 1,
					  XENVIF_RX_CB(skb)->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);

		need_to_notify |= !!ret;

		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
		dev_kfree_skb(skb);
	}

done:
	if (need_to_notify)
		notify_remote_via_irq(queue->rx_irq);
}
void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&queue->napi);
}
static void tx_add_credit(struct xenvif_queue *queue)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, queue->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = queue->remaining_credit + queue->credit_bytes;
	if (max_credit < queue->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	queue->remaining_credit = min(max_credit, max_burst);
}
static void tx_credit_callback(unsigned long data)
{
	struct xenvif_queue *queue = (struct xenvif_queue *)data;
	tx_add_credit(queue);
	xenvif_napi_schedule_or_enable_events(queue);
}
static void xenvif_tx_err(struct xenvif_queue *queue,
			  struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = queue->tx.req_cons;
	unsigned long flags;

	do {
		spin_lock_irqsave(&queue->response_lock, flags);
		make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
		spin_unlock_irqrestore(&queue->response_lock, flags);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&queue->tx, cons++);
	} while (1);
	queue->tx.req_cons = cons;
}
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	/* Disable the vif from queue 0's kthread */
	if (vif->queues)
		xenvif_kick_thread(&vif->queues[0]);
}
static int xenvif_count_requests(struct xenvif_queue *queue,
				 struct xen_netif_tx_request *first,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = queue->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(queue->vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(queue->vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(queue->vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(queue->vif);
			return -E2BIG;
		}

		/* Xen network protocol had implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
				   txp->offset, txp->size);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(queue, first, cons + slots);
		return drop_err;
	}

	return slots;
}
struct xenvif_tx_cb {
	u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
					   u16 pending_idx,
					   struct xen_netif_tx_request *txp,
					   struct gnttab_map_grant_ref *mop)
{
	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
			  GNTMAP_host_map | GNTMAP_readonly,
			  txp->gref, queue->vif->domid);

	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
	       sizeof(*txp));
}
*xenvif_alloc_skb(unsigned int size
)
924 struct sk_buff
*skb
=
925 alloc_skb(size
+ NET_SKB_PAD
+ NET_IP_ALIGN
,
926 GFP_ATOMIC
| __GFP_NOWARN
);
927 if (unlikely(skb
== NULL
))
930 /* Packets passed to netif_rx() must have some headroom. */
931 skb_reserve(skb
, NET_SKB_PAD
+ NET_IP_ALIGN
);
933 /* Initialize it here to avoid later surprises */
934 skb_shinfo(skb
)->destructor_arg
= NULL
;
static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
							struct sk_buff *skb,
							struct xen_netif_tx_request *txp,
							struct gnttab_map_grant_ref *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	int start;
	pending_ring_idx_t index;
	unsigned int nr_slots, frag_overflow = 0;

	/* At this point shinfo->nr_frags is in fact the number of
	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
	if (shinfo->nr_frags > MAX_SKB_FRAGS) {
		frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
		BUG_ON(frag_overflow > MAX_SKB_FRAGS);
		shinfo->nr_frags = MAX_SKB_FRAGS;
	}
	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
	     shinfo->nr_frags++, txp++, gop++) {
		index = pending_index(queue->pending_cons++);
		pending_idx = queue->pending_ring[index];
		xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
	}

	if (frag_overflow) {
		struct sk_buff *nskb = xenvif_alloc_skb(0);
		if (unlikely(nskb == NULL)) {
			if (net_ratelimit())
				netdev_err(queue->vif->dev,
					   "Can't allocate the frag_list skb.\n");
			return NULL;
		}

		shinfo = skb_shinfo(nskb);
		frags = shinfo->frags;

		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
		     shinfo->nr_frags++, txp++, gop++) {
			index = pending_index(queue->pending_cons++);
			pending_idx = queue->pending_ring[index];
			xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
			frag_set_pending_idx(&frags[shinfo->nr_frags],
					     pending_idx);
		}

		skb_shinfo(skb)->frag_list = nskb;
	}

	return gop;
}
*queue
,
1001 grant_handle_t handle
)
1003 if (unlikely(queue
->grant_tx_handle
[pending_idx
] !=
1004 NETBACK_INVALID_HANDLE
)) {
1005 netdev_err(queue
->vif
->dev
,
1006 "Trying to overwrite active handle! pending_idx: %x\n",
1010 queue
->grant_tx_handle
[pending_idx
] = handle
;
1013 static inline void xenvif_grant_handle_reset(struct xenvif_queue
*queue
,
1016 if (unlikely(queue
->grant_tx_handle
[pending_idx
] ==
1017 NETBACK_INVALID_HANDLE
)) {
1018 netdev_err(queue
->vif
->dev
,
1019 "Trying to unmap invalid handle! pending_idx: %x\n",
1023 queue
->grant_tx_handle
[pending_idx
] = NETBACK_INVALID_HANDLE
;
static int xenvif_tx_check_gop(struct xenvif_queue *queue,
			       struct sk_buff *skb,
			       struct gnttab_map_grant_ref **gopp_map,
			       struct gnttab_copy **gopp_copy)
{
	struct gnttab_map_grant_ref *gop_map = *gopp_map;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	/* This always points to the shinfo of the skb being checked, which
	 * could be either the first or the one on the frag_list
	 */
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	/* If this is non-NULL, we are currently checking the frag_list skb, and
	 * this points to the shinfo of the first one
	 */
	struct skb_shared_info *first_shinfo = NULL;
	int nr_frags = shinfo->nr_frags;
	const bool sharedslot = nr_frags &&
				frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
	int i, err;

	/* Check status of header. */
	err = (*gopp_copy)->status;
	if (unlikely(err)) {
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				   (*gopp_copy)->status,
				   pending_idx,
				   (*gopp_copy)->source.u.ref);
		/* The first frag might still have this slot mapped */
		if (!sharedslot)
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_ERROR);
	}
	(*gopp_copy)++;

check_frags:
	for (i = 0; i < nr_frags; i++, gop_map++) {
		int j, newerr;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

		/* Check error status: if okay then remember grant handle. */
		newerr = gop_map->status;

		if (likely(!newerr)) {
			xenvif_grant_handle_set(queue,
						pending_idx,
						gop_map->handle);
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err)) {
				xenvif_idx_unmap(queue, pending_idx);
				/* If the mapping of the first frag was OK, but
				 * the header's copy failed, and they are
				 * sharing a slot, send an error
				 */
				if (i == 0 && sharedslot)
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_ERROR);
				else
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_OKAY);
			}
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				   i,
				   gop_map->status,
				   pending_idx,
				   gop_map->ref);
		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: if the header hasn't shared a slot with the
		 * first frag, release it as well.
		 */
		if (!sharedslot)
			xenvif_idx_release(queue,
					   XENVIF_TX_CB(skb)->pending_idx,
					   XEN_NETIF_RSP_OKAY);

		/* Invalidate preceding fragments of this skb. */
		for (j = 0; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(queue, pending_idx);
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* And if we found the error while checking the frag_list, unmap
		 * the first skb's frags
		 */
		if (first_shinfo) {
			for (j = 0; j < first_shinfo->nr_frags; j++) {
				pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
				xenvif_idx_unmap(queue, pending_idx);
				xenvif_idx_release(queue, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			}
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	if (skb_has_frag_list(skb) && !first_shinfo) {
		first_shinfo = skb_shinfo(skb);
		shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
		nr_frags = shinfo->nr_frags;

		goto check_frags;
	}

	*gopp_map = gop_map;
	return err;
}
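
/* Note on the flow above: the check_frags loop runs at most twice. On the
 * first pass it walks the frags of the skb itself; if the skb carries a
 * frag_list, shinfo is switched to the list skb and the loop is re-entered,
 * with first_shinfo remembering the original skb so its frags can also be
 * unmapped when an error is first seen on the second pass.
 */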
static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;
	u16 prev_pending_idx = INVALID_PENDING_IDX;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		/* If this is not the first frag, chain it to the previous */
		if (prev_pending_idx == INVALID_PENDING_IDX)
			skb_shinfo(skb)->destructor_arg =
				&callback_param(queue, pending_idx);
		else
			callback_param(queue, prev_pending_idx).ctx =
				&callback_param(queue, pending_idx);

		callback_param(queue, pending_idx).ctx = NULL;
		prev_pending_idx = pending_idx;

		txp = &queue->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset network stack's put_page */
		get_page(queue->mmap_pages[pending_idx]);
	}
	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
	 * overlaps with "index", and "mapping" is not set. I think mapping
	 * should be set. If delivered to local stack, it would drop this
	 * skb in sk_filter unless the socket has the right to use it.
	 */
	skb->pfmemalloc	= false;
}
static int xenvif_get_extras(struct xenvif_queue *queue,
			     struct xen_netif_extra_info *extras,
			     int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = queue->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(queue->vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(queue->vif);
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			queue->tx.req_cons = ++cons;
			netdev_err(queue->vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		queue->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}
static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	/* gso_segs will be calculated later */

	return 0;
}
static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
{
	bool recalculate_partial_csum = false;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		queue->stats.rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = true;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	return skb_checksum_setup(skb, recalculate_partial_csum);
}
static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = queue->credit_window_start +
		msecs_to_jiffies(queue->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&queue->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		queue->credit_window_start = now;
		tx_add_credit(queue);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > queue->remaining_credit) {
		queue->credit_timeout.data     =
			(unsigned long)queue;
		queue->credit_timeout.function =
			tx_credit_callback;
		mod_timer(&queue->credit_timeout,
			  next_credit);
		queue->credit_window_start = next_credit;

		return true;
	}

	return false;
}
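
/* Worked example (illustrative numbers): with credit_bytes = 1000000 and
 * credit_usec = 1000000, a guest may send roughly 1 MB per second. A
 * request larger than the remaining credit arms credit_timeout for the end
 * of the current window, and tx_add_credit() then tops the credit back up,
 * capped by the burst limit computed from the head-of-ring request (at
 * most 128 kB, and never below credit_bytes).
 */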
static void xenvif_tx_build_gops(struct xenvif_queue *queue,
				 int budget,
				 unsigned *copy_ops,
				 unsigned *map_ops)
{
	struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while (skb_queue_len(&queue->tx_queue) < budget) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (queue->tx.sring->req_prod - queue->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(queue->vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   queue->tx.sring->req_prod, queue->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
		if (!work_to_do)
			break;

		idx = queue->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > queue->remaining_credit &&
		    tx_credit_exceeded(queue, txreq.size))
			break;

		queue->remaining_credit -= txreq.size;

		work_to_do--;
		queue->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(queue, extras,
						       work_to_do);
			idx = queue->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(queue->vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(queue, &txreq, idx);
			break;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_err(queue->vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset&~PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		index = pending_index(queue->pending_cons);
		pending_idx = queue->pending_ring[index];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			PKT_PROT_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(queue->vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(queue, &txreq, idx);
			break;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				break;
			}
		}

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);
		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
			virt_to_mfn(skb->data);
		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
		queue->tx_copy_ops[*copy_ops].dest.offset =
			offset_in_page(skb->data);

		queue->tx_copy_ops[*copy_ops].len = data_len;
		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

		(*copy_ops)++;

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
			xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop);
			gop++;
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
			memcpy(&queue->pending_tx_info[pending_idx].req, &txreq,
			       sizeof(txreq));
		}

		queue->pending_cons++;

		request_gop = xenvif_get_requests(queue, skb, txfrags, gop);
		if (request_gop == NULL) {
			kfree_skb(skb);
			xenvif_tx_err(queue, &txreq, idx);
			break;
		}
		gop = request_gop;

		__skb_queue_tail(&queue->tx_queue, skb);

		queue->tx.req_cons = idx;

		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
			break;
	}

	(*map_ops) = gop - queue->tx_map_ops;
}
/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 or -ENOMEM if it can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	int i;
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	queue->stats.tx_zerocopy_sent += 2;
	queue->stats.tx_frag_overflow++;

	xenvif_fill_frags(queue, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {
		struct page *page;
		unsigned int len;

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
		if (!page) {
			int j;
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);
			return -ENOMEM;
		}

		if (offset + PAGE_SIZE < skb->len)
			len = PAGE_SIZE;
		else
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))
			BUG();

		offset += len;
		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);
	}
	/* swap out with old one */
	memcpy(skb_shinfo(skb)->frags,
	       frags,
	       i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	/* remove traces of mapped pages and frag_list */
	skb_frag_list_init(skb);
	uarg = skb_shinfo(skb)->destructor_arg;
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
	kfree_skb(nskb);

	return 0;
}
static int xenvif_tx_submit(struct xenvif_queue *queue)
{
	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &queue->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
			/* If there was an error, xenvif_tx_check_gop is
			 * expected to release all the frags which were mapped,
			 * so kfree_skb shouldn't do it again
			 */
			skb_shinfo(skb)->nr_frags = 0;
			if (skb_has_frag_list(skb)) {
				struct sk_buff *nskb =
						skb_shinfo(skb)->frag_list;
				skb_shinfo(nskb)->nr_frags = 0;
			}
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		callback_param(queue, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(queue, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(queue, skb)) {
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
				kfree_skb(skb);
				continue;
			}
		}

		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = queue->vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(queue, skb)) {
			netdev_dbg(queue->vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}

		queue->stats.rx_bytes += skb->len;
		queue->stats.rx_packets++;

		work_done++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
			queue->stats.tx_zerocopy_sent++;
		}

		netif_receive_skb(skb);
	}

	return work_done;
}
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
	unsigned long flags;
	pending_ring_idx_t index;
	struct xenvif_queue *queue = ubuf_to_queue(ubuf);

	/* This is the only place where we grab this lock, to protect callbacks
	 * from each other.
	 */
	spin_lock_irqsave(&queue->callback_lock, flags);
	do {
		u16 pending_idx = ubuf->desc;
		ubuf = (struct ubuf_info *) ubuf->ctx;
		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
			MAX_PENDING_REQS);
		index = pending_index(queue->dealloc_prod);
		queue->dealloc_ring[index] = pending_idx;
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		smp_wmb();
		queue->dealloc_prod++;
	} while (ubuf);
	wake_up(&queue->dealloc_wq);
	spin_unlock_irqrestore(&queue->callback_lock, flags);

	if (likely(zerocopy_success))
		queue->stats.tx_zerocopy_success++;
	else
		queue->stats.tx_zerocopy_fail++;
}
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
{
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
	unsigned int i = 0;

	dc = queue->dealloc_cons;
	gop = queue->tx_unmap_ops;

	/* Free up any grants we have finished using */
	do {
		dp = queue->dealloc_prod;

		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback().
		 */
		smp_rmb();

		while (dc != dp) {
			BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS);
			pending_idx =
				queue->dealloc_ring[pending_index(dc++)];

			pending_idx_release[gop-queue->tx_unmap_ops] =
				pending_idx;
			queue->pages_to_unmap[gop-queue->tx_unmap_ops] =
				queue->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(queue, pending_idx),
					    GNTMAP_host_map,
					    queue->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(queue, pending_idx);
			++gop;
		}

	} while (dp != queue->dealloc_prod);

	queue->dealloc_cons = dc;

	if (gop - queue->tx_unmap_ops > 0) {
		int ret;
		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
					NULL,
					queue->pages_to_unmap,
					gop - queue->tx_unmap_ops);
		if (ret) {
			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
				   gop - queue->tx_unmap_ops, ret);
			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(queue->vif->dev,
						   " host_addr: %llx handle: %x status: %d\n",
						   gop[i].host_addr,
						   gop[i].handle,
						   gop[i].status);
			}
			BUG();
		}
	}

	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
		xenvif_idx_release(queue, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
	unsigned nr_mops, nr_cops = 0;
	int work_done, ret;

	if (unlikely(!tx_work_todo(queue)))
		return 0;

	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);

	if (nr_cops == 0)
		return 0;

	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
	if (nr_mops != 0) {
		ret = gnttab_map_refs(queue->tx_map_ops,
				      NULL,
				      queue->pages_to_map,
				      nr_mops);
		BUG_ON(ret);
	}

	work_done = xenvif_tx_submit(queue);

	return work_done;
}
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;
	unsigned long flags;

	pending_tx_info = &queue->pending_tx_info[pending_idx];
	spin_lock_irqsave(&queue->response_lock, flags);
	make_tx_response(queue, &pending_tx_info->req, status);
	index = pending_index(queue->pending_prod);
	queue->pending_ring[index] = pending_idx;
	/* TX shouldn't use the index before we give it back here */
	mb();
	queue->pending_prod++;
	spin_unlock_irqrestore(&queue->response_lock, flags);
}
static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     s8 st)
{
	RING_IDX i = queue->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;
	int notify;

	resp = RING_GET_RESPONSE(&queue->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	queue->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
	if (notify)
		notify_remote_via_irq(queue->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags)
{
	RING_IDX i = queue->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&queue->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
	resp->id         = id;
	resp->status     = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	queue->rx.rsp_prod_pvt = ++i;

	return resp;
}
void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
{
	int ret;
	struct gnttab_unmap_grant_ref tx_unmap_op;

	gnttab_set_unmap_op(&tx_unmap_op,
			    idx_to_kaddr(queue, pending_idx),
			    GNTMAP_host_map,
			    queue->grant_tx_handle[pending_idx]);
	xenvif_grant_handle_reset(queue, pending_idx);

	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				&queue->mmap_pages[pending_idx], 1);
	if (ret) {
		netdev_err(queue->vif->dev,
			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
			   ret,
			   pending_idx,
			   tx_unmap_op.host_addr,
			   tx_unmap_op.handle,
			   tx_unmap_op.status);
		BUG();
	}
}
static inline int rx_work_todo(struct xenvif_queue *queue)
{
	return (!skb_queue_empty(&queue->rx_queue) &&
	       xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
	       queue->rx_queue_purge;
}

static inline int tx_work_todo(struct xenvif_queue *queue)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
		return 1;

	return 0;
}

static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
{
	return queue->dealloc_cons != queue->dealloc_prod;
}
void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
{
	if (queue->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->tx.sring);
	if (queue->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->rx.sring);
}

int xenvif_map_frontend_rings(struct xenvif_queue *queue,
			      grant_ref_t tx_ring_ref,
			      grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     tx_ring_ref, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     rx_ring_ref, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE);

	return 0;

err:
	xenvif_unmap_frontend_rings(queue);
	return err;
}
static void xenvif_start_queue(struct xenvif_queue *queue)
{
	if (xenvif_schedulable(queue->vif))
		xenvif_wake_queue(queue);
}
int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif_queue *queue = data;
	struct sk_buff *skb;

	while (!kthread_should_stop()) {
		wait_event_interruptible(queue->wq,
					 rx_work_todo(queue) ||
					 queue->vif->disabled ||
					 kthread_should_stop());

		/* This frontend is found to be rogue, disable it in
		 * kthread context. Currently this is only set when
		 * netback finds out the frontend sends malformed packets,
		 * but we cannot disable the interface in softirq
		 * context so we defer it here, if this thread is
		 * associated with queue 0.
		 */
		if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
			xenvif_carrier_off(queue->vif);

		if (kthread_should_stop())
			break;

		if (queue->rx_queue_purge) {
			skb_queue_purge(&queue->rx_queue);
			queue->rx_queue_purge = false;
		}

		if (!skb_queue_empty(&queue->rx_queue))
			xenvif_rx_action(queue);

		if (skb_queue_empty(&queue->rx_queue) &&
		    xenvif_queue_stopped(queue)) {
			del_timer_sync(&queue->wake_queue);
			xenvif_start_queue(queue);
		}

		cond_resched();
	}

	/* Bin any remaining skbs */
	while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
		dev_kfree_skb(skb);

	return 0;
}
int xenvif_dealloc_kthread(void *data)
{
	struct xenvif_queue *queue = data;

	while (!kthread_should_stop()) {
		wait_event_interruptible(queue->dealloc_wq,
					 tx_dealloc_work_todo(queue) ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		xenvif_tx_dealloc_action(queue);
		cond_resched();
	}

	/* Unmap anything remaining */
	if (tx_dealloc_work_todo(queue))
		xenvif_tx_dealloc_action(queue);

	return 0;
}
static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	/* Allow as many queues as there are CPUs, by default */
	xenvif_max_queues = num_online_cpus();

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);

static void __exit netback_fini(void)
{
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");