snmp: add missing counters for RFC 4293
net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

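/*
 * Pick the Identification value for a new fragment header.  A single
 * global counter is shared by all flows and protected by a spinlock;
 * zero is skipped on wraparound so a valid ID is never confused with
 * "unset".
 */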
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

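/*
 * Finalise a locally generated packet: fill in payload_len (clamped
 * to zero when it would overflow the 16-bit field, as for jumbograms)
 * and run the packet through the LOCAL_OUT netfilter hook.
 */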
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

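/*
 * Hand the packet to the neighbour layer: use the cached hardware
 * header when one exists, else the neighbour output function.
 * Packets without a usable neighbour are dropped and accounted as
 * OutNoRoutes.
 */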
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!newskb->dst);

	netif_rx(newskb);
	return 0;
}

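/*
 * Multicast-aware transmit: loop a copy back to local listeners when
 * required (mc_loop set, or a multicast router socket that has not
 * already forwarded the packet), bump the OutMcast counters and pass
 * the packet through the POST_ROUTING hook.
 */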
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket(dev_net(dev)) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

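/*
 * Effective MTU for this packet: when the socket probes path MTU
 * itself (IPV6_PMTUDISC_PROBE) use the device MTU, otherwise the
 * route MTU.
 */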
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(skb->dst->dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

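/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value (IPV6_ROUTER_ALERT).  The last
 * matching socket receives the original skb, earlier ones get clones.
 * Returns 1 if the packet was consumed.
 */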
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

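/*
 * Decide what to do with a packet whose destination is an address we
 * proxy for: 1 means deliver locally (unicast neighbour discovery
 * messages), 0 means forward, -1 means drop (link-local destination).
 */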
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

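/*
 * Forwarding path: validate the packet (forwarding enabled, hop limit,
 * source address sanity, XFRM policy), emit redirects or ICMP errors
 * where required, decrement the hop limit and hand the packet to the
 * FORWARD netfilter hook.
 */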
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

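/*
 * Copy the per-packet metadata (priority, mark, dst reference,
 * netfilter and security state) from the original skb to a fragment.
 */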
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

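/*
 * Walk the extension header chain and return the offset at which a
 * Fragment header has to be inserted, i.e. after the headers that
 * belong to the unfragmentable part (Hop-by-Hop, Routing, and
 * Destination Options carrying a Home Address option).  *nexthdr is
 * left pointing at the "next header" byte that will be rewritten to
 * NEXTHDR_FRAGMENT.
 */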
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

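/*
 * Fragment an IPv6 packet.  The fast path reuses an existing
 * frag_list chain when the fragments already have the right geometry;
 * otherwise the slow path allocates a fresh skb per fragment and
 * copies the data.  FragOKs, FragFails and FragCreates are updated
 * per RFC 4293.
 */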
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb->dst->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

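/*
 * Return non-zero when the cached route cannot be validated for this
 * flow: it is not a host route for exactly this destination, and the
 * socket's cached last-used address does not match either.
 */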
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

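/*
 * Common tail of the dst lookup helpers: resolve the route if needed,
 * pick a source address when the flow does not specify one, and (with
 * optimistic DAD) fall back to the default router while our own
 * address is still tentative.
 */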
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

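/*
 * UFO path of ip6_append_data(): build (or extend) one oversized skb
 * that the device will segment in hardware.  gso_size is set to the
 * per-fragment payload so the UDP datagram is later split on fragment
 * boundaries.
 */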
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

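/*
 * Append data to the pending (corked) queue of the socket.  The first
 * call sets up the cork state (duplicated options, held route, MTU);
 * later calls keep filling the last queued skb up to maxfraglen and
 * start a new one when it is full, so that ip6_push_pending_frames()
 * can emit the queue as correctly sized fragments.
 */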
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

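/*
 * Drop the per-socket cork state: the duplicated extension headers
 * and the held route.
 */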
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

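/*
 * Glue all queued skbs into one datagram, prepend the extension
 * headers and the IPv6 header, update the Out* and ICMP6 SNMP
 * counters and send the result via ip6_local_out().
 */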
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}