IPv6: Complete IPV6_DONTFRAG support
net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

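/*
 * Set the IPv6 payload length from skb->len and hand the packet to the
 * LOCAL_OUT netfilter hook.  A payload larger than IPV6_MAXPLEN is
 * encoded as 0, since a jumbogram carries its real length in a
 * hop-by-hop option instead (RFC 2675).
 */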
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
                       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

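/*
 * Final device output: use the cached hardware header if the route has
 * one, otherwise let the neighbour output function resolve and build it.
 * Without a neighbour there is no way to reach the next hop, so the
 * packet is counted as OUTNOROUTES and dropped.
 */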
static int ip6_output_finish(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst->hh)
                return neigh_hh_output(dst->hh, skb);
        else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(newskb));

        netif_rx_ni(newskb);
        return 0;
}

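/*
 * Post-routing output.  A multicast packet is looped back to local
 * listeners (via ip6_dev_loopback_xmit) when the socket allows multicast
 * loop and either a multicast router is running or the group has
 * members on this host; a hop limit of zero then discards the wire copy
 * after the loopback.
 */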
static int ip6_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev)) &&
                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
                                        NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
                       ip6_output_finish);
}

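/*
 * Path MTU to use for this skb: sockets probing the path MTU themselves
 * (IPV6_PMTUDISC_PROBE) use the raw device MTU, everyone else uses the
 * MTU cached in the route.
 */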
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
               skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}

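/*
 * dst_output() entry point for IPv6.  Oversized packets that are not
 * GSO, and routes flagged to always fragment (dst_allfrag), are routed
 * through ip6_fragment(); everything else goes straight to ip6_output2().
 */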
int ip6_output(struct sk_buff *skb)
{
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_output2);
        else
                return ip6_output2(skb);
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl->proto;
        int seg_len = skb->len;
        int hlimit = -1;
        int tclass = 0;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        kfree_skb(skb);
                        skb = skb2;
                        if (sk)
                                skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np) {
                tclass = np->tclass;
                hlimit = np->hop_limit;
        }
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
                               dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}

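/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value (via IPV6_ROUTER_ALERT).  Clones
 * go to all but the last listener, which gets the original.  Returns 1
 * if the packet was consumed.
 */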
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

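/*
 * Decide what to do with a packet destined to an address we proxy:
 * unicast neighbour discovery messages are passed to the input path
 * (return 1), link-local destinations are rejected (return -1), and
 * everything else may be forwarded (return 0).
 */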
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

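/*
 * Forwarding path: validates forwarding policy, hop limit and path MTU,
 * hands Router Alert packets to interested sockets, handles proxy ND,
 * emits redirects when the packet leaves on the interface it arrived
 * on, and finally decrements the hop limit and re-outputs the packet.
 */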
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We do not process RA packets; we push them to user level
         *      AS IS, without any WARRANTY that the application will be
         *      able to interpret them. The reason is that we cannot make
         *      anything clever here.
         *
         *      We are not an end-node, so if the packet contains AH/ESP
         *      we cannot do anything. Defragmentation would also be a
         *      mistake: RA packets cannot be fragmented, because there
         *      is no warranty that different fragments will go along
         *      one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
            !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr *)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

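/*
 * Copy the metadata a fragment must inherit from the original packet
 * (type, priority, dst, device, mark, traffic control and netfilter
 * state).
 */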
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

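/*
 * Find the offset at which the Fragment header has to be inserted:
 * after any hop-by-hop, routing and destination options that belong to
 * the unfragmentable part of the packet.  *nexthdr is left pointing at
 * the nexthdr byte the caller will patch to NEXTHDR_FRAGMENT.
 */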
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}

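/*
 * Fragment an IPv6 packet at the source (RFC 2460).  The fast path
 * re-uses an existing, well-formed frag_list and only prepends headers;
 * anything else falls back to the slow path, which allocates a fresh
 * skb per fragment and copies the data in.
 */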
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (!skb->local_df) {
                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frags(skb)) {
                int first_len = skb_pagelen(skb);
                int truesizes = 0;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                                truesizes += frag->truesize;
                        }
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->truesize -= truesizes;
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->u.dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                        htons(frag->len -
                                              sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->u.dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->u.dst);
                return err;
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      LL_ALLOCATED_SPACE(rt->u.dst.dev),
                                      GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        kfree_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

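/*
 * Helpers for validating a socket's cached route: a host route must
 * still match the flow's destination exactly; for a network route the
 * last destination the socket used (daddr_cache) is compared instead,
 * which only works for connected sockets.
 */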
static inline int ip6_rt_check(struct rt6key *rt_key,
                               struct in6_addr *fl_addr,
                               struct in6_addr *addr_cache)
{
        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          struct flowi *fl)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE           --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which has not this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
            (fl->oif && fl->oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

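/*
 * Common tail of the dst lookups: do the route lookup if the caller did
 * not supply a dst, pick a source address if the flow has none, and
 * (with optimistic DAD) detour via the default router while the chosen
 * source address is still marked OPTIMISTIC.
 */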
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;
        struct net *net = sock_net(sk);

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
                                         &fl->fl6_dst,
                                         sk ? inet6_sk(sk)->srcprefs : 0,
                                         &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi fl_gw;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw, fl, sizeof(struct flowi));
                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        if (sk) {
                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
                *dst = ip6_sk_dst_check(sk, *dst, fl);
        }

        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

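/*
 * UFO path of ip6_append_data(): build one large skb holding the whole
 * UDP datagram and let the device segment it; gso_size is the per-
 * fragment payload size, rounded down to a multiple of 8.
 */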
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                                    int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags)
{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by network
         * device, so create one single skb packet containing complete
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return -ENOMEM;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path
         */
        kfree_skb(skb);

        return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

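/*
 * Append data to an outgoing corked packet, splitting across skbs as
 * needed.  @dontfrag implements IPV6_DONTFRAG (RFC 3542): when set on a
 * UDP or raw socket, a datagram that would not fit the path MTU is not
 * fragmented; instead the MTU is reported via ipv6_local_rxpmtu() (for
 * IPV6_RECVPATHMTU listeners) and the send fails with -EMSGSIZE.
 *
 * Sketch of the userspace side (assuming the IPV6_DONTFRAG sockopt
 * added alongside this patch set):
 *
 *      int on = 1;
 *      setsockopt(fd, IPPROTO_IPV6, IPV6_DONTFRAG, &on, sizeof(on));
 *      if (send(fd, buf, len, 0) < 0 && errno == EMSGSIZE)
 *              ;       /* datagram larger than the path MTU */
 */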
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                        int offset, int len, int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    int hlimit, int tclass, struct ipv6_txoptions *opt,
                    struct flowi *fl, struct rt6_info *rt, unsigned int flags,
                    int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa */
                }
                dst_hold(&rt->u.dst);
                inet->cork.dst = &rt->u.dst;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
                            rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = (struct rt6_info *)inet->cork.dst;
                fl = &inet->cork.fl;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;
                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->u.dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}

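/*
 * Release everything held by the cork: the duplicated extension
 * headers, the route reference and the cached flow.
 */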
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.dst) {
                dst_release(inet->cork.dst);
                inet->cork.dst = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

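/*
 * Splice the queued skbs into one packet, prepend the extension headers
 * and the IPv6 header saved at cork time, and push the result out via
 * ip6_local_out().
 */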
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = fl->fl6_flowlabel |
                         htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->u.dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}

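/*
 * Drop everything queued on the socket without sending it, then release
 * the cork state.
 */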
void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}