/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
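
/*
 * Editorial note: a minimal usage sketch (hypothetical caller, not part of
 * the original file).  __ip6_local_out() fixes up payload_len and runs the
 * NF_INET_LOCAL_OUT hook; nf_hook() returns 1 when the packet is allowed to
 * continue, which is why ip6_local_out() only calls dst_output() on a
 * return value of exactly 1.  A caller that has built a complete IPv6
 * packet with a dst attached could simply do:
 *
 *	err = ip6_local_out(skb);	// consumes skb either way
 *	if (err < 0)
 *		goto out;		// hypothetical error handling
 */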

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;

}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx(newskb);
	return 0;
}


static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket(dev_net(dev)) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}

int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
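
/*
 * Editorial note (worked example, not in the original source): the first
 * 32-bit word written in ip6_xmit() above packs version(4) | traffic
 * class(8) | flow label(20).  For tclass = 0xb8 (DSCP 46, EF) and a flow
 * label of 0x12345:
 *
 *	htonl(0x60000000 | (0xb8 << 20)) = htonl(0x6b800000)
 *
 * OR'ed with a network-order flow label of 0x12345, this yields the
 * on-wire word 0x6b812345.  Callers such as TCP are expected to pass a
 * flowi whose fl6_flowlabel carries only the low 20 bits, already in
 * network byte order.
 */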

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on RA packets,
	 *	pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
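
/*
 * Editorial note (illustrative walkthrough, not in the original source):
 * for a packet laid out as  ipv6hdr | hop-by-hop (8 bytes) | routing
 * (24 bytes) | TCP,  ip6_find_1stfragopt() steps over NEXTHDR_HOP and
 * NEXTHDR_ROUTING and returns offset 40 + 8 + 24 = 72, with *nexthdr left
 * pointing at the routing header's nexthdr field (which holds NEXTHDR_TCP).
 * The fragment header is then inserted at that offset, so everything
 * before it forms the unfragmentable part repeated in every fragment.
 */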

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frags(skb)) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
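
/*
 * Editorial note (worked example, not in the original source): with a
 * 1500-byte link MTU and no extension headers, hlen = 40, so after
 * "mtu -= hlen + sizeof(struct frag_hdr)" the per-fragment payload budget
 * is 1500 - 40 - 8 = 1452 bytes, rounded down to a multiple of 8 (1448)
 * for every fragment except the last.  A 4000-byte transport payload thus
 * becomes three fragments carrying 1448 + 1448 + 1104 bytes, each
 * re-prefixed with the 40-byte IPv6 header and an 8-byte fragment header,
 * at fragment offsets 0, 1448 and 2896.
 */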

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
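
/*
 * Editorial note: a minimal usage sketch (hypothetical caller, not part of
 * the original file) for the lookup helper exported above.  A datagram
 * sender typically fills a flowi, resolves the route, and releases the
 * dst when done; on error the dst has already been released inside
 * ip6_dst_lookup_tail():
 *
 *	struct flowi fl = { .proto = IPPROTO_UDP };
 *	struct dst_entry *dst;
 *	int err;
 *
 *	ipv6_addr_copy(&fl.fl6_dst, &daddr);	// daddr: destination to reach
 *	err = ip6_dst_lookup(sk, &dst, &fl);	// may also fill in fl6_src
 *	if (err)
 *		return err;			// *dst is NULL here
 *	// ... attach dst to an skb, transmit ...
 *	dst_release(dst);
 */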

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}
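
/*
 * Editorial note (worked example, not in the original source): the
 * gso_size computed above is the largest 8-byte-aligned payload chunk
 * that fits one fragment.  With mtu = 1500 and fragheaderlen = 40:
 *
 *	(1500 - 40 - sizeof(struct frag_hdr)) & ~7 = 1452 & ~7 = 1448
 *
 * so a UFO-capable device emits on-wire fragments carrying 1448 bytes of
 * payload each, matching what ip6_fragment() would build in software.
 */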

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
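
	/*
	 * Editorial note (worked example, not in the original source):
	 * for mtu = 1500 and fragheaderlen = 40 (plain IPv6 header, no
	 * extension headers):
	 *
	 *	maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1456 + 32 = 1488
	 *
	 * i.e. each non-final fragment may hold up to 1488 bytes counted
	 * from the network header, leaving room for the 8-byte fragment
	 * header while keeping the fragmentable part 8-byte aligned.
	 */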

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}
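
/*
 * Editorial note: a condensed usage sketch (hypothetical, loosely after
 * the real callers in net/ipv6/udp.c; not part of the original file) of
 * the corking API implemented above.  A datagram protocol appends payload,
 * possibly over several sendmsg() calls, then pushes the queued data out
 * as one or more IPv6 packets:
 *
 *	err = ip6_append_data(sk, getfrag, msg->msg_iov, len,
 *			      sizeof(struct udphdr), hlimit, tclass,
 *			      opt, &fl, rt, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);	// drop what was queued
 *	else if (!corked)
 *		err = ip6_push_pending_frames(sk); // build hdr and xmit
 *
 * The transport header (here a struct udphdr) is filled in by the caller
 * between append and push.
 */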

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}