net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov           :       arithmetics in fragmentation.
 *                                      extension headers are implemented.
 *                                      route changes now work.
 *                                      ip6_forward does not confuse sniffers.
 *                                      etc.
 *
 *      H. von Brand            :       Added missing #include <linux/string.h>
 *      Imran Patel             :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                              :       add ip6_append_data and related functions
 *                                      for datagram xmit
 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
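/*
 * Fill in the IPv6 payload length and run the LOCAL_OUT netfilter hook.
 * A payload larger than IPV6_MAXPLEN cannot be represented in the 16-bit
 * payload_len field, so it is set to 0, the value reserved for
 * jumbograms (RFC 2675).
 */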
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

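/*
 * Final transmit step: hand the packet to the neighbour layer.  For
 * multicast destinations a clone may first be looped back to the local
 * stack (multicast loopback / multicast routing), and hop_limit 0
 * multicasts are discarded here.
 */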
static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct rt6_info *rt;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        rt = (struct rt6_info *) dst;
        neigh = rt->n;
        if (neigh)
                return dst_neigh_output(dst, neigh, skb);

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

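/*
 * dst_output() entry point for IPv6: drop the packet if IPv6 is
 * administratively disabled on the device, otherwise traverse the
 * POST_ROUTING hook (unless the packet was already rerouted there)
 * and continue in ip6_finish_output().
 */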
int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, 0, 0);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        hdr->saddr = *saddr;
        hdr->daddr = *daddr;

        return 0;
}

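/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for that alert value (sel); returns 1 if at least one
 * socket consumed the packet.
 */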
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

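/*
 * Decide what to do with a packet destined to an address we proxy NDP
 * for: 1 means hand it to local input (NDP messages), 0 means forward
 * it, -1 means drop it (link-local destination).
 */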
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

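/*
 * Forward a packet on behalf of another node: validate it (forwarding
 * enabled, XFRM policy, hop limit, proxy NDP), possibly emit an ICMPv6
 * redirect or "packet too big", then decrement hop_limit and pass the
 * packet through the FORWARD hook to ip6_forward_finish().
 */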
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         *      We DO NOT do any processing on RA packets: they are pushed
         *      to user level AS IS, without any warranty that the
         *      application will be able to interpret them. The reason is
         *      that we cannot do anything clever here.
         *
         *      We are not an end-node, so if the packet contains
         *      AH/ESP we cannot do anything with it either.
         *      Defragmentation would also be a mistake: RA packets
         *      cannot be fragmented, because there is no guarantee
         *      that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
            (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

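/* Propagate per-packet metadata from the original skb to a fragment. */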
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

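/*
 * Fragment an over-sized packet (RFC 2460 section 4.5).  The fast path
 * reuses an existing frag_list built by the socket layer; otherwise the
 * slow path linearly copies the payload into freshly allocated
 * fragments, each at most mtu bytes with a multiple-of-8 payload.
 */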
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->local_df && skb->len > mtu) ||
                     (IP6CB(skb)->frag_max_size &&
                      IP6CB(skb)->frag_max_size > mtu)) {
                if (skb->sk && dst_allfrag(skb_dst(skb)))
                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        ip6_rt_put(rt);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                ip6_rt_put(rt);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      hroom + troom, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

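/*
 * Return nonzero when the cached route can no longer be trusted for
 * this flow address: it is not a host route for exactly that address,
 * and the saved daddr_cache/saddr_cache does not match it either.
 */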
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account that
         * we do not support routing by source, TOS, and
         * MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

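/*
 * Common tail of the dst lookup helpers: do the route lookup if needed,
 * pick a source address when none is set, and (with optimistic DAD)
 * fall back to the default router's dst while the chosen source
 * address is still optimistic and the neighbour is not yet valid.
 */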
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        n = rt->n;
        if (n && !(n->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

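/*
 * Append data for a UFO (UDP fragmentation offload) capable device:
 * build one large GSO skb and let the device segment it into fragments
 * of gso_size bytes each.
 */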
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                                    int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)
{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by network
         * device, so create one single skb packet containing complete
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow normal path
         */
        kfree_skb(skb);

        return err;
}

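/*
 * Extension headers carry their length in 8-octet units, excluding the
 * first 8 octets, hence the (hdrlen + 1) * 8 byte copies below.
 */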
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

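/*
 * Recompute *mtu and *maxfraglen while appending: for a non-tunnel dst
 * the first fragment reserves rt->dst.header_len, later fragments use
 * the path MTU.
 */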
static void ip6_append_data_mtu(int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (skb == NULL) {
                        /* first fragment, reserve header_len */
                        *mtu = *mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = dst_mtu(rt->dst.path);
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

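/*
 * Queue data on the socket's write queue, growing the last skb or
 * allocating new ones so that each pending packet fits the (corked)
 * MTU.  The queued packets are later sent by ip6_push_pending_frames()
 * or dropped by ip6_flush_pending_frames().
 */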
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        __u8 tx_flags = 0;

        if (flags & MSG_PROBE)
                return 0;
        cork = &inet->cork.base;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa */
                }
                dst_hold(&rt->dst);
                cork->dst = &rt->dst;
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                if (rt->dst.flags & DST_XFRM_TUNNEL)
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(&rt->dst);
                else
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                cork->fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
                exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
                dst_exthdrlen = rt->dst.header_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /* For UDP, check if TX timestamp is enabled */
        if (sk->sk_type == SOCK_DGRAM) {
                err = sock_tx_timestamp(sk, &tx_flags);
                if (err)
                        goto error;
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;
                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags, rt);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (skb == NULL || skb_prev == NULL)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment, the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        alloclen += rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                                else {
                                        /* Only the initial fragment
                                         * is time stamped.
                                         */
                                        tx_flags = 0;
                                }
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        if (sk->sk_type == SOCK_DGRAM)
                                skb_shinfo(skb)->tx_flags = tx_flags;

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;

                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        struct page_frag *pfrag = sk_page_frag(sk);

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }

        return 0;

error_efault:
        err = -EFAULT;
error:
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

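/* Free the corked option copies and drop the cached route. */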
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.base.dst) {
                dst_release(inet->cork.base.dst);
                inet->cork.base.dst = NULL;
                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

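/*
 * Splice the queued skbs into one packet (trailing fragments chained on
 * frag_list), prepend the IPv6 header from the corked flow and push the
 * result out via ip6_local_out().
 */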
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

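/* Discard everything queued by ip6_append_data() and release the cork. */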
void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);