net/ipv6/ip6_output.c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 static int ip6_finish_output2(struct sk_buff *skb)
60 {
61 struct dst_entry *dst = skb_dst(skb);
62 struct net_device *dev = dst->dev;
63 struct neighbour *neigh;
64 struct in6_addr *nexthop;
65 int ret;
66
67 skb->protocol = htons(ETH_P_IPV6);
68 skb->dev = dev;
69
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
72
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
74 ((mroute6_socket(dev_net(dev), skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 newskb, NULL, newskb->dev,
86 dev_loopback_xmit);
87
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(dev_net(dev), idev,
90 IPSTATS_MIB_OUTDISCARDS);
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97 skb->len);
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
105 }
106
107 rcu_read_lock_bh();
108 nexthop = rt6_nexthop((struct rt6_info *)dst);
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
118
119 IP6_INC_STATS(dev_net(dst->dev),
120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121 kfree_skb(skb);
122 return -EINVAL;
123 }
124
125 static int ip6_finish_output(struct sk_buff *skb)
126 {
127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128 dst_allfrag(skb_dst(skb)) ||
129 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
130 return ip6_fragment(skb, ip6_finish_output2);
131 else
132 return ip6_finish_output2(skb);
133 }
134
135 int ip6_output(struct sock *sk, struct sk_buff *skb)
136 {
137 struct net_device *dev = skb_dst(skb)->dev;
138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
139 if (unlikely(idev->cnf.disable_ipv6)) {
140 IP6_INC_STATS(dev_net(dev), idev,
141 IPSTATS_MIB_OUTDISCARDS);
142 kfree_skb(skb);
143 return 0;
144 }
145
146 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
149 }
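
/*
 * Editor's sketch (not part of the original file): ip6_output() is wired
 * up as the dst output handler, so local transmit paths normally reach it
 * indirectly via dst_output(), which dispatches through
 * skb_dst(skb)->output. A minimal hand-off, assuming the skb already
 * carries a valid routed dst:
 */
static inline int ip6_send_via_dst_sketch(struct sk_buff *skb)
{
	/* ip6_output() then runs the NF_INET_POST_ROUTING hooks (unless
	 * IP6SKB_REROUTED is set) and ends in ip6_finish_output(). */
	return dst_output(skb);
}
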
150
151 /*
152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
153 */
154
155 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
156 struct ipv6_txoptions *opt, int tclass)
157 {
158 struct net *net = sock_net(sk);
159 struct ipv6_pinfo *np = inet6_sk(sk);
160 struct in6_addr *first_hop = &fl6->daddr;
161 struct dst_entry *dst = skb_dst(skb);
162 struct ipv6hdr *hdr;
163 u8 proto = fl6->flowi6_proto;
164 int seg_len = skb->len;
165 int hlimit = -1;
166 u32 mtu;
167
168 if (opt) {
169 unsigned int head_room;
170
171 /* First: exthdrs may take lots of space (~8K for now);
172 MAX_HEADER is not enough.
173 */
174 head_room = opt->opt_nflen + opt->opt_flen;
175 seg_len += head_room;
176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178 if (skb_headroom(skb) < head_room) {
179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
180 if (skb2 == NULL) {
181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
184 return -ENOBUFS;
185 }
186 consume_skb(skb);
187 skb = skb2;
188 skb_set_owner_w(skb, sk);
189 }
190 if (opt->opt_flen)
191 ipv6_push_frag_opts(skb, opt, &proto);
192 if (opt->opt_nflen)
193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194 }
195
196 skb_push(skb, sizeof(struct ipv6hdr));
197 skb_reset_network_header(skb);
198 hdr = ipv6_hdr(skb);
199
200 /*
201 * Fill in the IPv6 header
202 */
203 if (np)
204 hlimit = np->hop_limit;
205 if (hlimit < 0)
206 hlimit = ip6_dst_hoplimit(dst);
207
208 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
209 np->autoflowlabel));
210
211 hdr->payload_len = htons(seg_len);
212 hdr->nexthdr = proto;
213 hdr->hop_limit = hlimit;
214
215 hdr->saddr = fl6->saddr;
216 hdr->daddr = *first_hop;
217
218 skb->protocol = htons(ETH_P_IPV6);
219 skb->priority = sk->sk_priority;
220 skb->mark = sk->sk_mark;
221
222 mtu = dst_mtu(dst);
223 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
224 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
225 IPSTATS_MIB_OUT, skb->len);
226 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
227 dst->dev, dst_output);
228 }
229
230 skb->dev = dst->dev;
231 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
232 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
233 kfree_skb(skb);
234 return -EMSGSIZE;
235 }
236 EXPORT_SYMBOL(ip6_xmit);
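
/*
 * Editor's sketch (not part of the original file): a minimal
 * connection-oriented caller of ip6_xmit(), loosely in the style of
 * TCP/DCCP. The flow setup and dst handling here are simplified
 * assumptions; real callers cache the dst on the socket and fill the
 * flow from their connection state.
 */
static int ip6_xmit_sketch(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = sk->sk_protocol;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = np->saddr;

	dst = ip6_dst_lookup_flow(sk, &fl6, NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);
	skb_dst_set(skb, dst);

	return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
}
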
237
238 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
239 {
240 struct ip6_ra_chain *ra;
241 struct sock *last = NULL;
242
243 read_lock(&ip6_ra_lock);
244 for (ra = ip6_ra_chain; ra; ra = ra->next) {
245 struct sock *sk = ra->sk;
246 if (sk && ra->sel == sel &&
247 (!sk->sk_bound_dev_if ||
248 sk->sk_bound_dev_if == skb->dev->ifindex)) {
249 if (last) {
250 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
251 if (skb2)
252 rawv6_rcv(last, skb2);
253 }
254 last = sk;
255 }
256 }
257
258 if (last) {
259 rawv6_rcv(last, skb);
260 read_unlock(&ip6_ra_lock);
261 return 1;
262 }
263 read_unlock(&ip6_ra_lock);
264 return 0;
265 }
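
/*
 * Editor's note (not part of the original file): ip6_call_ra_chain()
 * uses the classic "clone for every receiver except the last" idiom, so
 * a single listener costs no copy at all. A generic sketch of the
 * pattern (the array form is an illustrative assumption; the real code
 * walks ip6_ra_chain and returns 0 to let the caller keep the skb when
 * nothing matched):
 */
static void deliver_to_all_sketch(struct sock **socks, int n,
				  struct sk_buff *skb,
				  void (*rcv)(struct sock *, struct sk_buff *))
{
	struct sock *last = NULL;
	int i;

	for (i = 0; i < n; i++) {
		if (last) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (skb2)
				rcv(last, skb2);	/* copies for earlier hits */
		}
		last = socks[i];
	}
	if (last)
		rcv(last, skb);		/* the original goes to the last hit */
	else
		kfree_skb(skb);		/* nobody matched */
}
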
266
267 static int ip6_forward_proxy_check(struct sk_buff *skb)
268 {
269 struct ipv6hdr *hdr = ipv6_hdr(skb);
270 u8 nexthdr = hdr->nexthdr;
271 __be16 frag_off;
272 int offset;
273
274 if (ipv6_ext_hdr(nexthdr)) {
275 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
276 if (offset < 0)
277 return 0;
278 } else
279 offset = sizeof(struct ipv6hdr);
280
281 if (nexthdr == IPPROTO_ICMPV6) {
282 struct icmp6hdr *icmp6;
283
284 if (!pskb_may_pull(skb, (skb_network_header(skb) +
285 offset + 1 - skb->data)))
286 return 0;
287
288 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
289
290 switch (icmp6->icmp6_type) {
291 case NDISC_ROUTER_SOLICITATION:
292 case NDISC_ROUTER_ADVERTISEMENT:
293 case NDISC_NEIGHBOUR_SOLICITATION:
294 case NDISC_NEIGHBOUR_ADVERTISEMENT:
295 case NDISC_REDIRECT:
296 /* A unicast neighbour discovery message destined
297 * to the proxied address is passed to the
298 * input function.
299 */
300 return 1;
301 default:
302 break;
303 }
304 }
305
306 /*
307 * The proxying router can't forward traffic sent to a link-local
308 * address, so signal the sender and discard the packet. This
309 * behavior is clarified by the MIPv6 specification.
310 */
311 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
312 dst_link_failure(skb);
313 return -1;
314 }
315
316 return 0;
317 }
318
319 static inline int ip6_forward_finish(struct sk_buff *skb)
320 {
321 skb_sender_cpu_clear(skb);
322 return dst_output(skb);
323 }
324
325 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
326 {
327 unsigned int mtu;
328 struct inet6_dev *idev;
329
330 if (dst_metric_locked(dst, RTAX_MTU)) {
331 mtu = dst_metric_raw(dst, RTAX_MTU);
332 if (mtu)
333 return mtu;
334 }
335
336 mtu = IPV6_MIN_MTU;
337 rcu_read_lock();
338 idev = __in6_dev_get(dst->dev);
339 if (idev)
340 mtu = idev->cnf.mtu6;
341 rcu_read_unlock();
342
343 return mtu;
344 }
345
346 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
347 {
348 if (skb->len <= mtu)
349 return false;
350
351 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
352 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
353 return true;
354
355 if (skb->ignore_df)
356 return false;
357
358 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
359 return false;
360
361 return true;
362 }
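
/*
 * Editor's sketch (not part of the original file): a simplified model of
 * the ip6_pkt_too_big() decision over plain integers. With conntrack
 * defrag, frag_max_size records the largest original fragment and
 * ignore_df is set, so a 3000-byte reassembled skb whose fragments were
 * all <= 1280 bytes still passes a 1280-byte MTU, while one carrying a
 * 1500-byte frag_max_size does not.
 */
static bool pkt_too_big_model(unsigned int len, unsigned int frag_max_size,
			      bool ignore_df, unsigned int gso_seglen,
			      unsigned int mtu)
{
	if (len <= mtu)
		return false;
	if (frag_max_size && frag_max_size > mtu)
		return true;
	if (ignore_df)
		return false;
	/* gso_seglen != 0 stands in for skb_is_gso() +
	 * skb_gso_network_seglen() */
	if (gso_seglen && gso_seglen <= mtu)
		return false;
	return true;
}
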
363
364 int ip6_forward(struct sk_buff *skb)
365 {
366 struct dst_entry *dst = skb_dst(skb);
367 struct ipv6hdr *hdr = ipv6_hdr(skb);
368 struct inet6_skb_parm *opt = IP6CB(skb);
369 struct net *net = dev_net(dst->dev);
370 u32 mtu;
371
372 if (net->ipv6.devconf_all->forwarding == 0)
373 goto error;
374
375 if (skb->pkt_type != PACKET_HOST)
376 goto drop;
377
378 if (skb_warn_if_lro(skb))
379 goto drop;
380
381 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
382 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
383 IPSTATS_MIB_INDISCARDS);
384 goto drop;
385 }
386
387 skb_forward_csum(skb);
388
389 /*
390 * We DO NOT do any processing of RA packets,
391 * pushing them to user level AS IS, without any
392 * WARRANTY that the application will be able
393 * to interpret them. The reason is that we
394 * cannot do anything clever here.
395 *
396 * We are not the end node, so if the packet contains
397 * AH/ESP we cannot do anything with it.
398 * Defragmentation would also be a mistake: RA packets
399 * cannot be fragmented, because there is no guarantee
400 * that different fragments will follow the same path. --ANK
401 */
402 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
403 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
404 return 0;
405 }
406
407 /*
408 * check and decrement the hop limit
409 */
410 if (hdr->hop_limit <= 1) {
411 /* Force the output device to be used for the source address */
412 skb->dev = dst->dev;
413 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
414 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
415 IPSTATS_MIB_INHDRERRORS);
416
417 kfree_skb(skb);
418 return -ETIMEDOUT;
419 }
420
421 /* XXX: idev->cnf.proxy_ndp? */
422 if (net->ipv6.devconf_all->proxy_ndp &&
423 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
424 int proxied = ip6_forward_proxy_check(skb);
425 if (proxied > 0)
426 return ip6_input(skb);
427 else if (proxied < 0) {
428 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
429 IPSTATS_MIB_INDISCARDS);
430 goto drop;
431 }
432 }
433
434 if (!xfrm6_route_forward(skb)) {
435 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
436 IPSTATS_MIB_INDISCARDS);
437 goto drop;
438 }
439 dst = skb_dst(skb);
440
441 /* The IPv6 specs say nothing about it, but it is clear that we cannot
442 send redirects to source-routed frames.
443 We don't send redirects to frames decapsulated from IPsec.
444 */
445 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
446 struct in6_addr *target = NULL;
447 struct inet_peer *peer;
448 struct rt6_info *rt;
449
450 /*
451 * incoming and outgoing devices are the same;
452 * send a redirect.
453 */
454
455 rt = (struct rt6_info *) dst;
456 if (rt->rt6i_flags & RTF_GATEWAY)
457 target = &rt->rt6i_gateway;
458 else
459 target = &hdr->daddr;
460
461 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
462
463 /* Limit redirects both by destination (here)
464 and by source (inside ndisc_send_redirect)
465 */
466 if (inet_peer_xrlim_allow(peer, 1*HZ))
467 ndisc_send_redirect(skb, target);
468 if (peer)
469 inet_putpeer(peer);
470 } else {
471 int addrtype = ipv6_addr_type(&hdr->saddr);
472
473 /* This check is security critical. */
474 if (addrtype == IPV6_ADDR_ANY ||
475 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
476 goto error;
477 if (addrtype & IPV6_ADDR_LINKLOCAL) {
478 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
479 ICMPV6_NOT_NEIGHBOUR, 0);
480 goto error;
481 }
482 }
483
484 mtu = ip6_dst_mtu_forward(dst);
485 if (mtu < IPV6_MIN_MTU)
486 mtu = IPV6_MIN_MTU;
487
488 if (ip6_pkt_too_big(skb, mtu)) {
489 /* Again, force the output device to be used for the source address */
490 skb->dev = dst->dev;
491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
492 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
493 IPSTATS_MIB_INTOOBIGERRORS);
494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
495 IPSTATS_MIB_FRAGFAILS);
496 kfree_skb(skb);
497 return -EMSGSIZE;
498 }
499
500 if (skb_cow(skb, dst->dev->hard_header_len)) {
501 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
502 IPSTATS_MIB_OUTDISCARDS);
503 goto drop;
504 }
505
506 hdr = ipv6_hdr(skb);
507
508 /* Decrementing the hop limit is delayed until after the skb COW */
509
510 hdr->hop_limit--;
511
512 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
513 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
514 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
515 ip6_forward_finish);
516
517 error:
518 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
519 drop:
520 kfree_skb(skb);
521 return -EINVAL;
522 }
523
524 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
525 {
526 to->pkt_type = from->pkt_type;
527 to->priority = from->priority;
528 to->protocol = from->protocol;
529 skb_dst_drop(to);
530 skb_dst_set(to, dst_clone(skb_dst(from)));
531 to->dev = from->dev;
532 to->mark = from->mark;
533
534 #ifdef CONFIG_NET_SCHED
535 to->tc_index = from->tc_index;
536 #endif
537 nf_copy(to, from);
538 skb_copy_secmark(to, from);
539 }
540
541 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
542 {
543 struct sk_buff *frag;
544 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
545 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
546 inet6_sk(skb->sk) : NULL;
547 struct ipv6hdr *tmp_hdr;
548 struct frag_hdr *fh;
549 unsigned int mtu, hlen, left, len;
550 int hroom, troom;
551 __be32 frag_id = 0;
552 int ptr, offset = 0, err = 0;
553 u8 *prevhdr, nexthdr = 0;
554 struct net *net = dev_net(skb_dst(skb)->dev);
555
556 hlen = ip6_find_1stfragopt(skb, &prevhdr);
557 nexthdr = *prevhdr;
558
559 mtu = ip6_skb_dst_mtu(skb);
560
561 /* We must not fragment if the socket is set to force MTU discovery
562 * or if the skb was not generated by a local socket.
563 */
564 if (unlikely(!skb->ignore_df && skb->len > mtu) ||
565 (IP6CB(skb)->frag_max_size &&
566 IP6CB(skb)->frag_max_size > mtu)) {
567 if (skb->sk && dst_allfrag(skb_dst(skb)))
568 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
569
570 skb->dev = skb_dst(skb)->dev;
571 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
572 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
573 IPSTATS_MIB_FRAGFAILS);
574 kfree_skb(skb);
575 return -EMSGSIZE;
576 }
577
578 if (np && np->frag_size < mtu) {
579 if (np->frag_size)
580 mtu = np->frag_size;
581 }
582 mtu -= hlen + sizeof(struct frag_hdr);
583
584 if (skb_has_frag_list(skb)) {
585 int first_len = skb_pagelen(skb);
586 struct sk_buff *frag2;
587
588 if (first_len - hlen > mtu ||
589 ((first_len - hlen) & 7) ||
590 skb_cloned(skb))
591 goto slow_path;
592
593 skb_walk_frags(skb, frag) {
594 /* Correct geometry. */
595 if (frag->len > mtu ||
596 ((frag->len & 7) && frag->next) ||
597 skb_headroom(frag) < hlen)
598 goto slow_path_clean;
599
600 /* Partially cloned skb? */
601 if (skb_shared(frag))
602 goto slow_path_clean;
603
604 BUG_ON(frag->sk);
605 if (skb->sk) {
606 frag->sk = skb->sk;
607 frag->destructor = sock_wfree;
608 }
609 skb->truesize -= frag->truesize;
610 }
611
612 err = 0;
613 offset = 0;
614 frag = skb_shinfo(skb)->frag_list;
615 skb_frag_list_init(skb);
616 /* BUILD HEADER */
617
618 *prevhdr = NEXTHDR_FRAGMENT;
619 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
620 if (!tmp_hdr) {
621 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
622 IPSTATS_MIB_FRAGFAILS);
623 return -ENOMEM;
624 }
625
626 __skb_pull(skb, hlen);
627 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
628 __skb_push(skb, hlen);
629 skb_reset_network_header(skb);
630 memcpy(skb_network_header(skb), tmp_hdr, hlen);
631
632 ipv6_select_ident(fh, rt);
633 fh->nexthdr = nexthdr;
634 fh->reserved = 0;
635 fh->frag_off = htons(IP6_MF);
636 frag_id = fh->identification;
637
638 first_len = skb_pagelen(skb);
639 skb->data_len = first_len - skb_headlen(skb);
640 skb->len = first_len;
641 ipv6_hdr(skb)->payload_len = htons(first_len -
642 sizeof(struct ipv6hdr));
643
644 dst_hold(&rt->dst);
645
646 for (;;) {
647 /* Prepare the header of the next frame
648 * before the previous one goes out. */
649 if (frag) {
650 frag->ip_summed = CHECKSUM_NONE;
651 skb_reset_transport_header(frag);
652 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
653 __skb_push(frag, hlen);
654 skb_reset_network_header(frag);
655 memcpy(skb_network_header(frag), tmp_hdr,
656 hlen);
657 offset += skb->len - hlen - sizeof(struct frag_hdr);
658 fh->nexthdr = nexthdr;
659 fh->reserved = 0;
660 fh->frag_off = htons(offset);
661 if (frag->next != NULL)
662 fh->frag_off |= htons(IP6_MF);
663 fh->identification = frag_id;
664 ipv6_hdr(frag)->payload_len =
665 htons(frag->len -
666 sizeof(struct ipv6hdr));
667 ip6_copy_metadata(frag, skb);
668 }
669
670 err = output(skb);
671 if (!err)
672 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
673 IPSTATS_MIB_FRAGCREATES);
674
675 if (err || !frag)
676 break;
677
678 skb = frag;
679 frag = skb->next;
680 skb->next = NULL;
681 }
682
683 kfree(tmp_hdr);
684
685 if (err == 0) {
686 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
687 IPSTATS_MIB_FRAGOKS);
688 ip6_rt_put(rt);
689 return 0;
690 }
691
692 kfree_skb_list(frag);
693
694 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
695 IPSTATS_MIB_FRAGFAILS);
696 ip6_rt_put(rt);
697 return err;
698
699 slow_path_clean:
700 skb_walk_frags(skb, frag2) {
701 if (frag2 == frag)
702 break;
703 frag2->sk = NULL;
704 frag2->destructor = NULL;
705 skb->truesize += frag2->truesize;
706 }
707 }
708
709 slow_path:
710 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
711 skb_checksum_help(skb))
712 goto fail;
713
714 left = skb->len - hlen; /* Space per frame */
715 ptr = hlen; /* Where to start from */
716
717 /*
718 * Fragment the datagram.
719 */
720
721 *prevhdr = NEXTHDR_FRAGMENT;
722 hroom = LL_RESERVED_SPACE(rt->dst.dev);
723 troom = rt->dst.dev->needed_tailroom;
724
725 /*
726 * Keep copying data until we run out.
727 */
728 while (left > 0) {
729 len = left;
730 /* IF: it doesn't fit, use 'mtu' - the data space left */
731 if (len > mtu)
732 len = mtu;
733 /* IF: we are not sending up to and including the packet end
734 then align the next start on an eight-byte boundary */
735 if (len < left) {
736 len &= ~7;
737 }
738
739 /* Allocate buffer */
740 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
741 hroom + troom, GFP_ATOMIC);
742 if (!frag) {
743 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
744 IPSTATS_MIB_FRAGFAILS);
745 err = -ENOMEM;
746 goto fail;
747 }
748
749 /*
750 * Set up data on packet
751 */
752
753 ip6_copy_metadata(frag, skb);
754 skb_reserve(frag, hroom);
755 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
756 skb_reset_network_header(frag);
757 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
758 frag->transport_header = (frag->network_header + hlen +
759 sizeof(struct frag_hdr));
760
761 /*
762 * Charge the memory for the fragment to any owner
763 * it might possess
764 */
765 if (skb->sk)
766 skb_set_owner_w(frag, skb->sk);
767
768 /*
769 * Copy the packet header into the new buffer.
770 */
771 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
772
773 /*
774 * Build fragment header.
775 */
776 fh->nexthdr = nexthdr;
777 fh->reserved = 0;
778 if (!frag_id) {
779 ipv6_select_ident(fh, rt);
780 frag_id = fh->identification;
781 } else
782 fh->identification = frag_id;
783
784 /*
785 * Copy a block of the IP datagram.
786 */
787 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
788 len));
789 left -= len;
790
791 fh->frag_off = htons(offset);
792 if (left > 0)
793 fh->frag_off |= htons(IP6_MF);
794 ipv6_hdr(frag)->payload_len = htons(frag->len -
795 sizeof(struct ipv6hdr));
796
797 ptr += len;
798 offset += len;
799
800 /*
801 * Put this fragment into the sending queue.
802 */
803 err = output(frag);
804 if (err)
805 goto fail;
806
807 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
808 IPSTATS_MIB_FRAGCREATES);
809 }
810 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
811 IPSTATS_MIB_FRAGOKS);
812 consume_skb(skb);
813 return err;
814
815 fail:
816 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
817 IPSTATS_MIB_FRAGFAILS);
818 kfree_skb(skb);
819 return err;
820 }
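
/*
 * Worked example (editor's addition): the slow path carves the payload
 * into 8-byte-aligned chunks. With a 1500-byte link MTU and hlen = 40
 * (bare IPv6 header, no extension headers), the per-fragment data space
 * is mtu - hlen - sizeof(struct frag_hdr) = 1500 - 40 - 8 = 1452, which
 * "len &= ~7" rounds down to 1448 for every fragment but the last. A
 * self-contained sketch of the resulting split:
 */
static unsigned int count_frags_sketch(unsigned int payload,
				       unsigned int link_mtu,
				       unsigned int hlen)
{
	unsigned int mtu = link_mtu - hlen - 8;	/* 8 == sizeof(struct frag_hdr) */
	unsigned int left = payload, nfrags = 0;

	while (left > 0) {
		unsigned int len = left > mtu ? (mtu & ~7U) : left;

		left -= len;
		nfrags++;
	}
	/* e.g. count_frags_sketch(3000, 1500, 40) == 3:
	 * 1448 + 1448 + 104 bytes of payload */
	return nfrags;
}
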
821
822 static inline int ip6_rt_check(const struct rt6key *rt_key,
823 const struct in6_addr *fl_addr,
824 const struct in6_addr *addr_cache)
825 {
826 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
827 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
828 }
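
/*
 * Editor's sketch (not part of the original file): ip6_rt_check()
 * reports "possibly stale" (non-zero) only when the flow address matches
 * neither the /128 host-route key nor the cached last-used address.
 * Boiled down to booleans:
 */
static bool dst_may_be_stale_sketch(int plen, bool key_match, bool cache_match)
{
	/* valid if the host route still points at the flow's address,
	 * or if the connected socket's address cache still matches */
	return (plen != 128 || !key_match) && !cache_match;
}
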
829
830 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
831 struct dst_entry *dst,
832 const struct flowi6 *fl6)
833 {
834 struct ipv6_pinfo *np = inet6_sk(sk);
835 struct rt6_info *rt;
836
837 if (!dst)
838 goto out;
839
840 if (dst->ops->family != AF_INET6) {
841 dst_release(dst);
842 return NULL;
843 }
844
845 rt = (struct rt6_info *)dst;
846 /* Yes, checking route validity in the not-connected
847 * case is not very simple. Take into account
848 * that we do not support routing by source, TOS,
849 * or MSG_DONTROUTE --ANK (980726)
850 *
851 * 1. ip6_rt_check(): If the route was a host route,
852 * check that the cached destination is current.
853 * If it is a network route, we still may
854 * check its validity using a saved pointer
855 * to the last used address: daddr_cache.
856 * We do not want to save the whole address now
857 * (because the main consumer of this service
858 * is TCP, which does not have this problem),
859 * so the last trick works only on connected
860 * sockets.
861 * 2. oif also should be the same.
862 */
863 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
864 #ifdef CONFIG_IPV6_SUBTREES
865 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
866 #endif
867 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
868 dst_release(dst);
869 dst = NULL;
870 }
871
872 out:
873 return dst;
874 }
875
876 static int ip6_dst_lookup_tail(struct sock *sk,
877 struct dst_entry **dst, struct flowi6 *fl6)
878 {
879 struct net *net = sock_net(sk);
880 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
881 struct neighbour *n;
882 struct rt6_info *rt;
883 #endif
884 int err;
885
886 if (*dst == NULL)
887 *dst = ip6_route_output(net, sk, fl6);
888
889 err = (*dst)->error;
890 if (err)
891 goto out_err_release;
892
893 if (ipv6_addr_any(&fl6->saddr)) {
894 struct rt6_info *rt = (struct rt6_info *) *dst;
895 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
896 sk ? inet6_sk(sk)->srcprefs : 0,
897 &fl6->saddr);
898 if (err)
899 goto out_err_release;
900 }
901
902 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
903 /*
904 * If the dst entry we've looked up here
905 * has a neighbour entry that is in the INCOMPLETE
906 * state and the src address from the flow is
907 * marked as OPTIMISTIC, we release the found
908 * dst entry and replace it with the
909 * dst entry of the nexthop router.
910 */
911 rt = (struct rt6_info *) *dst;
912 rcu_read_lock_bh();
913 n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
914 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
915 rcu_read_unlock_bh();
916
917 if (err) {
918 struct inet6_ifaddr *ifp;
919 struct flowi6 fl_gw6;
920 int redirect;
921
922 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
923 (*dst)->dev, 1);
924
925 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
926 if (ifp)
927 in6_ifa_put(ifp);
928
929 if (redirect) {
930 /*
931 * We need to get the dst entry for the
932 * default router instead
933 */
934 dst_release(*dst);
935 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
936 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
937 *dst = ip6_route_output(net, sk, &fl_gw6);
938 err = (*dst)->error;
939 if (err)
940 goto out_err_release;
941 }
942 }
943 #endif
944
945 return 0;
946
947 out_err_release:
948 if (err == -ENETUNREACH)
949 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
950 dst_release(*dst);
951 *dst = NULL;
952 return err;
953 }
954
955 /**
956 * ip6_dst_lookup - perform route lookup on flow
957 * @sk: socket which provides route info
958 * @dst: pointer to dst_entry * for result
959 * @fl6: flow to lookup
960 *
961 * This function performs a route lookup on the given flow.
962 *
963 * It returns zero on success, or a standard errno code on error.
964 */
965 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
966 {
967 *dst = NULL;
968 return ip6_dst_lookup_tail(sk, dst, fl6);
969 }
970 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
971
972 /**
973 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
974 * @sk: socket which provides route info
975 * @fl6: flow to lookup
976 * @final_dst: final destination address for ipsec lookup
977 *
978 * This function performs a route lookup on the given flow.
979 *
980 * It returns a valid dst pointer on success, or a pointer-encoded
981 * error code.
982 */
983 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
984 const struct in6_addr *final_dst)
985 {
986 struct dst_entry *dst = NULL;
987 int err;
988
989 err = ip6_dst_lookup_tail(sk, &dst, fl6);
990 if (err)
991 return ERR_PTR(err);
992 if (final_dst)
993 fl6->daddr = *final_dst;
994
995 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
996 }
997 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
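
/*
 * Editor's sketch (not part of the original file): the two lookup
 * helpers use different error conventions. ip6_dst_lookup() returns an
 * errno and fills in a dst pointer; ip6_dst_lookup_flow() encodes the
 * error in the returned pointer. A contrived caller exercising both:
 */
static struct dst_entry *lookup_both_ways_sketch(struct sock *sk,
						 struct flowi6 *fl6)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup(sk, &dst, fl6);	/* errno convention */
	if (err)
		return ERR_PTR(err);
	dst_release(dst);

	/* ERR_PTR convention; also runs the xfrm/IPsec lookup */
	return ip6_dst_lookup_flow(sk, fl6, NULL);
}
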
998
999 /**
1000 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1001 * @sk: socket which provides the dst cache and route info
1002 * @fl6: flow to lookup
1003 * @final_dst: final destination address for ipsec lookup
1004 *
1005 * This function performs a route lookup on the given flow with the
1006 * possibility of using the cached route in the socket if it is valid.
1007 * It will take the socket dst lock when operating on the dst cache.
1008 * As a result, this function can only be used in process context.
1009 *
1010 * It returns a valid dst pointer on success, or a pointer-encoded
1011 * error code.
1012 */
1013 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1014 const struct in6_addr *final_dst)
1015 {
1016 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1017 int err;
1018
1019 dst = ip6_sk_dst_check(sk, dst, fl6);
1020
1021 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1022 if (err)
1023 return ERR_PTR(err);
1024 if (final_dst)
1025 fl6->daddr = *final_dst;
1026
1027 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1028 }
1029 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1030
1031 static inline int ip6_ufo_append_data(struct sock *sk,
1032 struct sk_buff_head *queue,
1033 int getfrag(void *from, char *to, int offset, int len,
1034 int odd, struct sk_buff *skb),
1035 void *from, int length, int hh_len, int fragheaderlen,
1036 int transhdrlen, int mtu, unsigned int flags,
1037 struct rt6_info *rt)
1038
1039 {
1040 struct sk_buff *skb;
1041 struct frag_hdr fhdr;
1042 int err;
1043
1044 /* The network device supports UDP large send offload,
1045 * so create a single skb containing the complete
1046 * UDP datagram.
1047 */
1048 skb = skb_peek_tail(queue);
1049 if (skb == NULL) {
1050 skb = sock_alloc_send_skb(sk,
1051 hh_len + fragheaderlen + transhdrlen + 20,
1052 (flags & MSG_DONTWAIT), &err);
1053 if (skb == NULL)
1054 return err;
1055
1056 /* reserve space for Hardware header */
1057 skb_reserve(skb, hh_len);
1058
1059 /* create space for UDP/IP header */
1060 skb_put(skb, fragheaderlen + transhdrlen);
1061
1062 /* initialize network header pointer */
1063 skb_reset_network_header(skb);
1064
1065 /* initialize protocol header pointer */
1066 skb->transport_header = skb->network_header + fragheaderlen;
1067
1068 skb->protocol = htons(ETH_P_IPV6);
1069 skb->csum = 0;
1070
1071 __skb_queue_tail(queue, skb);
1072 } else if (skb_is_gso(skb)) {
1073 goto append;
1074 }
1075
1076 skb->ip_summed = CHECKSUM_PARTIAL;
1077 /* Specify the length of each IPv6 datagram fragment.
1078 * It has to be a multiple of 8.
1079 */
1080 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1081 sizeof(struct frag_hdr)) & ~7;
1082 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1083 ipv6_select_ident(&fhdr, rt);
1084 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1085
1086 append:
1087 return skb_append_datato_frags(sk, skb, getfrag, from,
1088 (length - transhdrlen));
1089 }
1090
1091 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1092 gfp_t gfp)
1093 {
1094 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1095 }
1096
1097 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1098 gfp_t gfp)
1099 {
1100 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1101 }
1102
1103 static void ip6_append_data_mtu(unsigned int *mtu,
1104 int *maxfraglen,
1105 unsigned int fragheaderlen,
1106 struct sk_buff *skb,
1107 struct rt6_info *rt,
1108 unsigned int orig_mtu)
1109 {
1110 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1111 if (skb == NULL) {
1112 /* first fragment, reserve header_len */
1113 *mtu = orig_mtu - rt->dst.header_len;
1114
1115 } else {
1116 /*
1117 * this fragment is not the first; the header
1118 * space is regarded as data space.
1119 */
1120 *mtu = orig_mtu;
1121 }
1122 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1123 + fragheaderlen - sizeof(struct frag_hdr);
1124 }
1125 }
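
/*
 * Worked example (editor's addition): the maxfraglen computation above
 * in numbers. With mtu = 1500 and fragheaderlen = 40 (bare IPv6 header),
 * maxfraglen = ((1500 - 40) & ~7) + 40 - sizeof(struct frag_hdr)
 *            = 1456 + 40 - 8 = 1488,
 * so each non-final fragment carries 1448 payload bytes, a multiple of 8
 * as the fragment-offset encoding requires.
 */
static unsigned int maxfraglen_sketch(unsigned int mtu,
				      unsigned int fragheaderlen)
{
	return ((mtu - fragheaderlen) & ~7U) + fragheaderlen
		- 8 /* sizeof(struct frag_hdr) */;
}
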
1126
1127 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1128 struct inet6_cork *v6_cork,
1129 int hlimit, int tclass, struct ipv6_txoptions *opt,
1130 struct rt6_info *rt, struct flowi6 *fl6)
1131 {
1132 struct ipv6_pinfo *np = inet6_sk(sk);
1133 unsigned int mtu;
1134
1135 /*
1136 * setup for corking
1137 */
1138 if (opt) {
1139 if (WARN_ON(v6_cork->opt))
1140 return -EINVAL;
1141
1142 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
1143 if (unlikely(v6_cork->opt == NULL))
1144 return -ENOBUFS;
1145
1146 v6_cork->opt->tot_len = opt->tot_len;
1147 v6_cork->opt->opt_flen = opt->opt_flen;
1148 v6_cork->opt->opt_nflen = opt->opt_nflen;
1149
1150 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1151 sk->sk_allocation);
1152 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1153 return -ENOBUFS;
1154
1155 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1156 sk->sk_allocation);
1157 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1158 return -ENOBUFS;
1159
1160 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1161 sk->sk_allocation);
1162 if (opt->hopopt && !v6_cork->opt->hopopt)
1163 return -ENOBUFS;
1164
1165 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1166 sk->sk_allocation);
1167 if (opt->srcrt && !v6_cork->opt->srcrt)
1168 return -ENOBUFS;
1169
1170 /* need source address above --miyazawa */
1171 }
1172 dst_hold(&rt->dst);
1173 cork->base.dst = &rt->dst;
1174 cork->fl.u.ip6 = *fl6;
1175 v6_cork->hop_limit = hlimit;
1176 v6_cork->tclass = tclass;
1177 if (rt->dst.flags & DST_XFRM_TUNNEL)
1178 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1179 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1180 else
1181 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1182 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1183 if (np->frag_size < mtu) {
1184 if (np->frag_size)
1185 mtu = np->frag_size;
1186 }
1187 cork->base.fragsize = mtu;
1188 if (dst_allfrag(rt->dst.path))
1189 cork->base.flags |= IPCORK_ALLFRAG;
1190 cork->base.length = 0;
1191
1192 return 0;
1193 }
1194
1195 static int __ip6_append_data(struct sock *sk,
1196 struct flowi6 *fl6,
1197 struct sk_buff_head *queue,
1198 struct inet_cork *cork,
1199 struct inet6_cork *v6_cork,
1200 struct page_frag *pfrag,
1201 int getfrag(void *from, char *to, int offset,
1202 int len, int odd, struct sk_buff *skb),
1203 void *from, int length, int transhdrlen,
1204 unsigned int flags, int dontfrag)
1205 {
1206 struct sk_buff *skb, *skb_prev = NULL;
1207 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1208 int exthdrlen = 0;
1209 int dst_exthdrlen = 0;
1210 int hh_len;
1211 int copy;
1212 int err;
1213 int offset = 0;
1214 __u8 tx_flags = 0;
1215 u32 tskey = 0;
1216 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1217 struct ipv6_txoptions *opt = v6_cork->opt;
1218 int csummode = CHECKSUM_NONE;
1219
1220 skb = skb_peek_tail(queue);
1221 if (!skb) {
1222 exthdrlen = opt ? opt->opt_flen : 0;
1223 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1224 }
1225
1226 mtu = cork->fragsize;
1227 orig_mtu = mtu;
1228
1229 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1230
1231 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1232 (opt ? opt->opt_nflen : 0);
1233 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1234 sizeof(struct frag_hdr);
1235
1236 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1237 unsigned int maxnonfragsize, headersize;
1238
1239 headersize = sizeof(struct ipv6hdr) +
1240 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1241 (dst_allfrag(&rt->dst) ?
1242 sizeof(struct frag_hdr) : 0) +
1243 rt->rt6i_nfheader_len;
1244
1245 if (ip6_sk_ignore_df(sk))
1246 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1247 else
1248 maxnonfragsize = mtu;
1249
1250 /* dontfrag active */
1251 if ((cork->length + length > mtu - headersize) && dontfrag &&
1252 (sk->sk_protocol == IPPROTO_UDP ||
1253 sk->sk_protocol == IPPROTO_RAW)) {
1254 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1255 sizeof(struct ipv6hdr));
1256 goto emsgsize;
1257 }
1258
1259 if (cork->length + length > maxnonfragsize - headersize) {
1260 emsgsize:
1261 ipv6_local_error(sk, EMSGSIZE, fl6,
1262 mtu - headersize +
1263 sizeof(struct ipv6hdr));
1264 return -EMSGSIZE;
1265 }
1266 }
1267
1268 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1269 sock_tx_timestamp(sk, &tx_flags);
1270 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1271 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1272 tskey = sk->sk_tskey++;
1273 }
1274
1275 /* If this is the first and only packet and the device
1276 * supports checksum offloading, let's use it.
1277 */
1278 if (!skb && sk->sk_protocol == IPPROTO_UDP &&
1279 length + fragheaderlen < mtu &&
1280 rt->dst.dev->features & NETIF_F_V6_CSUM &&
1281 !exthdrlen)
1282 csummode = CHECKSUM_PARTIAL;
1283 /*
1284 * Let's try using as much space as possible.
1285 * Use MTU if total length of the message fits into the MTU.
1286 * Otherwise, we need to reserve the fragment header and
1287 * fragment alignment (= 8-15 octets, in total).
1288 *
1289 * Note that we may need to "move" the data from the tail
1290 * of the buffer to the new fragment when we split
1291 * the message.
1292 *
1293 * FIXME: It may be fragmented into multiple chunks
1294 * at once if non-fragmentable extension headers
1295 * are too large.
1296 * --yoshfuji
1297 */
1298
1299 cork->length += length;
1300 if (((length > mtu) ||
1301 (skb && skb_is_gso(skb))) &&
1302 (sk->sk_protocol == IPPROTO_UDP) &&
1303 (rt->dst.dev->features & NETIF_F_UFO) &&
1304 (sk->sk_type == SOCK_DGRAM)) {
1305 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1306 hh_len, fragheaderlen,
1307 transhdrlen, mtu, flags, rt);
1308 if (err)
1309 goto error;
1310 return 0;
1311 }
1312
1313 if (!skb)
1314 goto alloc_new_skb;
1315
1316 while (length > 0) {
1317 /* Check if the remaining data fits into current packet. */
1318 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1319 if (copy < length)
1320 copy = maxfraglen - skb->len;
1321
1322 if (copy <= 0) {
1323 char *data;
1324 unsigned int datalen;
1325 unsigned int fraglen;
1326 unsigned int fraggap;
1327 unsigned int alloclen;
1328 alloc_new_skb:
1329 /* There's no room in the current skb */
1330 if (skb)
1331 fraggap = skb->len - maxfraglen;
1332 else
1333 fraggap = 0;
1334 /* update mtu and maxfraglen if necessary */
1335 if (skb == NULL || skb_prev == NULL)
1336 ip6_append_data_mtu(&mtu, &maxfraglen,
1337 fragheaderlen, skb, rt,
1338 orig_mtu);
1339
1340 skb_prev = skb;
1341
1342 /*
1343 * If remaining data exceeds the mtu,
1344 * we know we need more fragment(s).
1345 */
1346 datalen = length + fraggap;
1347
1348 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1349 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1350 if ((flags & MSG_MORE) &&
1351 !(rt->dst.dev->features&NETIF_F_SG))
1352 alloclen = mtu;
1353 else
1354 alloclen = datalen + fragheaderlen;
1355
1356 alloclen += dst_exthdrlen;
1357
1358 if (datalen != length + fraggap) {
1359 /*
1360 * this is not the last fragment; the trailer
1361 * space is regarded as data space.
1362 */
1363 datalen += rt->dst.trailer_len;
1364 }
1365
1366 alloclen += rt->dst.trailer_len;
1367 fraglen = datalen + fragheaderlen;
1368
1369 /*
1370 * We just reserve space for the fragment header.
1371 * Note: this may be an overallocation if the message
1372 * (without MSG_MORE) fits into the MTU.
1373 */
1374 alloclen += sizeof(struct frag_hdr);
1375
1376 if (transhdrlen) {
1377 skb = sock_alloc_send_skb(sk,
1378 alloclen + hh_len,
1379 (flags & MSG_DONTWAIT), &err);
1380 } else {
1381 skb = NULL;
1382 if (atomic_read(&sk->sk_wmem_alloc) <=
1383 2 * sk->sk_sndbuf)
1384 skb = sock_wmalloc(sk,
1385 alloclen + hh_len, 1,
1386 sk->sk_allocation);
1387 if (unlikely(skb == NULL))
1388 err = -ENOBUFS;
1389 }
1390 if (skb == NULL)
1391 goto error;
1392 /*
1393 * Fill in the control structures
1394 */
1395 skb->protocol = htons(ETH_P_IPV6);
1396 skb->ip_summed = csummode;
1397 skb->csum = 0;
1398 /* reserve for fragmentation and ipsec header */
1399 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1400 dst_exthdrlen);
1401
1402 /* Only the initial fragment is time stamped */
1403 skb_shinfo(skb)->tx_flags = tx_flags;
1404 tx_flags = 0;
1405 skb_shinfo(skb)->tskey = tskey;
1406 tskey = 0;
1407
1408 /*
1409 * Find where to start putting bytes
1410 */
1411 data = skb_put(skb, fraglen);
1412 skb_set_network_header(skb, exthdrlen);
1413 data += fragheaderlen;
1414 skb->transport_header = (skb->network_header +
1415 fragheaderlen);
1416 if (fraggap) {
1417 skb->csum = skb_copy_and_csum_bits(
1418 skb_prev, maxfraglen,
1419 data + transhdrlen, fraggap, 0);
1420 skb_prev->csum = csum_sub(skb_prev->csum,
1421 skb->csum);
1422 data += fraggap;
1423 pskb_trim_unique(skb_prev, maxfraglen);
1424 }
1425 copy = datalen - transhdrlen - fraggap;
1426
1427 if (copy < 0) {
1428 err = -EINVAL;
1429 kfree_skb(skb);
1430 goto error;
1431 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1432 err = -EFAULT;
1433 kfree_skb(skb);
1434 goto error;
1435 }
1436
1437 offset += copy;
1438 length -= datalen - fraggap;
1439 transhdrlen = 0;
1440 exthdrlen = 0;
1441 dst_exthdrlen = 0;
1442
1443 /*
1444 * Put the packet on the pending queue
1445 */
1446 __skb_queue_tail(queue, skb);
1447 continue;
1448 }
1449
1450 if (copy > length)
1451 copy = length;
1452
1453 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1454 unsigned int off;
1455
1456 off = skb->len;
1457 if (getfrag(from, skb_put(skb, copy),
1458 offset, copy, off, skb) < 0) {
1459 __skb_trim(skb, off);
1460 err = -EFAULT;
1461 goto error;
1462 }
1463 } else {
1464 int i = skb_shinfo(skb)->nr_frags;
1465
1466 err = -ENOMEM;
1467 if (!sk_page_frag_refill(sk, pfrag))
1468 goto error;
1469
1470 if (!skb_can_coalesce(skb, i, pfrag->page,
1471 pfrag->offset)) {
1472 err = -EMSGSIZE;
1473 if (i == MAX_SKB_FRAGS)
1474 goto error;
1475
1476 __skb_fill_page_desc(skb, i, pfrag->page,
1477 pfrag->offset, 0);
1478 skb_shinfo(skb)->nr_frags = ++i;
1479 get_page(pfrag->page);
1480 }
1481 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1482 if (getfrag(from,
1483 page_address(pfrag->page) + pfrag->offset,
1484 offset, copy, skb->len, skb) < 0)
1485 goto error_efault;
1486
1487 pfrag->offset += copy;
1488 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1489 skb->len += copy;
1490 skb->data_len += copy;
1491 skb->truesize += copy;
1492 atomic_add(copy, &sk->sk_wmem_alloc);
1493 }
1494 offset += copy;
1495 length -= copy;
1496 }
1497
1498 return 0;
1499
1500 error_efault:
1501 err = -EFAULT;
1502 error:
1503 cork->length -= length;
1504 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1505 return err;
1506 }
1507
1508 int ip6_append_data(struct sock *sk,
1509 int getfrag(void *from, char *to, int offset, int len,
1510 int odd, struct sk_buff *skb),
1511 void *from, int length, int transhdrlen, int hlimit,
1512 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1513 struct rt6_info *rt, unsigned int flags, int dontfrag)
1514 {
1515 struct inet_sock *inet = inet_sk(sk);
1516 struct ipv6_pinfo *np = inet6_sk(sk);
1517 int exthdrlen;
1518 int err;
1519
1520 if (flags&MSG_PROBE)
1521 return 0;
1522 if (skb_queue_empty(&sk->sk_write_queue)) {
1523 /*
1524 * setup for corking
1525 */
1526 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1527 tclass, opt, rt, fl6);
1528 if (err)
1529 return err;
1530
1531 exthdrlen = (opt ? opt->opt_flen : 0);
1532 length += exthdrlen;
1533 transhdrlen += exthdrlen;
1534 } else {
1535 fl6 = &inet->cork.fl.u.ip6;
1536 transhdrlen = 0;
1537 }
1538
1539 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1540 &np->cork, sk_page_frag(sk), getfrag,
1541 from, length, transhdrlen, flags, dontfrag);
1542 }
1543 EXPORT_SYMBOL_GPL(ip6_append_data);
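
/*
 * Editor's sketch (not part of the original file): a minimal corked
 * datagram send in the style of udpv6_sendmsg(). It assumes the caller
 * has already resolved the flow, route, hop limit and traffic class
 * (e.g. via ip6_dst_hoplimit()); ip_generic_getfrag() is the stock
 * getfrag helper for a struct msghdr source.
 */
static int send_datagram_sketch(struct sock *sk, struct msghdr *msg,
				int len, int hlimit, int tclass,
				struct flowi6 *fl6, struct rt6_info *rt)
{
	int err;

	lock_sock(sk);
	err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
			      0 /* transhdrlen */, hlimit, tclass,
			      NULL /* opt */, fl6, rt, msg->msg_flags,
			      0 /* dontfrag */);
	if (err)
		ip6_flush_pending_frames(sk);
	else
		err = ip6_push_pending_frames(sk);
	release_sock(sk);

	return err;
}
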
1544
1545 static void ip6_cork_release(struct inet_cork_full *cork,
1546 struct inet6_cork *v6_cork)
1547 {
1548 if (v6_cork->opt) {
1549 kfree(v6_cork->opt->dst0opt);
1550 kfree(v6_cork->opt->dst1opt);
1551 kfree(v6_cork->opt->hopopt);
1552 kfree(v6_cork->opt->srcrt);
1553 kfree(v6_cork->opt);
1554 v6_cork->opt = NULL;
1555 }
1556
1557 if (cork->base.dst) {
1558 dst_release(cork->base.dst);
1559 cork->base.dst = NULL;
1560 cork->base.flags &= ~IPCORK_ALLFRAG;
1561 }
1562 memset(&cork->fl, 0, sizeof(cork->fl));
1563 }
1564
1565 struct sk_buff *__ip6_make_skb(struct sock *sk,
1566 struct sk_buff_head *queue,
1567 struct inet_cork_full *cork,
1568 struct inet6_cork *v6_cork)
1569 {
1570 struct sk_buff *skb, *tmp_skb;
1571 struct sk_buff **tail_skb;
1572 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1573 struct ipv6_pinfo *np = inet6_sk(sk);
1574 struct net *net = sock_net(sk);
1575 struct ipv6hdr *hdr;
1576 struct ipv6_txoptions *opt = v6_cork->opt;
1577 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1578 struct flowi6 *fl6 = &cork->fl.u.ip6;
1579 unsigned char proto = fl6->flowi6_proto;
1580
1581 skb = __skb_dequeue(queue);
1582 if (skb == NULL)
1583 goto out;
1584 tail_skb = &(skb_shinfo(skb)->frag_list);
1585
1586 /* move skb->data to ip header from ext header */
1587 if (skb->data < skb_network_header(skb))
1588 __skb_pull(skb, skb_network_offset(skb));
1589 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1590 __skb_pull(tmp_skb, skb_network_header_len(skb));
1591 *tail_skb = tmp_skb;
1592 tail_skb = &(tmp_skb->next);
1593 skb->len += tmp_skb->len;
1594 skb->data_len += tmp_skb->len;
1595 skb->truesize += tmp_skb->truesize;
1596 tmp_skb->destructor = NULL;
1597 tmp_skb->sk = NULL;
1598 }
1599
1600 /* Allow local fragmentation. */
1601 skb->ignore_df = ip6_sk_ignore_df(sk);
1602
1603 *final_dst = fl6->daddr;
1604 __skb_pull(skb, skb_network_header_len(skb));
1605 if (opt && opt->opt_flen)
1606 ipv6_push_frag_opts(skb, opt, &proto);
1607 if (opt && opt->opt_nflen)
1608 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1609
1610 skb_push(skb, sizeof(struct ipv6hdr));
1611 skb_reset_network_header(skb);
1612 hdr = ipv6_hdr(skb);
1613
1614 ip6_flow_hdr(hdr, v6_cork->tclass,
1615 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1616 np->autoflowlabel));
1617 hdr->hop_limit = v6_cork->hop_limit;
1618 hdr->nexthdr = proto;
1619 hdr->saddr = fl6->saddr;
1620 hdr->daddr = *final_dst;
1621
1622 skb->priority = sk->sk_priority;
1623 skb->mark = sk->sk_mark;
1624
1625 skb_dst_set(skb, dst_clone(&rt->dst));
1626 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1627 if (proto == IPPROTO_ICMPV6) {
1628 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1629
1630 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1631 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1632 }
1633
1634 ip6_cork_release(cork, v6_cork);
1635 out:
1636 return skb;
1637 }
1638
1639 int ip6_send_skb(struct sk_buff *skb)
1640 {
1641 struct net *net = sock_net(skb->sk);
1642 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1643 int err;
1644
1645 err = ip6_local_out(skb);
1646 if (err) {
1647 if (err > 0)
1648 err = net_xmit_errno(err);
1649 if (err)
1650 IP6_INC_STATS(net, rt->rt6i_idev,
1651 IPSTATS_MIB_OUTDISCARDS);
1652 }
1653
1654 return err;
1655 }
1656
1657 int ip6_push_pending_frames(struct sock *sk)
1658 {
1659 struct sk_buff *skb;
1660
1661 skb = ip6_finish_skb(sk);
1662 if (!skb)
1663 return 0;
1664
1665 return ip6_send_skb(skb);
1666 }
1667 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1668
1669 static void __ip6_flush_pending_frames(struct sock *sk,
1670 struct sk_buff_head *queue,
1671 struct inet_cork_full *cork,
1672 struct inet6_cork *v6_cork)
1673 {
1674 struct sk_buff *skb;
1675
1676 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1677 if (skb_dst(skb))
1678 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1679 IPSTATS_MIB_OUTDISCARDS);
1680 kfree_skb(skb);
1681 }
1682
1683 ip6_cork_release(cork, v6_cork);
1684 }
1685
1686 void ip6_flush_pending_frames(struct sock *sk)
1687 {
1688 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1689 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1690 }
1691 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1692
1693 struct sk_buff *ip6_make_skb(struct sock *sk,
1694 int getfrag(void *from, char *to, int offset,
1695 int len, int odd, struct sk_buff *skb),
1696 void *from, int length, int transhdrlen,
1697 int hlimit, int tclass,
1698 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1699 struct rt6_info *rt, unsigned int flags,
1700 int dontfrag)
1701 {
1702 struct inet_cork_full cork;
1703 struct inet6_cork v6_cork;
1704 struct sk_buff_head queue;
1705 int exthdrlen = (opt ? opt->opt_flen : 0);
1706 int err;
1707
1708 if (flags & MSG_PROBE)
1709 return NULL;
1710
1711 __skb_queue_head_init(&queue);
1712
1713 cork.base.flags = 0;
1714 cork.base.addr = 0;
1715 cork.base.opt = NULL;
1716 v6_cork.opt = NULL;
1717 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1718 if (err)
1719 return ERR_PTR(err);
1720
1721 if (dontfrag < 0)
1722 dontfrag = inet6_sk(sk)->dontfrag;
1723
1724 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1725 &current->task_frag, getfrag, from,
1726 length + exthdrlen, transhdrlen + exthdrlen,
1727 flags, dontfrag);
1728 if (err) {
1729 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1730 return ERR_PTR(err);
1731 }
1732
1733 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1734 }
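
/*
 * Editor's sketch (not part of the original file): the corkless fast
 * path pairs ip6_make_skb() with ip6_send_skb(), bypassing the socket
 * write queue and its lock. Argument preparation mirrors the
 * ip6_append_data() sketch above and is assumed here.
 */
static int send_one_shot_sketch(struct sock *sk, struct msghdr *msg,
				int len, int hlimit, int tclass,
				struct flowi6 *fl6, struct rt6_info *rt)
{
	struct sk_buff *skb;

	skb = ip6_make_skb(sk, ip_generic_getfrag, msg, len,
			   0 /* transhdrlen */, hlimit, tclass,
			   NULL /* opt */, fl6, rt, msg->msg_flags,
			   -1 /* dontfrag: use the socket default */);
	if (!skb)
		return 0;		/* MSG_PROBE: nothing to send */
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	return ip6_send_skb(skb);
}
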