dst: Pass net into dst->output
net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
28
29#include <linux/errno.h>
30#include <linux/kernel.h>
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
39#include <linux/module.h>
40#include <linux/slab.h>
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
57#include <linux/mroute6.h>
58
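/* Final L2 transmission step: resolve (or create) the neighbour entry
 * for the route's next hop and hand the packet to dst_neigh_output().
 * Multicast packets are additionally looped back to local listeners
 * through the POST_ROUTING hook when required, and are dropped when
 * their scope is too narrow to leave the box.
 */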
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

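/* Decide between direct transmission and fragmentation: fragment when
 * the packet exceeds the path MTU and is not GSO, when the route
 * requires fragmentation on every packet (dst_allfrag), or when
 * conntrack defrag recorded a smaller incoming fragment size.
 */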
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

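/* Output entry point installed as dst->output for IPv6 routes: drop
 * the packet if IPv6 is administratively disabled on the egress
 * device, otherwise run the netfilter POST_ROUTING hook (skipped for
 * rerouted packets, which already traversed it) and continue in
 * ip6_finish_output().
 */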
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but the socket might
 * still be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

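/* Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain with a matching selector (the IPV6_ROUTER_ALERT value),
 * cloning the skb for all but the last match.  Returns 1 when the
 * packet was consumed by at least one socket, 0 otherwise.
 */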
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

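/* Classify a packet destined to a proxied address: returns 1 for NDISC
 * messages that must be handled locally, -1 for link-local destinations
 * that cannot be proxied (the caller drops those), and 0 otherwise.
 */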
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined
			 * to the proxied address must be processed locally,
			 * so pass them to the input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(net, sk, skb);
}

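/* MTU used on the forwarding path: a locked route metric wins,
 * otherwise fall back to the egress device's IPv6 MTU.
 */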
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

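/* Check whether a forwarded packet exceeds the given MTU, honouring a
 * conntrack-defrag frag_max_size that is itself over the MTU, the
 * ignore_df escape hatch, and GSO packets measured by their
 * per-segment network-layer length.
 */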
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}

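/* Main IPv6 forwarding path: validate the packet (forwarding enabled,
 * hop limit, xfrm policy), handle router-alert and proxy-NDP special
 * cases, send redirects or Packet Too Big errors where appropriate,
 * then decrement the hop limit and pass the packet to the netfilter
 * FORWARD hook on its way to ip6_forward_finish().
 */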
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do no processing on RA packets, pushing them to
	 *	user level AS IS without any warranty that the
	 *	application will be able to interpret them. The reason
	 *	is that we cannot do anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake: RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

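/* Propagate per-packet metadata (type, priority, dst, mark, conntrack
 * and security state) from the original skb to a freshly allocated
 * fragment.
 */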
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

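/* Fragment an IPv6 packet and feed every fragment to @output.  A fast
 * path reuses an existing, well-formed frag_list by merely prepending
 * headers to each element; if the geometry does not fit, the slow path
 * allocates new skbs and copies the payload in MTU-sized, 8-byte
 * aligned chunks.
 */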
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

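/* ip6_rt_check() returns non-zero when the cached route cannot be
 * confirmed to cover @fl_addr, either by a matching host-route key or
 * by the saved last-used address; ip6_sk_dst_check() uses it to decide
 * whether the socket's cached dst may be reused for this flow.
 */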
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}
	}

	if (!*dst)
		*dst = ip6_route_output(net, sk, fl6);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace in which to perform the lookup
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer-encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = dst->dev->ifindex;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer-encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

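/* UDP fragmentation offload path for the append-data machinery: build
 * (or extend) one large GSO skb and let the device segment it, instead
 * of fragmenting in software.  gso_size is the fragment payload size,
 * a multiple of 8 as required for IPv6 fragments.
 */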
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

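/* Initialise the cork state for a sequence of ip6_append_data() calls:
 * deep-copy the tx options, pin the route, and record hop limit,
 * traffic class and the fragment size derived from the path MTU (or
 * np->frag_size when smaller).
 */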
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above. --miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

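/* Core of ip6_append_data()/ip6_make_skb(): append @length bytes
 * obtained through @getfrag to the queue, growing the tail skb, adding
 * page fragments when the device supports scatter-gather, or starting
 * a new skb sized so that later fragmentation falls on 8-byte
 * boundaries.  Oversized datagrams fail early with EMSGSIZE (or an
 * RXPMTU notification when IPV6_DONTFRAG is active).
 */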
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 * Use transhdrlen, same as IPv4, because partial
	 * sums only work when transhdrlen is set.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

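/* Collapse the queued skbs into one packet with a frag_list, push the
 * remaining extension headers and the IPv6 header, and attach the
 * cached route; the result is ready for ip6_send_skb().
 */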
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err)
		return ERR_PTR(err);

	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}