Merge commit 'fixes.2015.02.23a' into core/rcu
[deliverable/linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
ca254490 58#include <net/l3mdev.h>
1da177e4 59
7d8c6e39 60static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 61{
adf30907 62 struct dst_entry *dst = skb_dst(skb);
1da177e4 63 struct net_device *dev = dst->dev;
f6b72b62 64 struct neighbour *neigh;
6fd6ce20
YH
65 struct in6_addr *nexthop;
66 int ret;
1da177e4
LT
67
68 skb->protocol = htons(ETH_P_IPV6);
69 skb->dev = dev;
70
0660e03f 71 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 72 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 73
7026b1dd 74 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
78126c41 75 ((mroute6_socket(net, skb) &&
bd91b8bf 76 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
77 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
78 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
79 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80
81 /* Do not check for IFF_ALLMULTI; multicast routing
82 is not supported in any case.
83 */
84 if (newskb)
b2e0b385 85 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 86 net, sk, newskb, NULL, newskb->dev,
95603e22 87 dev_loopback_xmit);
1da177e4 88
0660e03f 89 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 90 IP6_INC_STATS(net, idev,
3bd653c8 91 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
92 kfree_skb(skb);
93 return 0;
94 }
95 }
96
78126c41 97 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20 107 rcu_read_lock_bh();
2647a9b0 108 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
78126c41 119 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
120 kfree_skb(skb);
121 return -EINVAL;
1da177e4
LT
122}
123
0c4b51f0 124static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490
JE
125{
126 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
127 dst_allfrag(skb_dst(skb)) ||
128 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 129 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 130 else
7d8c6e39 131 return ip6_finish_output2(net, sk, skb);
9e508490
JE
132}
133
ede2059d 134int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 135{
9e508490 136 struct net_device *dev = skb_dst(skb)->dev;
adf30907 137 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 138
778d80be 139 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 140 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
141 kfree_skb(skb);
142 return 0;
143 }
144
29a26a56
EB
145 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
146 net, sk, skb, NULL, dev,
9c6eb28a
JE
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
149}
150
1da177e4 151/*
1c1e9d2b
ED
152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
153 * Note : socket lock is not held for SYNACK packets, but might be modified
154 * by calls to skb_set_owner_w() and ipv6_local_error(),
155 * which are using proper atomic operations or spinlocks.
1da177e4 156 */
1c1e9d2b 157int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 158 struct ipv6_txoptions *opt, int tclass)
1da177e4 159{
3bd653c8 160 struct net *net = sock_net(sk);
1c1e9d2b 161 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 162 struct in6_addr *first_hop = &fl6->daddr;
adf30907 163 struct dst_entry *dst = skb_dst(skb);
1da177e4 164 struct ipv6hdr *hdr;
4c9483b2 165 u8 proto = fl6->flowi6_proto;
1da177e4 166 int seg_len = skb->len;
e651f03a 167 int hlimit = -1;
1da177e4
LT
168 u32 mtu;
169
170 if (opt) {
c2636b4d 171 unsigned int head_room;
1da177e4
LT
172
173 /* First: exthdrs may take lots of space (~8K for now)
174 MAX_HEADER is not enough.
175 */
176 head_room = opt->opt_nflen + opt->opt_flen;
177 seg_len += head_room;
178 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
179
180 if (skb_headroom(skb) < head_room) {
181 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 182 if (!skb2) {
adf30907 183 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
184 IPSTATS_MIB_OUTDISCARDS);
185 kfree_skb(skb);
1da177e4
LT
186 return -ENOBUFS;
187 }
808db80a 188 consume_skb(skb);
a11d206d 189 skb = skb2;
1c1e9d2b
ED
190 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
191 * it is safe to call in our context (socket lock not held)
192 */
193 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
194 }
195 if (opt->opt_flen)
196 ipv6_push_frag_opts(skb, opt, &proto);
197 if (opt->opt_nflen)
198 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
199 }
200
e2d1bca7
ACM
201 skb_push(skb, sizeof(struct ipv6hdr));
202 skb_reset_network_header(skb);
0660e03f 203 hdr = ipv6_hdr(skb);
1da177e4
LT
204
205 /*
206 * Fill in the IPv6 header
207 */
b903d324 208 if (np)
1da177e4
LT
209 hlimit = np->hop_limit;
210 if (hlimit < 0)
6b75d090 211 hlimit = ip6_dst_hoplimit(dst);
1da177e4 212
cb1ce2ef 213 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 214 np->autoflowlabel, fl6));
41a1f8ea 215
1da177e4
LT
216 hdr->payload_len = htons(seg_len);
217 hdr->nexthdr = proto;
218 hdr->hop_limit = hlimit;
219
4e3fd7a0
AD
220 hdr->saddr = fl6->saddr;
221 hdr->daddr = *first_hop;
1da177e4 222
9c9c9ad5 223 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 224 skb->priority = sk->sk_priority;
4a19ec58 225 skb->mark = sk->sk_mark;
a2c2064f 226
1da177e4 227 mtu = dst_mtu(dst);
60ff7467 228 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 229 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 230 IPSTATS_MIB_OUT, skb->len);
1c1e9d2b
ED
231 /* hooks should never assume socket lock is held.
232 * we promote our socket to non const
233 */
29a26a56 234 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 235 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 236 dst_output);
1da177e4
LT
237 }
238
1da177e4 239 skb->dev = dst->dev;
1c1e9d2b
ED
240 /* ipv6_local_error() does not require socket lock,
241 * we promote our socket to non const
242 */
243 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
244
adf30907 245 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
246 kfree_skb(skb);
247 return -EMSGSIZE;
248}
7159039a
YH
249EXPORT_SYMBOL(ip6_xmit);
250
1da177e4
LT
251static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
252{
253 struct ip6_ra_chain *ra;
254 struct sock *last = NULL;
255
256 read_lock(&ip6_ra_lock);
257 for (ra = ip6_ra_chain; ra; ra = ra->next) {
258 struct sock *sk = ra->sk;
0bd1b59b
AM
259 if (sk && ra->sel == sel &&
260 (!sk->sk_bound_dev_if ||
261 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
262 if (last) {
263 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
264 if (skb2)
265 rawv6_rcv(last, skb2);
266 }
267 last = sk;
268 }
269 }
270
271 if (last) {
272 rawv6_rcv(last, skb);
273 read_unlock(&ip6_ra_lock);
274 return 1;
275 }
276 read_unlock(&ip6_ra_lock);
277 return 0;
278}
279
e21e0b5f
VN
280static int ip6_forward_proxy_check(struct sk_buff *skb)
281{
0660e03f 282 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 283 u8 nexthdr = hdr->nexthdr;
75f2811c 284 __be16 frag_off;
e21e0b5f
VN
285 int offset;
286
287 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 288 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
289 if (offset < 0)
290 return 0;
291 } else
292 offset = sizeof(struct ipv6hdr);
293
294 if (nexthdr == IPPROTO_ICMPV6) {
295 struct icmp6hdr *icmp6;
296
d56f90a7
ACM
297 if (!pskb_may_pull(skb, (skb_network_header(skb) +
298 offset + 1 - skb->data)))
e21e0b5f
VN
299 return 0;
300
d56f90a7 301 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
302
303 switch (icmp6->icmp6_type) {
304 case NDISC_ROUTER_SOLICITATION:
305 case NDISC_ROUTER_ADVERTISEMENT:
306 case NDISC_NEIGHBOUR_SOLICITATION:
307 case NDISC_NEIGHBOUR_ADVERTISEMENT:
308 case NDISC_REDIRECT:
309 /* For reaction involving unicast neighbor discovery
310 * message destined to the proxied address, pass it to
311 * input function.
312 */
313 return 1;
314 default:
315 break;
316 }
317 }
318
74553b09
VN
319 /*
320 * The proxying router can't forward traffic sent to a link-local
321 * address, so signal the sender and discard the packet. This
322 * behavior is clarified by the MIPv6 specification.
323 */
324 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
325 dst_link_failure(skb);
326 return -1;
327 }
328
e21e0b5f
VN
329 return 0;
330}
331
/* Netfilter FORWARD-hook continuation: clear the sender-CPU hint (the
 * packet leaves on a different path than it arrived) and push the skb
 * to the output path via dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(net, sk, skb);
}
338
0954cf9c
HFS
339static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
340{
341 unsigned int mtu;
342 struct inet6_dev *idev;
343
344 if (dst_metric_locked(dst, RTAX_MTU)) {
345 mtu = dst_metric_raw(dst, RTAX_MTU);
346 if (mtu)
347 return mtu;
348 }
349
350 mtu = IPV6_MIN_MTU;
351 rcu_read_lock();
352 idev = __in6_dev_get(dst->dev);
353 if (idev)
354 mtu = idev->cnf.mtu6;
355 rcu_read_unlock();
356
357 return mtu;
358}
359
fe6cc55f
FW
360static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
361{
418a3156 362 if (skb->len <= mtu)
fe6cc55f
FW
363 return false;
364
60ff7467 365 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
366 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
367 return true;
368
60ff7467 369 if (skb->ignore_df)
418a3156
FW
370 return false;
371
fe6cc55f
FW
372 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
373 return false;
374
375 return true;
376}
377
1da177e4
LT
378int ip6_forward(struct sk_buff *skb)
379{
adf30907 380 struct dst_entry *dst = skb_dst(skb);
0660e03f 381 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 382 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 383 struct net *net = dev_net(dst->dev);
14f3ad6f 384 u32 mtu;
1ab1457c 385
53b7997f 386 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
387 goto error;
388
090f1166
LR
389 if (skb->pkt_type != PACKET_HOST)
390 goto drop;
391
9ef2e965
HFS
392 if (unlikely(skb->sk))
393 goto drop;
394
4497b076
BH
395 if (skb_warn_if_lro(skb))
396 goto drop;
397
1da177e4 398 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
399 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
400 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
401 goto drop;
402 }
403
35fc92a9 404 skb_forward_csum(skb);
1da177e4
LT
405
406 /*
407 * We DO NOT make any processing on
408 * RA packets, pushing them to user level AS IS
409 * without ane WARRANTY that application will be able
410 * to interpret them. The reason is that we
411 * cannot make anything clever here.
412 *
413 * We are not end-node, so that if packet contains
414 * AH/ESP, we cannot make anything.
415 * Defragmentation also would be mistake, RA packets
416 * cannot be fragmented, because there is no warranty
417 * that different fragments will go along one path. --ANK
418 */
ab4eb353
YH
419 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
420 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
421 return 0;
422 }
423
424 /*
425 * check and decrement ttl
426 */
427 if (hdr->hop_limit <= 1) {
428 /* Force OUTPUT device used as source address */
429 skb->dev = dst->dev;
3ffe533c 430 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
431 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
432 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
433
434 kfree_skb(skb);
435 return -ETIMEDOUT;
436 }
437
fbea49e1 438 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 439 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 440 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
441 int proxied = ip6_forward_proxy_check(skb);
442 if (proxied > 0)
e21e0b5f 443 return ip6_input(skb);
74553b09 444 else if (proxied < 0) {
15c77d8b
ED
445 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
446 IPSTATS_MIB_INDISCARDS);
74553b09
VN
447 goto drop;
448 }
e21e0b5f
VN
449 }
450
1da177e4 451 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
452 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
453 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
454 goto drop;
455 }
adf30907 456 dst = skb_dst(skb);
1da177e4
LT
457
458 /* IPv6 specs say nothing about it, but it is clear that we cannot
459 send redirects to source routed frames.
1e5dc146 460 We don't send redirects to frames decapsulated from IPsec.
1da177e4 461 */
c45a3dfb 462 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 463 struct in6_addr *target = NULL;
fbfe95a4 464 struct inet_peer *peer;
1da177e4 465 struct rt6_info *rt;
1da177e4
LT
466
467 /*
468 * incoming and outgoing devices are the same
469 * send a redirect.
470 */
471
472 rt = (struct rt6_info *) dst;
c45a3dfb
DM
473 if (rt->rt6i_flags & RTF_GATEWAY)
474 target = &rt->rt6i_gateway;
1da177e4
LT
475 else
476 target = &hdr->daddr;
477
fd0273d7 478 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 479
1da177e4
LT
480 /* Limit redirects both by destination (here)
481 and by source (inside ndisc_send_redirect)
482 */
fbfe95a4 483 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 484 ndisc_send_redirect(skb, target);
1d861aa4
DM
485 if (peer)
486 inet_putpeer(peer);
5bb1ab09
DS
487 } else {
488 int addrtype = ipv6_addr_type(&hdr->saddr);
489
1da177e4 490 /* This check is security critical. */
f81b2e7d
YH
491 if (addrtype == IPV6_ADDR_ANY ||
492 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
493 goto error;
494 if (addrtype & IPV6_ADDR_LINKLOCAL) {
495 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 496 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
497 goto error;
498 }
1da177e4
LT
499 }
500
0954cf9c 501 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
502 if (mtu < IPV6_MIN_MTU)
503 mtu = IPV6_MIN_MTU;
504
fe6cc55f 505 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
506 /* Again, force OUTPUT device used as source address */
507 skb->dev = dst->dev;
14f3ad6f 508 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
509 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
510 IPSTATS_MIB_INTOOBIGERRORS);
511 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
512 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
513 kfree_skb(skb);
514 return -EMSGSIZE;
515 }
516
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
518 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
519 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
520 goto drop;
521 }
522
0660e03f 523 hdr = ipv6_hdr(skb);
1da177e4
LT
524
525 /* Mangling hops number delayed to point after skb COW */
1ab1457c 526
1da177e4
LT
527 hdr->hop_limit--;
528
483a47d2 529 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 530 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
29a26a56
EB
531 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
532 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 533 ip6_forward_finish);
1da177e4
LT
534
535error:
483a47d2 536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
537drop:
538 kfree_skb(skb);
539 return -EINVAL;
540}
541
542static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
543{
544 to->pkt_type = from->pkt_type;
545 to->priority = from->priority;
546 to->protocol = from->protocol;
adf30907
ED
547 skb_dst_drop(to);
548 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 549 to->dev = from->dev;
82e91ffe 550 to->mark = from->mark;
1da177e4
LT
551
552#ifdef CONFIG_NET_SCHED
553 to->tc_index = from->tc_index;
554#endif
e7ac05f3 555 nf_copy(to, from);
984bc16c 556 skb_copy_secmark(to, from);
1da177e4
LT
557}
558
7d8c6e39
EB
559int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
560 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 561{
1da177e4 562 struct sk_buff *frag;
67ba4152 563 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 564 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
565 inet6_sk(skb->sk) : NULL;
1da177e4
LT
566 struct ipv6hdr *tmp_hdr;
567 struct frag_hdr *fh;
568 unsigned int mtu, hlen, left, len;
a7ae1992 569 int hroom, troom;
286c2349 570 __be32 frag_id;
67ba4152 571 int ptr, offset = 0, err = 0;
1da177e4
LT
572 u8 *prevhdr, nexthdr = 0;
573
1da177e4
LT
574 hlen = ip6_find_1stfragopt(skb, &prevhdr);
575 nexthdr = *prevhdr;
576
628a5c56 577 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
578
579 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 580 * or if the skb it not generated by a local socket.
b881ef76 581 */
485fca66
FW
582 if (unlikely(!skb->ignore_df && skb->len > mtu))
583 goto fail_toobig;
a34a101e 584
485fca66
FW
585 if (IP6CB(skb)->frag_max_size) {
586 if (IP6CB(skb)->frag_max_size > mtu)
587 goto fail_toobig;
588
589 /* don't send fragments larger than what we received */
590 mtu = IP6CB(skb)->frag_max_size;
591 if (mtu < IPV6_MIN_MTU)
592 mtu = IPV6_MIN_MTU;
b881ef76
JH
593 }
594
d91675f9
YH
595 if (np && np->frag_size < mtu) {
596 if (np->frag_size)
597 mtu = np->frag_size;
598 }
89bc7848 599 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 600 goto fail_toobig;
1e0d69a9 601 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 602
fd0273d7
MKL
603 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
604 &ipv6_hdr(skb)->saddr);
286c2349 605
405c92f7
HFS
606 if (skb->ip_summed == CHECKSUM_PARTIAL &&
607 (err = skb_checksum_help(skb)))
608 goto fail;
609
1d325d21 610 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 611 if (skb_has_frag_list(skb)) {
1da177e4 612 int first_len = skb_pagelen(skb);
3d13008e 613 struct sk_buff *frag2;
1da177e4
LT
614
615 if (first_len - hlen > mtu ||
616 ((first_len - hlen) & 7) ||
1d325d21
FW
617 skb_cloned(skb) ||
618 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
619 goto slow_path;
620
4d9092bb 621 skb_walk_frags(skb, frag) {
1da177e4
LT
622 /* Correct geometry. */
623 if (frag->len > mtu ||
624 ((frag->len & 7) && frag->next) ||
1d325d21 625 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 626 goto slow_path_clean;
1da177e4 627
1da177e4
LT
628 /* Partially cloned skb? */
629 if (skb_shared(frag))
3d13008e 630 goto slow_path_clean;
2fdba6b0
HX
631
632 BUG_ON(frag->sk);
633 if (skb->sk) {
2fdba6b0
HX
634 frag->sk = skb->sk;
635 frag->destructor = sock_wfree;
2fdba6b0 636 }
3d13008e 637 skb->truesize -= frag->truesize;
1da177e4
LT
638 }
639
640 err = 0;
641 offset = 0;
1da177e4
LT
642 /* BUILD HEADER */
643
9a217a1c 644 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 645 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 646 if (!tmp_hdr) {
adf30907 647 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 648 IPSTATS_MIB_FRAGFAILS);
1d325d21
FW
649 err = -ENOMEM;
650 goto fail;
1da177e4 651 }
1d325d21
FW
652 frag = skb_shinfo(skb)->frag_list;
653 skb_frag_list_init(skb);
1da177e4 654
1da177e4 655 __skb_pull(skb, hlen);
67ba4152 656 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
657 __skb_push(skb, hlen);
658 skb_reset_network_header(skb);
d56f90a7 659 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 660
1da177e4
LT
661 fh->nexthdr = nexthdr;
662 fh->reserved = 0;
663 fh->frag_off = htons(IP6_MF);
286c2349 664 fh->identification = frag_id;
1da177e4
LT
665
666 first_len = skb_pagelen(skb);
667 skb->data_len = first_len - skb_headlen(skb);
668 skb->len = first_len;
0660e03f
ACM
669 ipv6_hdr(skb)->payload_len = htons(first_len -
670 sizeof(struct ipv6hdr));
a11d206d 671
d8d1f30b 672 dst_hold(&rt->dst);
1da177e4
LT
673
674 for (;;) {
675 /* Prepare header of the next frame,
676 * before previous one went down. */
677 if (frag) {
678 frag->ip_summed = CHECKSUM_NONE;
badff6d0 679 skb_reset_transport_header(frag);
67ba4152 680 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
681 __skb_push(frag, hlen);
682 skb_reset_network_header(frag);
d56f90a7
ACM
683 memcpy(skb_network_header(frag), tmp_hdr,
684 hlen);
1da177e4
LT
685 offset += skb->len - hlen - sizeof(struct frag_hdr);
686 fh->nexthdr = nexthdr;
687 fh->reserved = 0;
688 fh->frag_off = htons(offset);
53b24b8f 689 if (frag->next)
1da177e4
LT
690 fh->frag_off |= htons(IP6_MF);
691 fh->identification = frag_id;
0660e03f
ACM
692 ipv6_hdr(frag)->payload_len =
693 htons(frag->len -
694 sizeof(struct ipv6hdr));
1da177e4
LT
695 ip6_copy_metadata(frag, skb);
696 }
1ab1457c 697
7d8c6e39 698 err = output(net, sk, skb);
67ba4152 699 if (!err)
d8d1f30b 700 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 701 IPSTATS_MIB_FRAGCREATES);
dafee490 702
1da177e4
LT
703 if (err || !frag)
704 break;
705
706 skb = frag;
707 frag = skb->next;
708 skb->next = NULL;
709 }
710
a51482bd 711 kfree(tmp_hdr);
1da177e4
LT
712
713 if (err == 0) {
d8d1f30b 714 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 715 IPSTATS_MIB_FRAGOKS);
94e187c0 716 ip6_rt_put(rt);
1da177e4
LT
717 return 0;
718 }
719
46cfd725 720 kfree_skb_list(frag);
1da177e4 721
d8d1f30b 722 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 723 IPSTATS_MIB_FRAGFAILS);
94e187c0 724 ip6_rt_put(rt);
1da177e4 725 return err;
3d13008e
ED
726
727slow_path_clean:
728 skb_walk_frags(skb, frag2) {
729 if (frag2 == frag)
730 break;
731 frag2->sk = NULL;
732 frag2->destructor = NULL;
733 skb->truesize += frag2->truesize;
734 }
1da177e4
LT
735 }
736
737slow_path:
738 left = skb->len - hlen; /* Space per frame */
739 ptr = hlen; /* Where to start from */
740
741 /*
742 * Fragment the datagram.
743 */
744
745 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992 746 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
747
748 /*
749 * Keep copying data until we run out.
750 */
67ba4152 751 while (left > 0) {
1da177e4
LT
752 len = left;
753 /* IF: it doesn't fit, use 'mtu' - the data space left */
754 if (len > mtu)
755 len = mtu;
25985edc 756 /* IF: we are not sending up to and including the packet end
1da177e4
LT
757 then align the next start on an eight byte boundary */
758 if (len < left) {
759 len &= ~7;
760 }
1da177e4 761
cbffccc9
JP
762 /* Allocate buffer */
763 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
764 hroom + troom, GFP_ATOMIC);
765 if (!frag) {
adf30907 766 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 767 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
768 err = -ENOMEM;
769 goto fail;
770 }
771
772 /*
773 * Set up data on packet
774 */
775
776 ip6_copy_metadata(frag, skb);
a7ae1992 777 skb_reserve(frag, hroom);
1da177e4 778 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 779 skb_reset_network_header(frag);
badff6d0 780 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
781 frag->transport_header = (frag->network_header + hlen +
782 sizeof(struct frag_hdr));
1da177e4
LT
783
784 /*
785 * Charge the memory for the fragment to any owner
786 * it might possess
787 */
788 if (skb->sk)
789 skb_set_owner_w(frag, skb->sk);
790
791 /*
792 * Copy the packet header into the new buffer.
793 */
d626f62b 794 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
795
796 /*
797 * Build fragment header.
798 */
799 fh->nexthdr = nexthdr;
800 fh->reserved = 0;
286c2349 801 fh->identification = frag_id;
1da177e4
LT
802
803 /*
804 * Copy a block of the IP datagram.
805 */
e3f0b86b
HS
806 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
807 len));
1da177e4
LT
808 left -= len;
809
810 fh->frag_off = htons(offset);
811 if (left > 0)
812 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
813 ipv6_hdr(frag)->payload_len = htons(frag->len -
814 sizeof(struct ipv6hdr));
1da177e4
LT
815
816 ptr += len;
817 offset += len;
818
819 /*
820 * Put this fragment into the sending queue.
821 */
7d8c6e39 822 err = output(net, sk, frag);
1da177e4
LT
823 if (err)
824 goto fail;
dafee490 825
adf30907 826 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 827 IPSTATS_MIB_FRAGCREATES);
1da177e4 828 }
adf30907 829 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 830 IPSTATS_MIB_FRAGOKS);
808db80a 831 consume_skb(skb);
1da177e4
LT
832 return err;
833
485fca66
FW
834fail_toobig:
835 if (skb->sk && dst_allfrag(skb_dst(skb)))
836 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
837
838 skb->dev = skb_dst(skb)->dev;
839 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
840 err = -EMSGSIZE;
841
1da177e4 842fail:
adf30907 843 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 844 IPSTATS_MIB_FRAGFAILS);
1ab1457c 845 kfree_skb(skb);
1da177e4
LT
846 return err;
847}
848
b71d1d42
ED
849static inline int ip6_rt_check(const struct rt6key *rt_key,
850 const struct in6_addr *fl_addr,
851 const struct in6_addr *addr_cache)
cf6b1982 852{
a02cec21 853 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 854 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
855}
856
497c615a
HX
857static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
858 struct dst_entry *dst,
b71d1d42 859 const struct flowi6 *fl6)
1da177e4 860{
497c615a 861 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 862 struct rt6_info *rt;
1da177e4 863
497c615a
HX
864 if (!dst)
865 goto out;
866
a963a37d
ED
867 if (dst->ops->family != AF_INET6) {
868 dst_release(dst);
869 return NULL;
870 }
871
872 rt = (struct rt6_info *)dst;
497c615a
HX
873 /* Yes, checking route validity in not connected
874 * case is not very simple. Take into account,
875 * that we do not support routing by source, TOS,
67ba4152 876 * and MSG_DONTROUTE --ANK (980726)
497c615a 877 *
cf6b1982
YH
878 * 1. ip6_rt_check(): If route was host route,
879 * check that cached destination is current.
497c615a
HX
880 * If it is network route, we still may
881 * check its validity using saved pointer
882 * to the last used address: daddr_cache.
883 * We do not want to save whole address now,
884 * (because main consumer of this service
885 * is tcp, which has not this problem),
886 * so that the last trick works only on connected
887 * sockets.
888 * 2. oif also should be the same.
889 */
4c9483b2 890 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 891#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 892 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 893#endif
ca254490
DA
894 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
895 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
896 dst_release(dst);
897 dst = NULL;
1da177e4
LT
898 }
899
497c615a
HX
900out:
901 return dst;
902}
903
3aef934f 904static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 905 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 906{
69cce1d1
DM
907#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
908 struct neighbour *n;
97cac082 909 struct rt6_info *rt;
69cce1d1
DM
910#endif
911 int err;
6f21c96a 912 int flags = 0;
497c615a 913
e16e888b
MS
914 /* The correct way to handle this would be to do
915 * ip6_route_get_saddr, and then ip6_route_output; however,
916 * the route-specific preferred source forces the
917 * ip6_route_output call _before_ ip6_route_get_saddr.
918 *
919 * In source specific routing (no src=any default route),
920 * ip6_route_output will fail given src=any saddr, though, so
921 * that's why we try it again later.
922 */
923 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
924 struct rt6_info *rt;
925 bool had_dst = *dst != NULL;
1da177e4 926
e16e888b
MS
927 if (!had_dst)
928 *dst = ip6_route_output(net, sk, fl6);
929 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
930 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
931 sk ? inet6_sk(sk)->srcprefs : 0,
932 &fl6->saddr);
44456d37 933 if (err)
1da177e4 934 goto out_err_release;
e16e888b
MS
935
936 /* If we had an erroneous initial result, pretend it
937 * never existed and let the SA-enabled version take
938 * over.
939 */
940 if (!had_dst && (*dst)->error) {
941 dst_release(*dst);
942 *dst = NULL;
943 }
6f21c96a
PA
944
945 if (fl6->flowi6_oif)
946 flags |= RT6_LOOKUP_F_IFACE;
1da177e4
LT
947 }
948
e16e888b 949 if (!*dst)
6f21c96a 950 *dst = ip6_route_output_flags(net, sk, fl6, flags);
e16e888b
MS
951
952 err = (*dst)->error;
953 if (err)
954 goto out_err_release;
955
95c385b4 956#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
957 /*
958 * Here if the dst entry we've looked up
959 * has a neighbour entry that is in the INCOMPLETE
960 * state and the src address from the flow is
961 * marked as OPTIMISTIC, we release the found
962 * dst entry and replace it instead with the
963 * dst entry of the nexthop router
964 */
c56bf6fe 965 rt = (struct rt6_info *) *dst;
707be1ff 966 rcu_read_lock_bh();
2647a9b0
MKL
967 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
968 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
969 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
970 rcu_read_unlock_bh();
971
972 if (err) {
e550dfb0 973 struct inet6_ifaddr *ifp;
4c9483b2 974 struct flowi6 fl_gw6;
e550dfb0
NH
975 int redirect;
976
4c9483b2 977 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
978 (*dst)->dev, 1);
979
980 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
981 if (ifp)
982 in6_ifa_put(ifp);
983
984 if (redirect) {
985 /*
986 * We need to get the dst entry for the
987 * default router instead
988 */
989 dst_release(*dst);
4c9483b2
DM
990 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
991 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
992 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
993 err = (*dst)->error;
994 if (err)
e550dfb0 995 goto out_err_release;
95c385b4 996 }
e550dfb0 997 }
95c385b4
NH
998#endif
999
1da177e4
LT
1000 return 0;
1001
1002out_err_release:
ca46f9c8 1003 if (err == -ENETUNREACH)
5ac68e7c 1004 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1005 dst_release(*dst);
1006 *dst = NULL;
1007 return err;
1008}
34a0b3cd 1009
497c615a
HX
1010/**
1011 * ip6_dst_lookup - perform route lookup on flow
1012 * @sk: socket which provides route info
1013 * @dst: pointer to dst_entry * for result
4c9483b2 1014 * @fl6: flow to lookup
497c615a
HX
1015 *
1016 * This function performs a route lookup on the given flow.
1017 *
1018 * It returns zero on success, or a standard errno code on error.
1019 */
343d60aa
RP
1020int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1021 struct flowi6 *fl6)
497c615a
HX
1022{
1023 *dst = NULL;
343d60aa 1024 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1025}
3cf3dc6c
ACM
1026EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1027
497c615a 1028/**
68d0c6d3
DM
1029 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1030 * @sk: socket which provides route info
4c9483b2 1031 * @fl6: flow to lookup
68d0c6d3 1032 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1033 *
1034 * This function performs a route lookup on the given flow.
1035 *
1036 * It returns a valid dst pointer on success, or a pointer encoded
1037 * error code.
1038 */
3aef934f 1039struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1040 const struct in6_addr *final_dst)
68d0c6d3
DM
1041{
1042 struct dst_entry *dst = NULL;
1043 int err;
1044
343d60aa 1045 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1046 if (err)
1047 return ERR_PTR(err);
1048 if (final_dst)
4e3fd7a0 1049 fl6->daddr = *final_dst;
a0a9f33b 1050 if (!fl6->flowi6_oif)
ca254490 1051 fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
2774c131 1052
f92ee619 1053 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1054}
1055EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1056
1057/**
1058 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1059 * @sk: socket which provides the dst cache and route info
4c9483b2 1060 * @fl6: flow to lookup
68d0c6d3 1061 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1062 *
1063 * This function performs a route lookup on the given flow with the
1064 * possibility of using the cached route in the socket if it is valid.
1065 * It will take the socket dst lock when operating on the dst cache.
1066 * As a result, this function can only be used in process context.
1067 *
68d0c6d3
DM
1068 * It returns a valid dst pointer on success, or a pointer encoded
1069 * error code.
497c615a 1070 */
4c9483b2 1071struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1072 const struct in6_addr *final_dst)
497c615a 1073{
68d0c6d3
DM
1074 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1075 int err;
497c615a 1076
4c9483b2 1077 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1078
343d60aa 1079 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1080 if (err)
1081 return ERR_PTR(err);
1082 if (final_dst)
4e3fd7a0 1083 fl6->daddr = *final_dst;
2774c131 1084
f92ee619 1085 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1086}
68d0c6d3 1087EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1088
34a0b3cd 1089static inline int ip6_ufo_append_data(struct sock *sk,
0bbe84a6 1090 struct sk_buff_head *queue,
e89e9cf5
AR
1091 int getfrag(void *from, char *to, int offset, int len,
1092 int odd, struct sk_buff *skb),
1093 void *from, int length, int hh_len, int fragheaderlen,
67ba4152 1094 int transhdrlen, int mtu, unsigned int flags,
fd0273d7 1095 const struct flowi6 *fl6)
e89e9cf5
AR
1096
1097{
1098 struct sk_buff *skb;
1099 int err;
1100
1101 /* There is support for UDP large send offload by network
1102 * device, so create one single skb packet containing complete
1103 * udp datagram
1104 */
0bbe84a6 1105 skb = skb_peek_tail(queue);
63159f29 1106 if (!skb) {
e89e9cf5
AR
1107 skb = sock_alloc_send_skb(sk,
1108 hh_len + fragheaderlen + transhdrlen + 20,
1109 (flags & MSG_DONTWAIT), &err);
63159f29 1110 if (!skb)
504744e4 1111 return err;
e89e9cf5
AR
1112
1113 /* reserve space for Hardware header */
1114 skb_reserve(skb, hh_len);
1115
1116 /* create space for UDP/IP header */
67ba4152 1117 skb_put(skb, fragheaderlen + transhdrlen);
e89e9cf5
AR
1118
1119 /* initialize network header pointer */
c1d2bbe1 1120 skb_reset_network_header(skb);
e89e9cf5
AR
1121
1122 /* initialize protocol header pointer */
b0e380b1 1123 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1124
9c9c9ad5 1125 skb->protocol = htons(ETH_P_IPV6);
e89e9cf5 1126 skb->csum = 0;
e89e9cf5 1127
0bbe84a6 1128 __skb_queue_tail(queue, skb);
c547dbf5
JP
1129 } else if (skb_is_gso(skb)) {
1130 goto append;
e89e9cf5 1131 }
e89e9cf5 1132
c547dbf5
JP
1133 skb->ip_summed = CHECKSUM_PARTIAL;
1134 /* Specify the length of each IPv6 datagram fragment.
1135 * It has to be a multiple of 8.
1136 */
1137 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1138 sizeof(struct frag_hdr)) & ~7;
1139 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
fd0273d7
MKL
1140 skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
1141 &fl6->daddr,
1142 &fl6->saddr);
c547dbf5
JP
1143
1144append:
2811ebac
HFS
1145 return skb_append_datato_frags(sk, skb, getfrag, from,
1146 (length - transhdrlen));
e89e9cf5 1147}
1da177e4 1148
0178b695
HX
1149static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1150 gfp_t gfp)
1151{
1152 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1153}
1154
1155static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1156 gfp_t gfp)
1157{
1158 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1159}
1160
75a493e6 1161static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1162 int *maxfraglen,
1163 unsigned int fragheaderlen,
1164 struct sk_buff *skb,
75a493e6 1165 struct rt6_info *rt,
e367c2d0 1166 unsigned int orig_mtu)
0c183379
G
1167{
1168 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1169 if (!skb) {
0c183379 1170 /* first fragment, reserve header_len */
e367c2d0 1171 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1172
1173 } else {
1174 /*
1175 * this fragment is not first, the headers
1176 * space is regarded as data space.
1177 */
e367c2d0 1178 *mtu = orig_mtu;
0c183379
G
1179 }
1180 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1181 + fragheaderlen - sizeof(struct frag_hdr);
1182 }
1183}
1184
366e41d9
VY
1185static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1186 struct inet6_cork *v6_cork,
1187 int hlimit, int tclass, struct ipv6_txoptions *opt,
1188 struct rt6_info *rt, struct flowi6 *fl6)
1189{
1190 struct ipv6_pinfo *np = inet6_sk(sk);
1191 unsigned int mtu;
1192
1193 /*
1194 * setup for corking
1195 */
1196 if (opt) {
1197 if (WARN_ON(v6_cork->opt))
1198 return -EINVAL;
1199
1200 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
63159f29 1201 if (unlikely(!v6_cork->opt))
366e41d9
VY
1202 return -ENOBUFS;
1203
1204 v6_cork->opt->tot_len = opt->tot_len;
1205 v6_cork->opt->opt_flen = opt->opt_flen;
1206 v6_cork->opt->opt_nflen = opt->opt_nflen;
1207
1208 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1209 sk->sk_allocation);
1210 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1211 return -ENOBUFS;
1212
1213 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1214 sk->sk_allocation);
1215 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1216 return -ENOBUFS;
1217
1218 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1219 sk->sk_allocation);
1220 if (opt->hopopt && !v6_cork->opt->hopopt)
1221 return -ENOBUFS;
1222
1223 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1224 sk->sk_allocation);
1225 if (opt->srcrt && !v6_cork->opt->srcrt)
1226 return -ENOBUFS;
1227
1228 /* need source address above miyazawa*/
1229 }
1230 dst_hold(&rt->dst);
1231 cork->base.dst = &rt->dst;
1232 cork->fl.u.ip6 = *fl6;
1233 v6_cork->hop_limit = hlimit;
1234 v6_cork->tclass = tclass;
1235 if (rt->dst.flags & DST_XFRM_TUNNEL)
1236 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1237 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1238 else
1239 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1240 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1241 if (np->frag_size < mtu) {
1242 if (np->frag_size)
1243 mtu = np->frag_size;
1244 }
1245 cork->base.fragsize = mtu;
1246 if (dst_allfrag(rt->dst.path))
1247 cork->base.flags |= IPCORK_ALLFRAG;
1248 cork->base.length = 0;
1249
1250 return 0;
1251}
1252
0bbe84a6
VY
1253static int __ip6_append_data(struct sock *sk,
1254 struct flowi6 *fl6,
1255 struct sk_buff_head *queue,
1256 struct inet_cork *cork,
1257 struct inet6_cork *v6_cork,
1258 struct page_frag *pfrag,
1259 int getfrag(void *from, char *to, int offset,
1260 int len, int odd, struct sk_buff *skb),
1261 void *from, int length, int transhdrlen,
1262 unsigned int flags, int dontfrag)
1da177e4 1263{
0c183379 1264 struct sk_buff *skb, *skb_prev = NULL;
e367c2d0 1265 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
0bbe84a6
VY
1266 int exthdrlen = 0;
1267 int dst_exthdrlen = 0;
1da177e4 1268 int hh_len;
1da177e4
LT
1269 int copy;
1270 int err;
1271 int offset = 0;
a693e698 1272 __u8 tx_flags = 0;
09c2d251 1273 u32 tskey = 0;
0bbe84a6
VY
1274 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1275 struct ipv6_txoptions *opt = v6_cork->opt;
32dce968 1276 int csummode = CHECKSUM_NONE;
682b1a9d 1277 unsigned int maxnonfragsize, headersize;
1da177e4 1278
0bbe84a6
VY
1279 skb = skb_peek_tail(queue);
1280 if (!skb) {
1281 exthdrlen = opt ? opt->opt_flen : 0;
7efdba5b 1282 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1da177e4 1283 }
0bbe84a6 1284
366e41d9 1285 mtu = cork->fragsize;
e367c2d0 1286 orig_mtu = mtu;
1da177e4 1287
d8d1f30b 1288 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1289
a1b05140 1290 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1291 (opt ? opt->opt_nflen : 0);
4df98e76
HFS
1292 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1293 sizeof(struct frag_hdr);
1da177e4 1294
682b1a9d
HFS
1295 headersize = sizeof(struct ipv6hdr) +
1296 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1297 (dst_allfrag(&rt->dst) ?
1298 sizeof(struct frag_hdr) : 0) +
1299 rt->rt6i_nfheader_len;
1300
1301 if (cork->length + length > mtu - headersize && dontfrag &&
1302 (sk->sk_protocol == IPPROTO_UDP ||
1303 sk->sk_protocol == IPPROTO_RAW)) {
1304 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1305 sizeof(struct ipv6hdr));
1306 goto emsgsize;
1307 }
4df98e76 1308
682b1a9d
HFS
1309 if (ip6_sk_ignore_df(sk))
1310 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1311 else
1312 maxnonfragsize = mtu;
4df98e76 1313
682b1a9d 1314 if (cork->length + length > maxnonfragsize - headersize) {
4df98e76 1315emsgsize:
682b1a9d
HFS
1316 ipv6_local_error(sk, EMSGSIZE, fl6,
1317 mtu - headersize +
1318 sizeof(struct ipv6hdr));
1319 return -EMSGSIZE;
1da177e4
LT
1320 }
1321
682b1a9d
HFS
1322 /* CHECKSUM_PARTIAL only with no extension headers and when
1323 * we are not going to fragment
1324 */
1325 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1326 headersize == sizeof(struct ipv6hdr) &&
1327 length < mtu - headersize &&
1328 !(flags & MSG_MORE) &&
c8cd0989 1329 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
682b1a9d
HFS
1330 csummode = CHECKSUM_PARTIAL;
1331
09c2d251 1332 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
bf84a010 1333 sock_tx_timestamp(sk, &tx_flags);
09c2d251
WB
1334 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1335 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1336 tskey = sk->sk_tskey++;
1337 }
a693e698 1338
1da177e4
LT
1339 /*
1340 * Let's try using as much space as possible.
1341 * Use MTU if total length of the message fits into the MTU.
1342 * Otherwise, we need to reserve fragment header and
1343 * fragment alignment (= 8-15 octects, in total).
1344 *
1345 * Note that we may need to "move" the data from the tail of
1ab1457c 1346 * of the buffer to the new fragment when we split
1da177e4
LT
1347 * the message.
1348 *
1ab1457c 1349 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1350 * at once if non-fragmentable extension headers
1351 * are too large.
1ab1457c 1352 * --yoshfuji
1da177e4
LT
1353 */
1354
2811ebac
HFS
1355 cork->length += length;
1356 if (((length > mtu) ||
1357 (skb && skb_is_gso(skb))) &&
1358 (sk->sk_protocol == IPPROTO_UDP) &&
acf8dd0a 1359 (rt->dst.dev->features & NETIF_F_UFO) &&
40ba3302 1360 (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
0bbe84a6 1361 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
2811ebac 1362 hh_len, fragheaderlen,
fd0273d7 1363 transhdrlen, mtu, flags, fl6);
2811ebac
HFS
1364 if (err)
1365 goto error;
1366 return 0;
e89e9cf5 1367 }
1da177e4 1368
2811ebac 1369 if (!skb)
1da177e4
LT
1370 goto alloc_new_skb;
1371
1372 while (length > 0) {
1373 /* Check if the remaining data fits into current packet. */
bdc712b4 1374 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1375 if (copy < length)
1376 copy = maxfraglen - skb->len;
1377
1378 if (copy <= 0) {
1379 char *data;
1380 unsigned int datalen;
1381 unsigned int fraglen;
1382 unsigned int fraggap;
1383 unsigned int alloclen;
1da177e4 1384alloc_new_skb:
1da177e4 1385 /* There's no room in the current skb */
0c183379
G
1386 if (skb)
1387 fraggap = skb->len - maxfraglen;
1da177e4
LT
1388 else
1389 fraggap = 0;
0c183379 1390 /* update mtu and maxfraglen if necessary */
63159f29 1391 if (!skb || !skb_prev)
0c183379 1392 ip6_append_data_mtu(&mtu, &maxfraglen,
75a493e6 1393 fragheaderlen, skb, rt,
e367c2d0 1394 orig_mtu);
0c183379
G
1395
1396 skb_prev = skb;
1da177e4
LT
1397
1398 /*
1399 * If remaining data exceeds the mtu,
1400 * we know we need more fragment(s).
1401 */
1402 datalen = length + fraggap;
1da177e4 1403
0c183379
G
1404 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1405 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1da177e4 1406 if ((flags & MSG_MORE) &&
d8d1f30b 1407 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1408 alloclen = mtu;
1409 else
1410 alloclen = datalen + fragheaderlen;
1411
299b0767
SK
1412 alloclen += dst_exthdrlen;
1413
0c183379
G
1414 if (datalen != length + fraggap) {
1415 /*
1416 * this is not the last fragment, the trailer
1417 * space is regarded as data space.
1418 */
1419 datalen += rt->dst.trailer_len;
1420 }
1421
1422 alloclen += rt->dst.trailer_len;
1423 fraglen = datalen + fragheaderlen;
1da177e4
LT
1424
1425 /*
1426 * We just reserve space for fragment header.
1ab1457c 1427 * Note: this may be overallocation if the message
1da177e4
LT
1428 * (without MSG_MORE) fits into the MTU.
1429 */
1430 alloclen += sizeof(struct frag_hdr);
1431
1432 if (transhdrlen) {
1433 skb = sock_alloc_send_skb(sk,
1434 alloclen + hh_len,
1435 (flags & MSG_DONTWAIT), &err);
1436 } else {
1437 skb = NULL;
1438 if (atomic_read(&sk->sk_wmem_alloc) <=
1439 2 * sk->sk_sndbuf)
1440 skb = sock_wmalloc(sk,
1441 alloclen + hh_len, 1,
1442 sk->sk_allocation);
63159f29 1443 if (unlikely(!skb))
1da177e4
LT
1444 err = -ENOBUFS;
1445 }
63159f29 1446 if (!skb)
1da177e4
LT
1447 goto error;
1448 /*
1449 * Fill in the control structures
1450 */
9c9c9ad5 1451 skb->protocol = htons(ETH_P_IPV6);
32dce968 1452 skb->ip_summed = csummode;
1da177e4 1453 skb->csum = 0;
1f85851e
G
1454 /* reserve for fragmentation and ipsec header */
1455 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1456 dst_exthdrlen);
1da177e4 1457
11878b40
WB
1458 /* Only the initial fragment is time stamped */
1459 skb_shinfo(skb)->tx_flags = tx_flags;
1460 tx_flags = 0;
09c2d251
WB
1461 skb_shinfo(skb)->tskey = tskey;
1462 tskey = 0;
a693e698 1463
1da177e4
LT
1464 /*
1465 * Find where to start putting bytes
1466 */
1f85851e
G
1467 data = skb_put(skb, fraglen);
1468 skb_set_network_header(skb, exthdrlen);
1469 data += fragheaderlen;
b0e380b1
ACM
1470 skb->transport_header = (skb->network_header +
1471 fragheaderlen);
1da177e4
LT
1472 if (fraggap) {
1473 skb->csum = skb_copy_and_csum_bits(
1474 skb_prev, maxfraglen,
1475 data + transhdrlen, fraggap, 0);
1476 skb_prev->csum = csum_sub(skb_prev->csum,
1477 skb->csum);
1478 data += fraggap;
e9fa4f7b 1479 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1480 }
1481 copy = datalen - transhdrlen - fraggap;
299b0767 1482
1da177e4
LT
1483 if (copy < 0) {
1484 err = -EINVAL;
1485 kfree_skb(skb);
1486 goto error;
1487 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1488 err = -EFAULT;
1489 kfree_skb(skb);
1490 goto error;
1491 }
1492
1493 offset += copy;
1494 length -= datalen - fraggap;
1495 transhdrlen = 0;
1496 exthdrlen = 0;
299b0767 1497 dst_exthdrlen = 0;
1da177e4
LT
1498
1499 /*
1500 * Put the packet on the pending queue
1501 */
0bbe84a6 1502 __skb_queue_tail(queue, skb);
1da177e4
LT
1503 continue;
1504 }
1505
1506 if (copy > length)
1507 copy = length;
1508
d8d1f30b 1509 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1da177e4
LT
1510 unsigned int off;
1511
1512 off = skb->len;
1513 if (getfrag(from, skb_put(skb, copy),
1514 offset, copy, off, skb) < 0) {
1515 __skb_trim(skb, off);
1516 err = -EFAULT;
1517 goto error;
1518 }
1519 } else {
1520 int i = skb_shinfo(skb)->nr_frags;
1da177e4 1521
5640f768
ED
1522 err = -ENOMEM;
1523 if (!sk_page_frag_refill(sk, pfrag))
1da177e4 1524 goto error;
5640f768
ED
1525
1526 if (!skb_can_coalesce(skb, i, pfrag->page,
1527 pfrag->offset)) {
1528 err = -EMSGSIZE;
1529 if (i == MAX_SKB_FRAGS)
1530 goto error;
1531
1532 __skb_fill_page_desc(skb, i, pfrag->page,
1533 pfrag->offset, 0);
1534 skb_shinfo(skb)->nr_frags = ++i;
1535 get_page(pfrag->page);
1da177e4 1536 }
5640f768 1537 copy = min_t(int, copy, pfrag->size - pfrag->offset);
9e903e08 1538 if (getfrag(from,
5640f768
ED
1539 page_address(pfrag->page) + pfrag->offset,
1540 offset, copy, skb->len, skb) < 0)
1541 goto error_efault;
1542
1543 pfrag->offset += copy;
1544 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1da177e4
LT
1545 skb->len += copy;
1546 skb->data_len += copy;
f945fa7a
HX
1547 skb->truesize += copy;
1548 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1549 }
1550 offset += copy;
1551 length -= copy;
1552 }
5640f768 1553
1da177e4 1554 return 0;
5640f768
ED
1555
1556error_efault:
1557 err = -EFAULT;
1da177e4 1558error:
bdc712b4 1559 cork->length -= length;
3bd653c8 1560 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1561 return err;
1562}
0bbe84a6
VY
1563
1564int ip6_append_data(struct sock *sk,
1565 int getfrag(void *from, char *to, int offset, int len,
1566 int odd, struct sk_buff *skb),
1567 void *from, int length, int transhdrlen, int hlimit,
1568 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1569 struct rt6_info *rt, unsigned int flags, int dontfrag)
1570{
1571 struct inet_sock *inet = inet_sk(sk);
1572 struct ipv6_pinfo *np = inet6_sk(sk);
1573 int exthdrlen;
1574 int err;
1575
1576 if (flags&MSG_PROBE)
1577 return 0;
1578 if (skb_queue_empty(&sk->sk_write_queue)) {
1579 /*
1580 * setup for corking
1581 */
1582 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1583 tclass, opt, rt, fl6);
1584 if (err)
1585 return err;
1586
1587 exthdrlen = (opt ? opt->opt_flen : 0);
1588 length += exthdrlen;
1589 transhdrlen += exthdrlen;
1590 } else {
1591 fl6 = &inet->cork.fl.u.ip6;
1592 transhdrlen = 0;
1593 }
1594
1595 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1596 &np->cork, sk_page_frag(sk), getfrag,
1597 from, length, transhdrlen, flags, dontfrag);
1598}
a495f836 1599EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1600
366e41d9
VY
1601static void ip6_cork_release(struct inet_cork_full *cork,
1602 struct inet6_cork *v6_cork)
bf138862 1603{
366e41d9
VY
1604 if (v6_cork->opt) {
1605 kfree(v6_cork->opt->dst0opt);
1606 kfree(v6_cork->opt->dst1opt);
1607 kfree(v6_cork->opt->hopopt);
1608 kfree(v6_cork->opt->srcrt);
1609 kfree(v6_cork->opt);
1610 v6_cork->opt = NULL;
0178b695
HX
1611 }
1612
366e41d9
VY
1613 if (cork->base.dst) {
1614 dst_release(cork->base.dst);
1615 cork->base.dst = NULL;
1616 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1617 }
366e41d9 1618 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1619}
1620
6422398c
VY
1621struct sk_buff *__ip6_make_skb(struct sock *sk,
1622 struct sk_buff_head *queue,
1623 struct inet_cork_full *cork,
1624 struct inet6_cork *v6_cork)
1da177e4
LT
1625{
1626 struct sk_buff *skb, *tmp_skb;
1627 struct sk_buff **tail_skb;
1628 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1da177e4 1629 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1630 struct net *net = sock_net(sk);
1da177e4 1631 struct ipv6hdr *hdr;
6422398c
VY
1632 struct ipv6_txoptions *opt = v6_cork->opt;
1633 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1634 struct flowi6 *fl6 = &cork->fl.u.ip6;
4c9483b2 1635 unsigned char proto = fl6->flowi6_proto;
1da177e4 1636
6422398c 1637 skb = __skb_dequeue(queue);
63159f29 1638 if (!skb)
1da177e4
LT
1639 goto out;
1640 tail_skb = &(skb_shinfo(skb)->frag_list);
1641
1642 /* move skb->data to ip header from ext header */
d56f90a7 1643 if (skb->data < skb_network_header(skb))
bbe735e4 1644 __skb_pull(skb, skb_network_offset(skb));
6422398c 1645 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
cfe1fc77 1646 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1647 *tail_skb = tmp_skb;
1648 tail_skb = &(tmp_skb->next);
1649 skb->len += tmp_skb->len;
1650 skb->data_len += tmp_skb->len;
1da177e4 1651 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1652 tmp_skb->destructor = NULL;
1653 tmp_skb->sk = NULL;
1da177e4
LT
1654 }
1655
28a89453 1656 /* Allow local fragmentation. */
60ff7467 1657 skb->ignore_df = ip6_sk_ignore_df(sk);
28a89453 1658
4e3fd7a0 1659 *final_dst = fl6->daddr;
cfe1fc77 1660 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1661 if (opt && opt->opt_flen)
1662 ipv6_push_frag_opts(skb, opt, &proto);
1663 if (opt && opt->opt_nflen)
1664 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1665
e2d1bca7
ACM
1666 skb_push(skb, sizeof(struct ipv6hdr));
1667 skb_reset_network_header(skb);
0660e03f 1668 hdr = ipv6_hdr(skb);
1ab1457c 1669
6422398c 1670 ip6_flow_hdr(hdr, v6_cork->tclass,
cb1ce2ef 1671 ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 1672 np->autoflowlabel, fl6));
6422398c 1673 hdr->hop_limit = v6_cork->hop_limit;
1da177e4 1674 hdr->nexthdr = proto;
4e3fd7a0
AD
1675 hdr->saddr = fl6->saddr;
1676 hdr->daddr = *final_dst;
1da177e4 1677
a2c2064f 1678 skb->priority = sk->sk_priority;
4a19ec58 1679 skb->mark = sk->sk_mark;
a2c2064f 1680
d8d1f30b 1681 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1682 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1683 if (proto == IPPROTO_ICMPV6) {
adf30907 1684 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1685
43a43b60
HFS
1686 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1687 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1688 }
1689
6422398c
VY
1690 ip6_cork_release(cork, v6_cork);
1691out:
1692 return skb;
1693}
1694
1695int ip6_send_skb(struct sk_buff *skb)
1696{
1697 struct net *net = sock_net(skb->sk);
1698 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1699 int err;
1700
33224b16 1701 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1702 if (err) {
1703 if (err > 0)
6ce9e7b5 1704 err = net_xmit_errno(err);
1da177e4 1705 if (err)
6422398c
VY
1706 IP6_INC_STATS(net, rt->rt6i_idev,
1707 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1708 }
1709
1da177e4 1710 return err;
6422398c
VY
1711}
1712
/* Finalize and transmit whatever is corked on sk->sk_write_queue. */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	/* An empty queue is not an error. */
	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1724
0bbe84a6 1725static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1726 struct sk_buff_head *queue,
1727 struct inet_cork_full *cork,
1728 struct inet6_cork *v6_cork)
1da177e4 1729{
1da177e4
LT
1730 struct sk_buff *skb;
1731
0bbe84a6 1732 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1733 if (skb_dst(skb))
1734 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1735 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1736 kfree_skb(skb);
1737 }
1738
6422398c 1739 ip6_cork_release(cork, v6_cork);
1da177e4 1740}
0bbe84a6
VY
1741
1742void ip6_flush_pending_frames(struct sock *sk)
1743{
6422398c
VY
1744 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1745 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1746}
a495f836 1747EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1748
1749struct sk_buff *ip6_make_skb(struct sock *sk,
1750 int getfrag(void *from, char *to, int offset,
1751 int len, int odd, struct sk_buff *skb),
1752 void *from, int length, int transhdrlen,
1753 int hlimit, int tclass,
1754 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1755 struct rt6_info *rt, unsigned int flags,
1756 int dontfrag)
1757{
1758 struct inet_cork_full cork;
1759 struct inet6_cork v6_cork;
1760 struct sk_buff_head queue;
1761 int exthdrlen = (opt ? opt->opt_flen : 0);
1762 int err;
1763
1764 if (flags & MSG_PROBE)
1765 return NULL;
1766
1767 __skb_queue_head_init(&queue);
1768
1769 cork.base.flags = 0;
1770 cork.base.addr = 0;
1771 cork.base.opt = NULL;
1772 v6_cork.opt = NULL;
1773 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1774 if (err)
1775 return ERR_PTR(err);
1776
1777 if (dontfrag < 0)
1778 dontfrag = inet6_sk(sk)->dontfrag;
1779
1780 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1781 &current->task_frag, getfrag, from,
1782 length + exthdrlen, transhdrlen + exthdrlen,
1783 flags, dontfrag);
1784 if (err) {
1785 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1786 return ERR_PTR(err);
1787 }
1788
1789 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1790}
This page took 1.139085 seconds and 5 git commands to generate.