dst: Pass net into dst->output
[deliverable/linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
7d8c6e39 59static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
f6b72b62 63 struct neighbour *neigh;
6fd6ce20
YH
64 struct in6_addr *nexthop;
65 int ret;
1da177e4
LT
66
67 skb->protocol = htons(ETH_P_IPV6);
68 skb->dev = dev;
69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7026b1dd 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
78126c41 74 ((mroute6_socket(net, skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385 84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 85 net, sk, newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 89 IP6_INC_STATS(net, idev,
3bd653c8 90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
78126c41 96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
97
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
101 kfree_skb(skb);
102 return 0;
103 }
1da177e4
LT
104 }
105
6fd6ce20 106 rcu_read_lock_bh();
2647a9b0 107 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
108 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
109 if (unlikely(!neigh))
110 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
111 if (!IS_ERR(neigh)) {
112 ret = dst_neigh_output(dst, neigh, skb);
113 rcu_read_unlock_bh();
114 return ret;
115 }
116 rcu_read_unlock_bh();
05e3aa09 117
78126c41 118 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
119 kfree_skb(skb);
120 return -EINVAL;
1da177e4
LT
121}
122
0c4b51f0 123static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490
JE
124{
125 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
126 dst_allfrag(skb_dst(skb)) ||
127 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 128 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 129 else
7d8c6e39 130 return ip6_finish_output2(net, sk, skb);
9e508490
JE
131}
132
ede2059d 133int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 134{
9e508490 135 struct net_device *dev = skb_dst(skb)->dev;
adf30907 136 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 137
778d80be 138 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 139 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
140 kfree_skb(skb);
141 return 0;
142 }
143
29a26a56
EB
144 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
145 net, sk, skb, NULL, dev,
9c6eb28a
JE
146 ip6_finish_output,
147 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
148}
149
1da177e4 150/*
1c1e9d2b
ED
151 * xmit an sk_buff (used by TCP, SCTP and DCCP)
152 * Note : socket lock is not held for SYNACK packets, but might be modified
153 * by calls to skb_set_owner_w() and ipv6_local_error(),
154 * which are using proper atomic operations or spinlocks.
1da177e4 155 */
1c1e9d2b 156int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 157 struct ipv6_txoptions *opt, int tclass)
1da177e4 158{
3bd653c8 159 struct net *net = sock_net(sk);
1c1e9d2b 160 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 161 struct in6_addr *first_hop = &fl6->daddr;
adf30907 162 struct dst_entry *dst = skb_dst(skb);
1da177e4 163 struct ipv6hdr *hdr;
4c9483b2 164 u8 proto = fl6->flowi6_proto;
1da177e4 165 int seg_len = skb->len;
e651f03a 166 int hlimit = -1;
1da177e4
LT
167 u32 mtu;
168
169 if (opt) {
c2636b4d 170 unsigned int head_room;
1da177e4
LT
171
172 /* First: exthdrs may take lots of space (~8K for now)
173 MAX_HEADER is not enough.
174 */
175 head_room = opt->opt_nflen + opt->opt_flen;
176 seg_len += head_room;
177 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
178
179 if (skb_headroom(skb) < head_room) {
180 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 181 if (!skb2) {
adf30907 182 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
183 IPSTATS_MIB_OUTDISCARDS);
184 kfree_skb(skb);
1da177e4
LT
185 return -ENOBUFS;
186 }
808db80a 187 consume_skb(skb);
a11d206d 188 skb = skb2;
1c1e9d2b
ED
189 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
190 * it is safe to call in our context (socket lock not held)
191 */
192 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
193 }
194 if (opt->opt_flen)
195 ipv6_push_frag_opts(skb, opt, &proto);
196 if (opt->opt_nflen)
197 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
198 }
199
e2d1bca7
ACM
200 skb_push(skb, sizeof(struct ipv6hdr));
201 skb_reset_network_header(skb);
0660e03f 202 hdr = ipv6_hdr(skb);
1da177e4
LT
203
204 /*
205 * Fill in the IPv6 header
206 */
b903d324 207 if (np)
1da177e4
LT
208 hlimit = np->hop_limit;
209 if (hlimit < 0)
6b75d090 210 hlimit = ip6_dst_hoplimit(dst);
1da177e4 211
cb1ce2ef 212 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 213 np->autoflowlabel, fl6));
41a1f8ea 214
1da177e4
LT
215 hdr->payload_len = htons(seg_len);
216 hdr->nexthdr = proto;
217 hdr->hop_limit = hlimit;
218
4e3fd7a0
AD
219 hdr->saddr = fl6->saddr;
220 hdr->daddr = *first_hop;
1da177e4 221
9c9c9ad5 222 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 223 skb->priority = sk->sk_priority;
4a19ec58 224 skb->mark = sk->sk_mark;
a2c2064f 225
1da177e4 226 mtu = dst_mtu(dst);
60ff7467 227 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 228 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 229 IPSTATS_MIB_OUT, skb->len);
1c1e9d2b
ED
230 /* hooks should never assume socket lock is held.
231 * we promote our socket to non const
232 */
29a26a56 233 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 234 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 235 dst_output);
1da177e4
LT
236 }
237
1da177e4 238 skb->dev = dst->dev;
1c1e9d2b
ED
239 /* ipv6_local_error() does not require socket lock,
240 * we promote our socket to non const
241 */
242 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
243
adf30907 244 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
245 kfree_skb(skb);
246 return -EMSGSIZE;
247}
7159039a
YH
248EXPORT_SYMBOL(ip6_xmit);
249
1da177e4
LT
250static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
251{
252 struct ip6_ra_chain *ra;
253 struct sock *last = NULL;
254
255 read_lock(&ip6_ra_lock);
256 for (ra = ip6_ra_chain; ra; ra = ra->next) {
257 struct sock *sk = ra->sk;
0bd1b59b
AM
258 if (sk && ra->sel == sel &&
259 (!sk->sk_bound_dev_if ||
260 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
261 if (last) {
262 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
263 if (skb2)
264 rawv6_rcv(last, skb2);
265 }
266 last = sk;
267 }
268 }
269
270 if (last) {
271 rawv6_rcv(last, skb);
272 read_unlock(&ip6_ra_lock);
273 return 1;
274 }
275 read_unlock(&ip6_ra_lock);
276 return 0;
277}
278
e21e0b5f
VN
279static int ip6_forward_proxy_check(struct sk_buff *skb)
280{
0660e03f 281 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 282 u8 nexthdr = hdr->nexthdr;
75f2811c 283 __be16 frag_off;
e21e0b5f
VN
284 int offset;
285
286 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 287 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
288 if (offset < 0)
289 return 0;
290 } else
291 offset = sizeof(struct ipv6hdr);
292
293 if (nexthdr == IPPROTO_ICMPV6) {
294 struct icmp6hdr *icmp6;
295
d56f90a7
ACM
296 if (!pskb_may_pull(skb, (skb_network_header(skb) +
297 offset + 1 - skb->data)))
e21e0b5f
VN
298 return 0;
299
d56f90a7 300 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
301
302 switch (icmp6->icmp6_type) {
303 case NDISC_ROUTER_SOLICITATION:
304 case NDISC_ROUTER_ADVERTISEMENT:
305 case NDISC_NEIGHBOUR_SOLICITATION:
306 case NDISC_NEIGHBOUR_ADVERTISEMENT:
307 case NDISC_REDIRECT:
308 /* For reaction involving unicast neighbor discovery
309 * message destined to the proxied address, pass it to
310 * input function.
311 */
312 return 1;
313 default:
314 break;
315 }
316 }
317
74553b09
VN
318 /*
319 * The proxying router can't forward traffic sent to a link-local
320 * address, so signal the sender and discard the packet. This
321 * behavior is clarified by the MIPv6 specification.
322 */
323 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
324 dst_link_failure(skb);
325 return -1;
326 }
327
e21e0b5f
VN
328 return 0;
329}
330
0c4b51f0
EB
331static inline int ip6_forward_finish(struct net *net, struct sock *sk,
332 struct sk_buff *skb)
1da177e4 333{
c29390c6 334 skb_sender_cpu_clear(skb);
13206b6b 335 return dst_output(net, sk, skb);
1da177e4
LT
336}
337
0954cf9c
HFS
338static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
339{
340 unsigned int mtu;
341 struct inet6_dev *idev;
342
343 if (dst_metric_locked(dst, RTAX_MTU)) {
344 mtu = dst_metric_raw(dst, RTAX_MTU);
345 if (mtu)
346 return mtu;
347 }
348
349 mtu = IPV6_MIN_MTU;
350 rcu_read_lock();
351 idev = __in6_dev_get(dst->dev);
352 if (idev)
353 mtu = idev->cnf.mtu6;
354 rcu_read_unlock();
355
356 return mtu;
357}
358
fe6cc55f
FW
359static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
360{
418a3156 361 if (skb->len <= mtu)
fe6cc55f
FW
362 return false;
363
60ff7467 364 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
365 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
366 return true;
367
60ff7467 368 if (skb->ignore_df)
418a3156
FW
369 return false;
370
fe6cc55f
FW
371 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
372 return false;
373
374 return true;
375}
376
1da177e4
LT
377int ip6_forward(struct sk_buff *skb)
378{
adf30907 379 struct dst_entry *dst = skb_dst(skb);
0660e03f 380 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 381 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 382 struct net *net = dev_net(dst->dev);
14f3ad6f 383 u32 mtu;
1ab1457c 384
53b7997f 385 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
386 goto error;
387
090f1166
LR
388 if (skb->pkt_type != PACKET_HOST)
389 goto drop;
390
4497b076
BH
391 if (skb_warn_if_lro(skb))
392 goto drop;
393
1da177e4 394 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
395 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
396 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
397 goto drop;
398 }
399
35fc92a9 400 skb_forward_csum(skb);
1da177e4
LT
401
402 /*
403 * We DO NOT make any processing on
404 * RA packets, pushing them to user level AS IS
405 * without ane WARRANTY that application will be able
406 * to interpret them. The reason is that we
407 * cannot make anything clever here.
408 *
409 * We are not end-node, so that if packet contains
410 * AH/ESP, we cannot make anything.
411 * Defragmentation also would be mistake, RA packets
412 * cannot be fragmented, because there is no warranty
413 * that different fragments will go along one path. --ANK
414 */
ab4eb353
YH
415 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
416 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
417 return 0;
418 }
419
420 /*
421 * check and decrement ttl
422 */
423 if (hdr->hop_limit <= 1) {
424 /* Force OUTPUT device used as source address */
425 skb->dev = dst->dev;
3ffe533c 426 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
427 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
428 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
429
430 kfree_skb(skb);
431 return -ETIMEDOUT;
432 }
433
fbea49e1 434 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 435 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 436 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
437 int proxied = ip6_forward_proxy_check(skb);
438 if (proxied > 0)
e21e0b5f 439 return ip6_input(skb);
74553b09 440 else if (proxied < 0) {
15c77d8b
ED
441 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
442 IPSTATS_MIB_INDISCARDS);
74553b09
VN
443 goto drop;
444 }
e21e0b5f
VN
445 }
446
1da177e4 447 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
448 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
449 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
450 goto drop;
451 }
adf30907 452 dst = skb_dst(skb);
1da177e4
LT
453
454 /* IPv6 specs say nothing about it, but it is clear that we cannot
455 send redirects to source routed frames.
1e5dc146 456 We don't send redirects to frames decapsulated from IPsec.
1da177e4 457 */
c45a3dfb 458 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 459 struct in6_addr *target = NULL;
fbfe95a4 460 struct inet_peer *peer;
1da177e4 461 struct rt6_info *rt;
1da177e4
LT
462
463 /*
464 * incoming and outgoing devices are the same
465 * send a redirect.
466 */
467
468 rt = (struct rt6_info *) dst;
c45a3dfb
DM
469 if (rt->rt6i_flags & RTF_GATEWAY)
470 target = &rt->rt6i_gateway;
1da177e4
LT
471 else
472 target = &hdr->daddr;
473
fd0273d7 474 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 475
1da177e4
LT
476 /* Limit redirects both by destination (here)
477 and by source (inside ndisc_send_redirect)
478 */
fbfe95a4 479 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 480 ndisc_send_redirect(skb, target);
1d861aa4
DM
481 if (peer)
482 inet_putpeer(peer);
5bb1ab09
DS
483 } else {
484 int addrtype = ipv6_addr_type(&hdr->saddr);
485
1da177e4 486 /* This check is security critical. */
f81b2e7d
YH
487 if (addrtype == IPV6_ADDR_ANY ||
488 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
489 goto error;
490 if (addrtype & IPV6_ADDR_LINKLOCAL) {
491 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 492 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
493 goto error;
494 }
1da177e4
LT
495 }
496
0954cf9c 497 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
498 if (mtu < IPV6_MIN_MTU)
499 mtu = IPV6_MIN_MTU;
500
fe6cc55f 501 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
502 /* Again, force OUTPUT device used as source address */
503 skb->dev = dst->dev;
14f3ad6f 504 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
505 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
506 IPSTATS_MIB_INTOOBIGERRORS);
507 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
508 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
509 kfree_skb(skb);
510 return -EMSGSIZE;
511 }
512
513 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
514 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
515 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
516 goto drop;
517 }
518
0660e03f 519 hdr = ipv6_hdr(skb);
1da177e4
LT
520
521 /* Mangling hops number delayed to point after skb COW */
1ab1457c 522
1da177e4
LT
523 hdr->hop_limit--;
524
483a47d2 525 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 526 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
29a26a56
EB
527 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
528 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 529 ip6_forward_finish);
1da177e4
LT
530
531error:
483a47d2 532 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
533drop:
534 kfree_skb(skb);
535 return -EINVAL;
536}
537
538static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
539{
540 to->pkt_type = from->pkt_type;
541 to->priority = from->priority;
542 to->protocol = from->protocol;
adf30907
ED
543 skb_dst_drop(to);
544 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 545 to->dev = from->dev;
82e91ffe 546 to->mark = from->mark;
1da177e4
LT
547
548#ifdef CONFIG_NET_SCHED
549 to->tc_index = from->tc_index;
550#endif
e7ac05f3 551 nf_copy(to, from);
984bc16c 552 skb_copy_secmark(to, from);
1da177e4
LT
553}
554
7d8c6e39
EB
555int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
556 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 557{
1da177e4 558 struct sk_buff *frag;
67ba4152 559 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 560 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
561 inet6_sk(skb->sk) : NULL;
1da177e4
LT
562 struct ipv6hdr *tmp_hdr;
563 struct frag_hdr *fh;
564 unsigned int mtu, hlen, left, len;
a7ae1992 565 int hroom, troom;
286c2349 566 __be32 frag_id;
67ba4152 567 int ptr, offset = 0, err = 0;
1da177e4
LT
568 u8 *prevhdr, nexthdr = 0;
569
1da177e4
LT
570 hlen = ip6_find_1stfragopt(skb, &prevhdr);
571 nexthdr = *prevhdr;
572
628a5c56 573 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
574
575 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 576 * or if the skb it not generated by a local socket.
b881ef76 577 */
485fca66
FW
578 if (unlikely(!skb->ignore_df && skb->len > mtu))
579 goto fail_toobig;
a34a101e 580
485fca66
FW
581 if (IP6CB(skb)->frag_max_size) {
582 if (IP6CB(skb)->frag_max_size > mtu)
583 goto fail_toobig;
584
585 /* don't send fragments larger than what we received */
586 mtu = IP6CB(skb)->frag_max_size;
587 if (mtu < IPV6_MIN_MTU)
588 mtu = IPV6_MIN_MTU;
b881ef76
JH
589 }
590
d91675f9
YH
591 if (np && np->frag_size < mtu) {
592 if (np->frag_size)
593 mtu = np->frag_size;
594 }
595 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 596
fd0273d7
MKL
597 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
598 &ipv6_hdr(skb)->saddr);
286c2349 599
1d325d21 600 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 601 if (skb_has_frag_list(skb)) {
1da177e4 602 int first_len = skb_pagelen(skb);
3d13008e 603 struct sk_buff *frag2;
1da177e4
LT
604
605 if (first_len - hlen > mtu ||
606 ((first_len - hlen) & 7) ||
1d325d21
FW
607 skb_cloned(skb) ||
608 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
609 goto slow_path;
610
4d9092bb 611 skb_walk_frags(skb, frag) {
1da177e4
LT
612 /* Correct geometry. */
613 if (frag->len > mtu ||
614 ((frag->len & 7) && frag->next) ||
1d325d21 615 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 616 goto slow_path_clean;
1da177e4 617
1da177e4
LT
618 /* Partially cloned skb? */
619 if (skb_shared(frag))
3d13008e 620 goto slow_path_clean;
2fdba6b0
HX
621
622 BUG_ON(frag->sk);
623 if (skb->sk) {
2fdba6b0
HX
624 frag->sk = skb->sk;
625 frag->destructor = sock_wfree;
2fdba6b0 626 }
3d13008e 627 skb->truesize -= frag->truesize;
1da177e4
LT
628 }
629
630 err = 0;
631 offset = 0;
1da177e4
LT
632 /* BUILD HEADER */
633
9a217a1c 634 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 635 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 636 if (!tmp_hdr) {
adf30907 637 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 638 IPSTATS_MIB_FRAGFAILS);
1d325d21
FW
639 err = -ENOMEM;
640 goto fail;
1da177e4 641 }
1d325d21
FW
642 frag = skb_shinfo(skb)->frag_list;
643 skb_frag_list_init(skb);
1da177e4 644
1da177e4 645 __skb_pull(skb, hlen);
67ba4152 646 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
647 __skb_push(skb, hlen);
648 skb_reset_network_header(skb);
d56f90a7 649 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 650
1da177e4
LT
651 fh->nexthdr = nexthdr;
652 fh->reserved = 0;
653 fh->frag_off = htons(IP6_MF);
286c2349 654 fh->identification = frag_id;
1da177e4
LT
655
656 first_len = skb_pagelen(skb);
657 skb->data_len = first_len - skb_headlen(skb);
658 skb->len = first_len;
0660e03f
ACM
659 ipv6_hdr(skb)->payload_len = htons(first_len -
660 sizeof(struct ipv6hdr));
a11d206d 661
d8d1f30b 662 dst_hold(&rt->dst);
1da177e4
LT
663
664 for (;;) {
665 /* Prepare header of the next frame,
666 * before previous one went down. */
667 if (frag) {
668 frag->ip_summed = CHECKSUM_NONE;
badff6d0 669 skb_reset_transport_header(frag);
67ba4152 670 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
671 __skb_push(frag, hlen);
672 skb_reset_network_header(frag);
d56f90a7
ACM
673 memcpy(skb_network_header(frag), tmp_hdr,
674 hlen);
1da177e4
LT
675 offset += skb->len - hlen - sizeof(struct frag_hdr);
676 fh->nexthdr = nexthdr;
677 fh->reserved = 0;
678 fh->frag_off = htons(offset);
53b24b8f 679 if (frag->next)
1da177e4
LT
680 fh->frag_off |= htons(IP6_MF);
681 fh->identification = frag_id;
0660e03f
ACM
682 ipv6_hdr(frag)->payload_len =
683 htons(frag->len -
684 sizeof(struct ipv6hdr));
1da177e4
LT
685 ip6_copy_metadata(frag, skb);
686 }
1ab1457c 687
7d8c6e39 688 err = output(net, sk, skb);
67ba4152 689 if (!err)
d8d1f30b 690 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 691 IPSTATS_MIB_FRAGCREATES);
dafee490 692
1da177e4
LT
693 if (err || !frag)
694 break;
695
696 skb = frag;
697 frag = skb->next;
698 skb->next = NULL;
699 }
700
a51482bd 701 kfree(tmp_hdr);
1da177e4
LT
702
703 if (err == 0) {
d8d1f30b 704 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 705 IPSTATS_MIB_FRAGOKS);
94e187c0 706 ip6_rt_put(rt);
1da177e4
LT
707 return 0;
708 }
709
46cfd725 710 kfree_skb_list(frag);
1da177e4 711
d8d1f30b 712 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 713 IPSTATS_MIB_FRAGFAILS);
94e187c0 714 ip6_rt_put(rt);
1da177e4 715 return err;
3d13008e
ED
716
717slow_path_clean:
718 skb_walk_frags(skb, frag2) {
719 if (frag2 == frag)
720 break;
721 frag2->sk = NULL;
722 frag2->destructor = NULL;
723 skb->truesize += frag2->truesize;
724 }
1da177e4
LT
725 }
726
727slow_path:
72e843bb
ED
728 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
729 skb_checksum_help(skb))
730 goto fail;
731
1da177e4
LT
732 left = skb->len - hlen; /* Space per frame */
733 ptr = hlen; /* Where to start from */
734
735 /*
736 * Fragment the datagram.
737 */
738
739 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992 740 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
741
742 /*
743 * Keep copying data until we run out.
744 */
67ba4152 745 while (left > 0) {
1da177e4
LT
746 len = left;
747 /* IF: it doesn't fit, use 'mtu' - the data space left */
748 if (len > mtu)
749 len = mtu;
25985edc 750 /* IF: we are not sending up to and including the packet end
1da177e4
LT
751 then align the next start on an eight byte boundary */
752 if (len < left) {
753 len &= ~7;
754 }
1da177e4 755
cbffccc9
JP
756 /* Allocate buffer */
757 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
758 hroom + troom, GFP_ATOMIC);
759 if (!frag) {
adf30907 760 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 761 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
762 err = -ENOMEM;
763 goto fail;
764 }
765
766 /*
767 * Set up data on packet
768 */
769
770 ip6_copy_metadata(frag, skb);
a7ae1992 771 skb_reserve(frag, hroom);
1da177e4 772 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 773 skb_reset_network_header(frag);
badff6d0 774 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
775 frag->transport_header = (frag->network_header + hlen +
776 sizeof(struct frag_hdr));
1da177e4
LT
777
778 /*
779 * Charge the memory for the fragment to any owner
780 * it might possess
781 */
782 if (skb->sk)
783 skb_set_owner_w(frag, skb->sk);
784
785 /*
786 * Copy the packet header into the new buffer.
787 */
d626f62b 788 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
789
790 /*
791 * Build fragment header.
792 */
793 fh->nexthdr = nexthdr;
794 fh->reserved = 0;
286c2349 795 fh->identification = frag_id;
1da177e4
LT
796
797 /*
798 * Copy a block of the IP datagram.
799 */
e3f0b86b
HS
800 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
801 len));
1da177e4
LT
802 left -= len;
803
804 fh->frag_off = htons(offset);
805 if (left > 0)
806 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
807 ipv6_hdr(frag)->payload_len = htons(frag->len -
808 sizeof(struct ipv6hdr));
1da177e4
LT
809
810 ptr += len;
811 offset += len;
812
813 /*
814 * Put this fragment into the sending queue.
815 */
7d8c6e39 816 err = output(net, sk, frag);
1da177e4
LT
817 if (err)
818 goto fail;
dafee490 819
adf30907 820 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 821 IPSTATS_MIB_FRAGCREATES);
1da177e4 822 }
adf30907 823 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 824 IPSTATS_MIB_FRAGOKS);
808db80a 825 consume_skb(skb);
1da177e4
LT
826 return err;
827
485fca66
FW
828fail_toobig:
829 if (skb->sk && dst_allfrag(skb_dst(skb)))
830 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
831
832 skb->dev = skb_dst(skb)->dev;
833 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
834 err = -EMSGSIZE;
835
1da177e4 836fail:
adf30907 837 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 838 IPSTATS_MIB_FRAGFAILS);
1ab1457c 839 kfree_skb(skb);
1da177e4
LT
840 return err;
841}
842
b71d1d42
ED
843static inline int ip6_rt_check(const struct rt6key *rt_key,
844 const struct in6_addr *fl_addr,
845 const struct in6_addr *addr_cache)
cf6b1982 846{
a02cec21 847 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 848 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
849}
850
497c615a
HX
851static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
852 struct dst_entry *dst,
b71d1d42 853 const struct flowi6 *fl6)
1da177e4 854{
497c615a 855 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 856 struct rt6_info *rt;
1da177e4 857
497c615a
HX
858 if (!dst)
859 goto out;
860
a963a37d
ED
861 if (dst->ops->family != AF_INET6) {
862 dst_release(dst);
863 return NULL;
864 }
865
866 rt = (struct rt6_info *)dst;
497c615a
HX
867 /* Yes, checking route validity in not connected
868 * case is not very simple. Take into account,
869 * that we do not support routing by source, TOS,
67ba4152 870 * and MSG_DONTROUTE --ANK (980726)
497c615a 871 *
cf6b1982
YH
872 * 1. ip6_rt_check(): If route was host route,
873 * check that cached destination is current.
497c615a
HX
874 * If it is network route, we still may
875 * check its validity using saved pointer
876 * to the last used address: daddr_cache.
877 * We do not want to save whole address now,
878 * (because main consumer of this service
879 * is tcp, which has not this problem),
880 * so that the last trick works only on connected
881 * sockets.
882 * 2. oif also should be the same.
883 */
4c9483b2 884 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 885#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 886 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 887#endif
4c9483b2 888 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
889 dst_release(dst);
890 dst = NULL;
1da177e4
LT
891 }
892
497c615a
HX
893out:
894 return dst;
895}
896
3aef934f 897static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 898 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 899{
69cce1d1
DM
900#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
901 struct neighbour *n;
97cac082 902 struct rt6_info *rt;
69cce1d1
DM
903#endif
904 int err;
497c615a 905
e16e888b
MS
906 /* The correct way to handle this would be to do
907 * ip6_route_get_saddr, and then ip6_route_output; however,
908 * the route-specific preferred source forces the
909 * ip6_route_output call _before_ ip6_route_get_saddr.
910 *
911 * In source specific routing (no src=any default route),
912 * ip6_route_output will fail given src=any saddr, though, so
913 * that's why we try it again later.
914 */
915 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
916 struct rt6_info *rt;
917 bool had_dst = *dst != NULL;
1da177e4 918
e16e888b
MS
919 if (!had_dst)
920 *dst = ip6_route_output(net, sk, fl6);
921 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
922 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
923 sk ? inet6_sk(sk)->srcprefs : 0,
924 &fl6->saddr);
44456d37 925 if (err)
1da177e4 926 goto out_err_release;
e16e888b
MS
927
928 /* If we had an erroneous initial result, pretend it
929 * never existed and let the SA-enabled version take
930 * over.
931 */
932 if (!had_dst && (*dst)->error) {
933 dst_release(*dst);
934 *dst = NULL;
935 }
1da177e4
LT
936 }
937
e16e888b
MS
938 if (!*dst)
939 *dst = ip6_route_output(net, sk, fl6);
940
941 err = (*dst)->error;
942 if (err)
943 goto out_err_release;
944
95c385b4 945#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
946 /*
947 * Here if the dst entry we've looked up
948 * has a neighbour entry that is in the INCOMPLETE
949 * state and the src address from the flow is
950 * marked as OPTIMISTIC, we release the found
951 * dst entry and replace it instead with the
952 * dst entry of the nexthop router
953 */
c56bf6fe 954 rt = (struct rt6_info *) *dst;
707be1ff 955 rcu_read_lock_bh();
2647a9b0
MKL
956 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
957 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
958 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
959 rcu_read_unlock_bh();
960
961 if (err) {
e550dfb0 962 struct inet6_ifaddr *ifp;
4c9483b2 963 struct flowi6 fl_gw6;
e550dfb0
NH
964 int redirect;
965
4c9483b2 966 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
967 (*dst)->dev, 1);
968
969 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
970 if (ifp)
971 in6_ifa_put(ifp);
972
973 if (redirect) {
974 /*
975 * We need to get the dst entry for the
976 * default router instead
977 */
978 dst_release(*dst);
4c9483b2
DM
979 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
980 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
981 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
982 err = (*dst)->error;
983 if (err)
e550dfb0 984 goto out_err_release;
95c385b4 985 }
e550dfb0 986 }
95c385b4
NH
987#endif
988
1da177e4
LT
989 return 0;
990
991out_err_release:
ca46f9c8 992 if (err == -ENETUNREACH)
5ac68e7c 993 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
994 dst_release(*dst);
995 *dst = NULL;
996 return err;
997}
34a0b3cd 998
497c615a
HX
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	/* Start from a clean slate; ip6_dst_lookup_tail() fills *dst in. */
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1016
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	If @final_dst is non-NULL the routing decision is made for the
 *	original fl6->daddr, then fl6->daddr is overwritten with
 *	@final_dst before the xfrm lookup (used when a routing header
 *	redirects the packet through an intermediate hop).
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	/* Pin the flow to the device the route resolved to if the
	 * caller did not specify an outgoing interface.
	 */
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = dst->dev->ifindex;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1045
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	/* Grab the cached route (validated against the stored cookie)... */
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	/* ...and drop it if it no longer matches this flow. */
	dst = ip6_sk_dst_check(sk, dst, fl6);

	/* Performs a fresh lookup only when dst was invalidated above. */
	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1077
/* ip6_ufo_append_data - append data as one large UDP (UFO/GSO) skb.
 *
 * Instead of software fragmentation, build a single skb holding the whole
 * datagram and let the device segment it: gso_size carries the per-fragment
 * payload size, gso_type is SKB_GSO_UDP.  Called from __ip6_append_data()
 * when the device advertises NETIF_F_UFO.  Returns 0 or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already GSO: just keep appending payload. */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pre-select the fragment ID the device will use when it emits
	 * the fragment headers during segmentation.
	 */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	/* Payload (minus the transport header already in the linear
	 * area) lands in page fragments.
	 */
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1137
0178b695
HX
1138static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1139 gfp_t gfp)
1140{
1141 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1142}
1143
1144static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1145 gfp_t gfp)
1146{
1147 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1148}
1149
75a493e6 1150static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1151 int *maxfraglen,
1152 unsigned int fragheaderlen,
1153 struct sk_buff *skb,
75a493e6 1154 struct rt6_info *rt,
e367c2d0 1155 unsigned int orig_mtu)
0c183379
G
1156{
1157 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1158 if (!skb) {
0c183379 1159 /* first fragment, reserve header_len */
e367c2d0 1160 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1161
1162 } else {
1163 /*
1164 * this fragment is not first, the headers
1165 * space is regarded as data space.
1166 */
e367c2d0 1167 *mtu = orig_mtu;
0c183379
G
1168 }
1169 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1170 + fragheaderlen - sizeof(struct frag_hdr);
1171 }
1172}
1173
366e41d9
VY
/* ip6_setup_cork - initialise cork state for a (possibly multi-call) send.
 *
 * Deep-copies the caller's extension headers @opt into @v6_cork (so the
 * caller's copy may go away while the cork is held), takes a reference on
 * @rt, snapshots the flow, and computes the per-fragment MTU to use.
 * Returns 0, -EINVAL if the cork already holds options, or -ENOBUFS on
 * allocation failure.
 *
 * NOTE(review): on a mid-way -ENOBUFS the already-duplicated sub-options
 * stay attached to v6_cork->opt and are only reclaimed when
 * ip6_cork_release() eventually runs — verify every caller gets there on
 * the error path.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork owns a reference on the route until ip6_cork_release(). */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	/* Pick the fragment size: with PMTU probing use the device MTU,
	 * otherwise the path MTU (inner path for XFRM tunnels).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A user-requested frag_size may shrink the MTU further. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1241
0bbe84a6
VY
/* __ip6_append_data - copy user data onto the pending transmit queue.
 *
 * Core worker behind ip6_append_data() and ip6_make_skb(): pulls @length
 * bytes through @getfrag into skbs on @queue, either filling the current
 * tail skb or allocating new ones sized to the corked MTU, so that
 * __ip6_make_skb() can later emit them (as a frag_list of fragments when
 * the message exceeds the MTU).  Returns 0 or a negative errno; on error
 * the un-appended bytes are subtracted back out of cork->length.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First call for this cork: account for extension headers
		 * and any dst (e.g. IPsec) header space once.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned payload that still leaves room for a
	 * fragment header.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			/* Report the path MTU to the application instead of
			 * fragmenting (IPV6_DONTFRAG semantics).
			 */
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 * Use transhdrlen, same as IPv4, because partial
	 * sums only work when transhdrlen is set.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Oversized UDP on a UFO-capable device: hand the whole datagram
	 * to the device as one GSO skb rather than fragmenting here.
	 */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Soft wmem limit for follow-up fragments:
				 * allow up to twice sk_sndbuf.
				 */
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the previous
				 * skb into this one, keeping checksums valid.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into the socket's page frag
			 * and attach (or extend) a skb page fragment.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic accounting done before the loop. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1556
/**
 *	ip6_append_data - append data to the socket's pending write queue
 *	@sk: socket to transmit on
 *	@getfrag: callback that copies the next chunk of user data
 *	@from: opaque cookie passed to @getfrag
 *	@length: number of payload bytes to append
 *	@transhdrlen: transport header length (non-zero only on first call)
 *	@hlimit: hop limit for the packet
 *	@tclass: traffic class
 *	@opt: IPv6 extension headers, may be NULL
 *	@fl6: flow describing the destination
 *	@rt: route to use
 *	@flags: msghdr flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE, ...)
 *	@dontfrag: IPV6_DONTFRAG setting for this send
 *
 *	First call on an empty write queue sets up the cork (copying @opt
 *	and pinning @rt); later calls under the same cork reuse the stored
 *	flow and ignore @transhdrlen.  Returns 0 or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		/* The first chunk also carries the extension headers. */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Already corked: the stored flow wins over the caller's. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1593
366e41d9
VY
1594static void ip6_cork_release(struct inet_cork_full *cork,
1595 struct inet6_cork *v6_cork)
bf138862 1596{
366e41d9
VY
1597 if (v6_cork->opt) {
1598 kfree(v6_cork->opt->dst0opt);
1599 kfree(v6_cork->opt->dst1opt);
1600 kfree(v6_cork->opt->hopopt);
1601 kfree(v6_cork->opt->srcrt);
1602 kfree(v6_cork->opt);
1603 v6_cork->opt = NULL;
0178b695
HX
1604 }
1605
366e41d9
VY
1606 if (cork->base.dst) {
1607 dst_release(cork->base.dst);
1608 cork->base.dst = NULL;
1609 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1610 }
366e41d9 1611 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1612}
1613
6422398c
VY
/* __ip6_make_skb - turn the queued chunks into one transmittable skb.
 *
 * Dequeues all skbs from @queue, chains the followers onto the first
 * skb's frag_list, pushes the extension headers and the IPv6 header,
 * attaches the route and releases the cork.  Returns the finished skb,
 * or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain all remaining skbs onto the head skb's frag_list; their
	 * memory accounting moves to the head (destructor/sk cleared).
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		/* May rewrite final_dst when a routing header is present. */
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1687
1688int ip6_send_skb(struct sk_buff *skb)
1689{
1690 struct net *net = sock_net(skb->sk);
1691 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1692 int err;
1693
33224b16 1694 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1695 if (err) {
1696 if (err > 0)
6ce9e7b5 1697 err = net_xmit_errno(err);
1da177e4 1698 if (err)
6422398c
VY
1699 IP6_INC_STATS(net, rt->rt6i_idev,
1700 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1701 }
1702
1da177e4 1703 return err;
6422398c
VY
1704}
1705
/* Finalise and transmit whatever is corked on the socket's write queue.
 * An empty queue is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1717
0bbe84a6 1718static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1719 struct sk_buff_head *queue,
1720 struct inet_cork_full *cork,
1721 struct inet6_cork *v6_cork)
1da177e4 1722{
1da177e4
LT
1723 struct sk_buff *skb;
1724
0bbe84a6 1725 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1726 if (skb_dst(skb))
1727 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1728 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1729 kfree_skb(skb);
1730 }
1731
6422398c 1732 ip6_cork_release(cork, v6_cork);
1da177e4 1733}
0bbe84a6
VY
1734
1735void ip6_flush_pending_frames(struct sock *sk)
1736{
6422398c
VY
1737 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1738 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1739}
a495f836 1740EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1741
1742struct sk_buff *ip6_make_skb(struct sock *sk,
1743 int getfrag(void *from, char *to, int offset,
1744 int len, int odd, struct sk_buff *skb),
1745 void *from, int length, int transhdrlen,
1746 int hlimit, int tclass,
1747 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1748 struct rt6_info *rt, unsigned int flags,
1749 int dontfrag)
1750{
1751 struct inet_cork_full cork;
1752 struct inet6_cork v6_cork;
1753 struct sk_buff_head queue;
1754 int exthdrlen = (opt ? opt->opt_flen : 0);
1755 int err;
1756
1757 if (flags & MSG_PROBE)
1758 return NULL;
1759
1760 __skb_queue_head_init(&queue);
1761
1762 cork.base.flags = 0;
1763 cork.base.addr = 0;
1764 cork.base.opt = NULL;
1765 v6_cork.opt = NULL;
1766 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1767 if (err)
1768 return ERR_PTR(err);
1769
1770 if (dontfrag < 0)
1771 dontfrag = inet6_sk(sk)->dontfrag;
1772
1773 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1774 &current->task_frag, getfrag, from,
1775 length + exthdrlen, transhdrlen + exthdrlen,
1776 flags, dontfrag);
1777 if (err) {
1778 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1779 return ERR_PTR(err);
1780 }
1781
1782 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1783}
This page took 1.061582 seconds and 5 git commands to generate.