net: core: introduce netif_skb_dev_features
[deliverable/linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
9e508490 59static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
f6b72b62 63 struct neighbour *neigh;
6fd6ce20
YH
64 struct in6_addr *nexthop;
65 int ret;
1da177e4
LT
66
67 skb->protocol = htons(ETH_P_IPV6);
68 skb->dev = dev;
69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7ad6848c 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 74 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385
JE
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
89 IP6_INC_STATS(dev_net(dev), idev,
90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
edf391ff
NH
96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97 skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20 107 rcu_read_lock_bh();
550bab42 108 nexthop = rt6_nexthop((struct rt6_info *)dst);
6fd6ce20
YH
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
7f88c6b2
HFS
119 IP6_INC_STATS(dev_net(dst->dev),
120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
121 kfree_skb(skb);
122 return -EINVAL;
1da177e4
LT
123}
124
9e508490
JE
125static int ip6_finish_output(struct sk_buff *skb)
126{
127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
128 dst_allfrag(skb_dst(skb)) ||
129 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
9e508490
JE
130 return ip6_fragment(skb, ip6_finish_output2);
131 else
132 return ip6_finish_output2(skb);
133}
134
1da177e4
LT
135int ip6_output(struct sk_buff *skb)
136{
9e508490 137 struct net_device *dev = skb_dst(skb)->dev;
adf30907 138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 139 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 140 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 141 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
142 kfree_skb(skb);
143 return 0;
144 }
145
9c6eb28a
JE
146 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
149}
150
1da177e4 151/*
b5d43998 152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
153 */
154
4c9483b2 155int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 156 struct ipv6_txoptions *opt, int tclass)
1da177e4 157{
3bd653c8 158 struct net *net = sock_net(sk);
b30bd282 159 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 160 struct in6_addr *first_hop = &fl6->daddr;
adf30907 161 struct dst_entry *dst = skb_dst(skb);
1da177e4 162 struct ipv6hdr *hdr;
4c9483b2 163 u8 proto = fl6->flowi6_proto;
1da177e4 164 int seg_len = skb->len;
e651f03a 165 int hlimit = -1;
1da177e4
LT
166 u32 mtu;
167
168 if (opt) {
c2636b4d 169 unsigned int head_room;
1da177e4
LT
170
171 /* First: exthdrs may take lots of space (~8K for now)
172 MAX_HEADER is not enough.
173 */
174 head_room = opt->opt_nflen + opt->opt_flen;
175 seg_len += head_room;
176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178 if (skb_headroom(skb) < head_room) {
179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 180 if (skb2 == NULL) {
adf30907 181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
1da177e4
LT
184 return -ENOBUFS;
185 }
808db80a 186 consume_skb(skb);
a11d206d 187 skb = skb2;
83d7eb29 188 skb_set_owner_w(skb, sk);
1da177e4
LT
189 }
190 if (opt->opt_flen)
191 ipv6_push_frag_opts(skb, opt, &proto);
192 if (opt->opt_nflen)
193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194 }
195
e2d1bca7
ACM
196 skb_push(skb, sizeof(struct ipv6hdr));
197 skb_reset_network_header(skb);
0660e03f 198 hdr = ipv6_hdr(skb);
1da177e4
LT
199
200 /*
201 * Fill in the IPv6 header
202 */
b903d324 203 if (np)
1da177e4
LT
204 hlimit = np->hop_limit;
205 if (hlimit < 0)
6b75d090 206 hlimit = ip6_dst_hoplimit(dst);
1da177e4 207
3e4e4c1f 208 ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
41a1f8ea 209
1da177e4
LT
210 hdr->payload_len = htons(seg_len);
211 hdr->nexthdr = proto;
212 hdr->hop_limit = hlimit;
213
4e3fd7a0
AD
214 hdr->saddr = fl6->saddr;
215 hdr->daddr = *first_hop;
1da177e4 216
9c9c9ad5 217 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 218 skb->priority = sk->sk_priority;
4a19ec58 219 skb->mark = sk->sk_mark;
a2c2064f 220
1da177e4 221 mtu = dst_mtu(dst);
283d07ac 222 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 223 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 224 IPSTATS_MIB_OUT, skb->len);
b2e0b385
JE
225 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
226 dst->dev, dst_output);
1da177e4
LT
227 }
228
1da177e4 229 skb->dev = dst->dev;
f4e53e29 230 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
adf30907 231 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
232 kfree_skb(skb);
233 return -EMSGSIZE;
234}
235
7159039a
YH
236EXPORT_SYMBOL(ip6_xmit);
237
1da177e4
LT
238static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
239{
240 struct ip6_ra_chain *ra;
241 struct sock *last = NULL;
242
243 read_lock(&ip6_ra_lock);
244 for (ra = ip6_ra_chain; ra; ra = ra->next) {
245 struct sock *sk = ra->sk;
0bd1b59b
AM
246 if (sk && ra->sel == sel &&
247 (!sk->sk_bound_dev_if ||
248 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
249 if (last) {
250 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
251 if (skb2)
252 rawv6_rcv(last, skb2);
253 }
254 last = sk;
255 }
256 }
257
258 if (last) {
259 rawv6_rcv(last, skb);
260 read_unlock(&ip6_ra_lock);
261 return 1;
262 }
263 read_unlock(&ip6_ra_lock);
264 return 0;
265}
266
e21e0b5f
VN
267static int ip6_forward_proxy_check(struct sk_buff *skb)
268{
0660e03f 269 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 270 u8 nexthdr = hdr->nexthdr;
75f2811c 271 __be16 frag_off;
e21e0b5f
VN
272 int offset;
273
274 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 275 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
276 if (offset < 0)
277 return 0;
278 } else
279 offset = sizeof(struct ipv6hdr);
280
281 if (nexthdr == IPPROTO_ICMPV6) {
282 struct icmp6hdr *icmp6;
283
d56f90a7
ACM
284 if (!pskb_may_pull(skb, (skb_network_header(skb) +
285 offset + 1 - skb->data)))
e21e0b5f
VN
286 return 0;
287
d56f90a7 288 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
289
290 switch (icmp6->icmp6_type) {
291 case NDISC_ROUTER_SOLICITATION:
292 case NDISC_ROUTER_ADVERTISEMENT:
293 case NDISC_NEIGHBOUR_SOLICITATION:
294 case NDISC_NEIGHBOUR_ADVERTISEMENT:
295 case NDISC_REDIRECT:
296 /* For reaction involving unicast neighbor discovery
297 * message destined to the proxied address, pass it to
298 * input function.
299 */
300 return 1;
301 default:
302 break;
303 }
304 }
305
74553b09
VN
306 /*
307 * The proxying router can't forward traffic sent to a link-local
308 * address, so signal the sender and discard the packet. This
309 * behavior is clarified by the MIPv6 specification.
310 */
311 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
312 dst_link_failure(skb);
313 return -1;
314 }
315
e21e0b5f
VN
316 return 0;
317}
318
/* Tail of the FORWARD netfilter hook: push the skb out via its dst. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
323
0954cf9c
HFS
324static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
325{
326 unsigned int mtu;
327 struct inet6_dev *idev;
328
329 if (dst_metric_locked(dst, RTAX_MTU)) {
330 mtu = dst_metric_raw(dst, RTAX_MTU);
331 if (mtu)
332 return mtu;
333 }
334
335 mtu = IPV6_MIN_MTU;
336 rcu_read_lock();
337 idev = __in6_dev_get(dst->dev);
338 if (idev)
339 mtu = idev->cnf.mtu6;
340 rcu_read_unlock();
341
342 return mtu;
343}
344
1da177e4
LT
345int ip6_forward(struct sk_buff *skb)
346{
adf30907 347 struct dst_entry *dst = skb_dst(skb);
0660e03f 348 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 349 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 350 struct net *net = dev_net(dst->dev);
14f3ad6f 351 u32 mtu;
1ab1457c 352
53b7997f 353 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
354 goto error;
355
4497b076
BH
356 if (skb_warn_if_lro(skb))
357 goto drop;
358
1da177e4 359 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
360 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
361 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
362 goto drop;
363 }
364
72b43d08
AK
365 if (skb->pkt_type != PACKET_HOST)
366 goto drop;
367
35fc92a9 368 skb_forward_csum(skb);
1da177e4
LT
369
370 /*
371 * We DO NOT make any processing on
372 * RA packets, pushing them to user level AS IS
373 * without ane WARRANTY that application will be able
374 * to interpret them. The reason is that we
375 * cannot make anything clever here.
376 *
377 * We are not end-node, so that if packet contains
378 * AH/ESP, we cannot make anything.
379 * Defragmentation also would be mistake, RA packets
380 * cannot be fragmented, because there is no warranty
381 * that different fragments will go along one path. --ANK
382 */
ab4eb353
YH
383 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
384 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
385 return 0;
386 }
387
388 /*
389 * check and decrement ttl
390 */
391 if (hdr->hop_limit <= 1) {
392 /* Force OUTPUT device used as source address */
393 skb->dev = dst->dev;
3ffe533c 394 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
395 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
396 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
397
398 kfree_skb(skb);
399 return -ETIMEDOUT;
400 }
401
fbea49e1 402 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 403 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 404 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
405 int proxied = ip6_forward_proxy_check(skb);
406 if (proxied > 0)
e21e0b5f 407 return ip6_input(skb);
74553b09 408 else if (proxied < 0) {
15c77d8b
ED
409 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
410 IPSTATS_MIB_INDISCARDS);
74553b09
VN
411 goto drop;
412 }
e21e0b5f
VN
413 }
414
1da177e4 415 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
416 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
417 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
418 goto drop;
419 }
adf30907 420 dst = skb_dst(skb);
1da177e4
LT
421
422 /* IPv6 specs say nothing about it, but it is clear that we cannot
423 send redirects to source routed frames.
1e5dc146 424 We don't send redirects to frames decapsulated from IPsec.
1da177e4 425 */
c45a3dfb 426 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 427 struct in6_addr *target = NULL;
fbfe95a4 428 struct inet_peer *peer;
1da177e4 429 struct rt6_info *rt;
1da177e4
LT
430
431 /*
432 * incoming and outgoing devices are the same
433 * send a redirect.
434 */
435
436 rt = (struct rt6_info *) dst;
c45a3dfb
DM
437 if (rt->rt6i_flags & RTF_GATEWAY)
438 target = &rt->rt6i_gateway;
1da177e4
LT
439 else
440 target = &hdr->daddr;
441
1d861aa4 442 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
92d86829 443
1da177e4
LT
444 /* Limit redirects both by destination (here)
445 and by source (inside ndisc_send_redirect)
446 */
fbfe95a4 447 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 448 ndisc_send_redirect(skb, target);
1d861aa4
DM
449 if (peer)
450 inet_putpeer(peer);
5bb1ab09
DS
451 } else {
452 int addrtype = ipv6_addr_type(&hdr->saddr);
453
1da177e4 454 /* This check is security critical. */
f81b2e7d
YH
455 if (addrtype == IPV6_ADDR_ANY ||
456 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
457 goto error;
458 if (addrtype & IPV6_ADDR_LINKLOCAL) {
459 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 460 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
461 goto error;
462 }
1da177e4
LT
463 }
464
0954cf9c 465 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
466 if (mtu < IPV6_MIN_MTU)
467 mtu = IPV6_MIN_MTU;
468
4cdd3408
PM
469 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
470 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
1da177e4
LT
471 /* Again, force OUTPUT device used as source address */
472 skb->dev = dst->dev;
14f3ad6f 473 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
474 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
475 IPSTATS_MIB_INTOOBIGERRORS);
476 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
477 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
478 kfree_skb(skb);
479 return -EMSGSIZE;
480 }
481
482 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
483 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
484 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
485 goto drop;
486 }
487
0660e03f 488 hdr = ipv6_hdr(skb);
1da177e4
LT
489
490 /* Mangling hops number delayed to point after skb COW */
1ab1457c 491
1da177e4
LT
492 hdr->hop_limit--;
493
483a47d2 494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 495 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
b2e0b385 496 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
6e23ae2a 497 ip6_forward_finish);
1da177e4
LT
498
499error:
483a47d2 500 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
501drop:
502 kfree_skb(skb);
503 return -EINVAL;
504}
505
506static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
507{
508 to->pkt_type = from->pkt_type;
509 to->priority = from->priority;
510 to->protocol = from->protocol;
adf30907
ED
511 skb_dst_drop(to);
512 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 513 to->dev = from->dev;
82e91ffe 514 to->mark = from->mark;
1da177e4
LT
515
516#ifdef CONFIG_NET_SCHED
517 to->tc_index = from->tc_index;
518#endif
e7ac05f3 519 nf_copy(to, from);
07a93626 520#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
ba9dda3a
JK
521 to->nf_trace = from->nf_trace;
522#endif
984bc16c 523 skb_copy_secmark(to, from);
1da177e4
LT
524}
525
ad0081e4 526int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
1da177e4 527{
1da177e4 528 struct sk_buff *frag;
adf30907 529 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 530 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
531 struct ipv6hdr *tmp_hdr;
532 struct frag_hdr *fh;
533 unsigned int mtu, hlen, left, len;
a7ae1992 534 int hroom, troom;
ae08e1f0 535 __be32 frag_id = 0;
1da177e4
LT
536 int ptr, offset = 0, err=0;
537 u8 *prevhdr, nexthdr = 0;
adf30907 538 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 539
1da177e4
LT
540 hlen = ip6_find_1stfragopt(skb, &prevhdr);
541 nexthdr = *prevhdr;
542
628a5c56 543 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
544
545 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 546 * or if the skb it not generated by a local socket.
b881ef76 547 */
4cdd3408
PM
548 if (unlikely(!skb->local_df && skb->len > mtu) ||
549 (IP6CB(skb)->frag_max_size &&
550 IP6CB(skb)->frag_max_size > mtu)) {
a34a101e
ED
551 if (skb->sk && dst_allfrag(skb_dst(skb)))
552 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
553
adf30907 554 skb->dev = skb_dst(skb)->dev;
3ffe533c 555 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 556 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 557 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
558 kfree_skb(skb);
559 return -EMSGSIZE;
560 }
561
d91675f9
YH
562 if (np && np->frag_size < mtu) {
563 if (np->frag_size)
564 mtu = np->frag_size;
565 }
566 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 567
21dc3301 568 if (skb_has_frag_list(skb)) {
1da177e4 569 int first_len = skb_pagelen(skb);
3d13008e 570 struct sk_buff *frag2;
1da177e4
LT
571
572 if (first_len - hlen > mtu ||
573 ((first_len - hlen) & 7) ||
574 skb_cloned(skb))
575 goto slow_path;
576
4d9092bb 577 skb_walk_frags(skb, frag) {
1da177e4
LT
578 /* Correct geometry. */
579 if (frag->len > mtu ||
580 ((frag->len & 7) && frag->next) ||
581 skb_headroom(frag) < hlen)
3d13008e 582 goto slow_path_clean;
1da177e4 583
1da177e4
LT
584 /* Partially cloned skb? */
585 if (skb_shared(frag))
3d13008e 586 goto slow_path_clean;
2fdba6b0
HX
587
588 BUG_ON(frag->sk);
589 if (skb->sk) {
2fdba6b0
HX
590 frag->sk = skb->sk;
591 frag->destructor = sock_wfree;
2fdba6b0 592 }
3d13008e 593 skb->truesize -= frag->truesize;
1da177e4
LT
594 }
595
596 err = 0;
597 offset = 0;
598 frag = skb_shinfo(skb)->frag_list;
4d9092bb 599 skb_frag_list_init(skb);
1da177e4
LT
600 /* BUILD HEADER */
601
9a217a1c 602 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 603 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 604 if (!tmp_hdr) {
adf30907 605 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 606 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
607 return -ENOMEM;
608 }
609
1da177e4
LT
610 __skb_pull(skb, hlen);
611 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
612 __skb_push(skb, hlen);
613 skb_reset_network_header(skb);
d56f90a7 614 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 615
87c48fa3 616 ipv6_select_ident(fh, rt);
1da177e4
LT
617 fh->nexthdr = nexthdr;
618 fh->reserved = 0;
619 fh->frag_off = htons(IP6_MF);
620 frag_id = fh->identification;
621
622 first_len = skb_pagelen(skb);
623 skb->data_len = first_len - skb_headlen(skb);
624 skb->len = first_len;
0660e03f
ACM
625 ipv6_hdr(skb)->payload_len = htons(first_len -
626 sizeof(struct ipv6hdr));
a11d206d 627
d8d1f30b 628 dst_hold(&rt->dst);
1da177e4
LT
629
630 for (;;) {
631 /* Prepare header of the next frame,
632 * before previous one went down. */
633 if (frag) {
634 frag->ip_summed = CHECKSUM_NONE;
badff6d0 635 skb_reset_transport_header(frag);
1da177e4 636 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
637 __skb_push(frag, hlen);
638 skb_reset_network_header(frag);
d56f90a7
ACM
639 memcpy(skb_network_header(frag), tmp_hdr,
640 hlen);
1da177e4
LT
641 offset += skb->len - hlen - sizeof(struct frag_hdr);
642 fh->nexthdr = nexthdr;
643 fh->reserved = 0;
644 fh->frag_off = htons(offset);
645 if (frag->next != NULL)
646 fh->frag_off |= htons(IP6_MF);
647 fh->identification = frag_id;
0660e03f
ACM
648 ipv6_hdr(frag)->payload_len =
649 htons(frag->len -
650 sizeof(struct ipv6hdr));
1da177e4
LT
651 ip6_copy_metadata(frag, skb);
652 }
1ab1457c 653
1da177e4 654 err = output(skb);
dafee490 655 if(!err)
d8d1f30b 656 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 657 IPSTATS_MIB_FRAGCREATES);
dafee490 658
1da177e4
LT
659 if (err || !frag)
660 break;
661
662 skb = frag;
663 frag = skb->next;
664 skb->next = NULL;
665 }
666
a51482bd 667 kfree(tmp_hdr);
1da177e4
LT
668
669 if (err == 0) {
d8d1f30b 670 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 671 IPSTATS_MIB_FRAGOKS);
94e187c0 672 ip6_rt_put(rt);
1da177e4
LT
673 return 0;
674 }
675
676 while (frag) {
677 skb = frag->next;
678 kfree_skb(frag);
679 frag = skb;
680 }
681
d8d1f30b 682 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 683 IPSTATS_MIB_FRAGFAILS);
94e187c0 684 ip6_rt_put(rt);
1da177e4 685 return err;
3d13008e
ED
686
687slow_path_clean:
688 skb_walk_frags(skb, frag2) {
689 if (frag2 == frag)
690 break;
691 frag2->sk = NULL;
692 frag2->destructor = NULL;
693 skb->truesize += frag2->truesize;
694 }
1da177e4
LT
695 }
696
697slow_path:
72e843bb
ED
698 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
699 skb_checksum_help(skb))
700 goto fail;
701
1da177e4
LT
702 left = skb->len - hlen; /* Space per frame */
703 ptr = hlen; /* Where to start from */
704
705 /*
706 * Fragment the datagram.
707 */
708
709 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992
HX
710 hroom = LL_RESERVED_SPACE(rt->dst.dev);
711 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
712
713 /*
714 * Keep copying data until we run out.
715 */
716 while(left > 0) {
717 len = left;
718 /* IF: it doesn't fit, use 'mtu' - the data space left */
719 if (len > mtu)
720 len = mtu;
25985edc 721 /* IF: we are not sending up to and including the packet end
1da177e4
LT
722 then align the next start on an eight byte boundary */
723 if (len < left) {
724 len &= ~7;
725 }
726 /*
727 * Allocate buffer.
728 */
729
a7ae1992
HX
730 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
731 hroom + troom, GFP_ATOMIC)) == NULL) {
64ce2073 732 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 733 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 734 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
735 err = -ENOMEM;
736 goto fail;
737 }
738
739 /*
740 * Set up data on packet
741 */
742
743 ip6_copy_metadata(frag, skb);
a7ae1992 744 skb_reserve(frag, hroom);
1da177e4 745 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 746 skb_reset_network_header(frag);
badff6d0 747 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
748 frag->transport_header = (frag->network_header + hlen +
749 sizeof(struct frag_hdr));
1da177e4
LT
750
751 /*
752 * Charge the memory for the fragment to any owner
753 * it might possess
754 */
755 if (skb->sk)
756 skb_set_owner_w(frag, skb->sk);
757
758 /*
759 * Copy the packet header into the new buffer.
760 */
d626f62b 761 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
762
763 /*
764 * Build fragment header.
765 */
766 fh->nexthdr = nexthdr;
767 fh->reserved = 0;
f36d6ab1 768 if (!frag_id) {
87c48fa3 769 ipv6_select_ident(fh, rt);
1da177e4
LT
770 frag_id = fh->identification;
771 } else
772 fh->identification = frag_id;
773
774 /*
775 * Copy a block of the IP datagram.
776 */
8984e41d 777 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
778 BUG();
779 left -= len;
780
781 fh->frag_off = htons(offset);
782 if (left > 0)
783 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
784 ipv6_hdr(frag)->payload_len = htons(frag->len -
785 sizeof(struct ipv6hdr));
1da177e4
LT
786
787 ptr += len;
788 offset += len;
789
790 /*
791 * Put this fragment into the sending queue.
792 */
1da177e4
LT
793 err = output(frag);
794 if (err)
795 goto fail;
dafee490 796
adf30907 797 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 798 IPSTATS_MIB_FRAGCREATES);
1da177e4 799 }
adf30907 800 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 801 IPSTATS_MIB_FRAGOKS);
808db80a 802 consume_skb(skb);
1da177e4
LT
803 return err;
804
805fail:
adf30907 806 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 807 IPSTATS_MIB_FRAGFAILS);
1ab1457c 808 kfree_skb(skb);
1da177e4
LT
809 return err;
810}
811
b71d1d42
ED
812static inline int ip6_rt_check(const struct rt6key *rt_key,
813 const struct in6_addr *fl_addr,
814 const struct in6_addr *addr_cache)
cf6b1982 815{
a02cec21
ED
816 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
817 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
818}
819
497c615a
HX
820static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
821 struct dst_entry *dst,
b71d1d42 822 const struct flowi6 *fl6)
1da177e4 823{
497c615a 824 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 825 struct rt6_info *rt;
1da177e4 826
497c615a
HX
827 if (!dst)
828 goto out;
829
a963a37d
ED
830 if (dst->ops->family != AF_INET6) {
831 dst_release(dst);
832 return NULL;
833 }
834
835 rt = (struct rt6_info *)dst;
497c615a
HX
836 /* Yes, checking route validity in not connected
837 * case is not very simple. Take into account,
838 * that we do not support routing by source, TOS,
839 * and MSG_DONTROUTE --ANK (980726)
840 *
cf6b1982
YH
841 * 1. ip6_rt_check(): If route was host route,
842 * check that cached destination is current.
497c615a
HX
843 * If it is network route, we still may
844 * check its validity using saved pointer
845 * to the last used address: daddr_cache.
846 * We do not want to save whole address now,
847 * (because main consumer of this service
848 * is tcp, which has not this problem),
849 * so that the last trick works only on connected
850 * sockets.
851 * 2. oif also should be the same.
852 */
4c9483b2 853 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 854#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 855 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 856#endif
4c9483b2 857 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
858 dst_release(dst);
859 dst = NULL;
1da177e4
LT
860 }
861
497c615a
HX
862out:
863 return dst;
864}
865
866static int ip6_dst_lookup_tail(struct sock *sk,
4c9483b2 867 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 868{
3b1e0a65 869 struct net *net = sock_net(sk);
69cce1d1
DM
870#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
871 struct neighbour *n;
97cac082 872 struct rt6_info *rt;
69cce1d1
DM
873#endif
874 int err;
497c615a 875
1da177e4 876 if (*dst == NULL)
4c9483b2 877 *dst = ip6_route_output(net, sk, fl6);
1da177e4
LT
878
879 if ((err = (*dst)->error))
880 goto out_err_release;
881
4c9483b2 882 if (ipv6_addr_any(&fl6->saddr)) {
c3968a85
DW
883 struct rt6_info *rt = (struct rt6_info *) *dst;
884 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
885 sk ? inet6_sk(sk)->srcprefs : 0,
886 &fl6->saddr);
44456d37 887 if (err)
1da177e4 888 goto out_err_release;
1da177e4
LT
889 }
890
95c385b4 891#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
892 /*
893 * Here if the dst entry we've looked up
894 * has a neighbour entry that is in the INCOMPLETE
895 * state and the src address from the flow is
896 * marked as OPTIMISTIC, we release the found
897 * dst entry and replace it instead with the
898 * dst entry of the nexthop router
899 */
c56bf6fe 900 rt = (struct rt6_info *) *dst;
707be1ff 901 rcu_read_lock_bh();
550bab42 902 n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
707be1ff
YH
903 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
904 rcu_read_unlock_bh();
905
906 if (err) {
e550dfb0 907 struct inet6_ifaddr *ifp;
4c9483b2 908 struct flowi6 fl_gw6;
e550dfb0
NH
909 int redirect;
910
4c9483b2 911 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
912 (*dst)->dev, 1);
913
914 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915 if (ifp)
916 in6_ifa_put(ifp);
917
918 if (redirect) {
919 /*
920 * We need to get the dst entry for the
921 * default router instead
922 */
923 dst_release(*dst);
4c9483b2
DM
924 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
925 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
926 *dst = ip6_route_output(net, sk, &fl_gw6);
e550dfb0
NH
927 if ((err = (*dst)->error))
928 goto out_err_release;
95c385b4 929 }
e550dfb0 930 }
95c385b4
NH
931#endif
932
1da177e4
LT
933 return 0;
934
935out_err_release:
ca46f9c8 936 if (err == -ENETUNREACH)
5ac68e7c 937 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
938 dst_release(*dst);
939 *dst = NULL;
940 return err;
941}
34a0b3cd 942
497c615a
HX
943/**
944 * ip6_dst_lookup - perform route lookup on flow
945 * @sk: socket which provides route info
946 * @dst: pointer to dst_entry * for result
4c9483b2 947 * @fl6: flow to lookup
497c615a
HX
948 *
949 * This function performs a route lookup on the given flow.
950 *
951 * It returns zero on success, or a standard errno code on error.
952 */
4c9483b2 953int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
497c615a
HX
954{
955 *dst = NULL;
4c9483b2 956 return ip6_dst_lookup_tail(sk, dst, fl6);
497c615a 957}
3cf3dc6c
ACM
958EXPORT_SYMBOL_GPL(ip6_dst_lookup);
959
497c615a 960/**
68d0c6d3
DM
961 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
962 * @sk: socket which provides route info
4c9483b2 963 * @fl6: flow to lookup
68d0c6d3 964 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
965 *
966 * This function performs a route lookup on the given flow.
967 *
968 * It returns a valid dst pointer on success, or a pointer encoded
969 * error code.
970 */
4c9483b2 971struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 972 const struct in6_addr *final_dst)
68d0c6d3
DM
973{
974 struct dst_entry *dst = NULL;
975 int err;
976
4c9483b2 977 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
978 if (err)
979 return ERR_PTR(err);
980 if (final_dst)
4e3fd7a0 981 fl6->daddr = *final_dst;
2774c131 982
4c9483b2 983 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
984}
985EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
986
987/**
988 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 989 * @sk: socket which provides the dst cache and route info
4c9483b2 990 * @fl6: flow to lookup
68d0c6d3 991 * @final_dst: final destination address for ipsec lookup
497c615a
HX
992 *
993 * This function performs a route lookup on the given flow with the
994 * possibility of using the cached route in the socket if it is valid.
995 * It will take the socket dst lock when operating on the dst cache.
996 * As a result, this function can only be used in process context.
997 *
68d0c6d3
DM
998 * It returns a valid dst pointer on success, or a pointer encoded
999 * error code.
497c615a 1000 */
4c9483b2 1001struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1002 const struct in6_addr *final_dst)
497c615a 1003{
68d0c6d3
DM
1004 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1005 int err;
497c615a 1006
4c9483b2 1007 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1008
4c9483b2 1009 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1010 if (err)
1011 return ERR_PTR(err);
1012 if (final_dst)
4e3fd7a0 1013 fl6->daddr = *final_dst;
2774c131 1014
4c9483b2 1015 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1016}
68d0c6d3 1017EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1018
/* Append datagram payload for a UDP socket whose device supports UFO
 * (UDP fragmentation offload).  Builds (or extends) a single oversized
 * skb on sk->sk_write_queue and marks it SKB_GSO_UDP so the device or
 * the GSO layer performs IPv6 fragmentation later.
 *
 * Returns 0 on success or a negative errno (from skb allocation or
 * from copying user data via @getfrag).
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	struct frag_hdr fhdr;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		/* +20 gives slack beyond link + frag + transport headers */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(&sk->sk_write_queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already GSO-initialized; just append data. */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pick the IPv6 fragment ID now so all fragments share it. */
	ipv6_select_ident(&fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
	/* Copy only payload; transport header was accounted above. */
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1076
0178b695
HX
1077static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1078 gfp_t gfp)
1079{
1080 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1081}
1082
1083static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1084 gfp_t gfp)
1085{
1086 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1087}
1088
75a493e6 1089static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1090 int *maxfraglen,
1091 unsigned int fragheaderlen,
1092 struct sk_buff *skb,
75a493e6
HFS
1093 struct rt6_info *rt,
1094 bool pmtuprobe)
0c183379
G
1095{
1096 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1097 if (skb == NULL) {
1098 /* first fragment, reserve header_len */
1099 *mtu = *mtu - rt->dst.header_len;
1100
1101 } else {
1102 /*
1103 * this fragment is not first, the headers
1104 * space is regarded as data space.
1105 */
75a493e6
HFS
1106 *mtu = min(*mtu, pmtuprobe ?
1107 rt->dst.dev->mtu :
1108 dst_mtu(rt->dst.path));
0c183379
G
1109 }
1110 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1111 + fragheaderlen - sizeof(struct frag_hdr);
1112 }
1113}
1114
/* Append data to the pending (corked) output queue of @sk.
 *
 * On the first call for a cork cycle (empty write queue) this captures
 * the route, flow, hop limit, tclass and a private copy of @opt in the
 * socket's cork state; subsequent calls reuse that state and ignore the
 * corresponding arguments.  Data is pulled from the caller via @getfrag
 * and packed into MTU-sized skbs (or one large UFO skb when the device
 * supports it).  ip6_push_pending_frames() later transmits the queue.
 *
 * Returns 0 on success or a negative errno; on error the bytes that
 * could not be queued are subtracted back out of cork->length.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			/* Deep-copy the tx options so the caller's copy may
			 * be freed while the cork is still pending.
			 */
			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		/* Pin the route for the lifetime of the cork. */
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* When probing PMTU, use the device MTU rather than the
		 * (possibly smaller) cached path MTU.
		 */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		/* A user-requested IPV6_MTU below the path MTU wins. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		/* Destination options (opt_flen) travel with the payload,
		 * so fold them into length/transhdrlen for fragment 0.
		 */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Cork already set up: reuse the captured state and ignore
		 * the per-call route/flow/options arguments.
		 */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Per-fragment payload must be a multiple of 8 octets. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->tot_len : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
				 mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;

		/* dontfrag active: report the usable MTU to the socket
		 * instead of fragmenting.
		 */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	/* Take the UFO fast path for oversized UDP when the device
	 * offloads fragmentation, or to keep extending an existing
	 * GSO skb on the queue.
	 */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc >=
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First fragment: may block per MSG_DONTWAIT */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Later fragments: allow queueing up to twice
				 * the send buffer before failing.
				 */
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, fixing up both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header/transport accounting applies only to the
			 * first fragment.
			 */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: fill the socket's page fragment
			 * and attach it (or coalesce) as an skb frag.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Back out the bytes we failed to queue. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1479
bf138862
PE
1480static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1481{
0178b695
HX
1482 if (np->cork.opt) {
1483 kfree(np->cork.opt->dst0opt);
1484 kfree(np->cork.opt->dst1opt);
1485 kfree(np->cork.opt->hopopt);
1486 kfree(np->cork.opt->srcrt);
1487 kfree(np->cork.opt);
1488 np->cork.opt = NULL;
1489 }
1490
bdc712b4
DM
1491 if (inet->cork.base.dst) {
1492 dst_release(inet->cork.base.dst);
1493 inet->cork.base.dst = NULL;
1494 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1495 }
1496 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1497}
1498
/* Transmit everything queued by ip6_append_data() on @sk.
 *
 * Coalesces the queued skbs into one packet (tail skbs become the
 * frag_list of the head), prepends extension headers and the IPv6
 * header from the corked state, updates SNMP counters, and hands the
 * packet to ip6_local_out().  The cork state is released on all paths.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the head skb's frag_list,
	 * transferring their byte/truesize accounting to the head and
	 * detaching them from the socket.
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* Push extension headers; a routing header may rewrite the
	 * on-wire destination (final_dst keeps the real one).
	 */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values are congestion-notification codes;
		 * map them to errnos before deciding success/failure.
		 */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4
LT
1581
1582void ip6_flush_pending_frames(struct sock *sk)
1583{
1da177e4
LT
1584 struct sk_buff *skb;
1585
1586 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1587 if (skb_dst(skb))
1588 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1589 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1590 kfree_skb(skb);
1591 }
1592
bf138862 1593 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1594}
a495f836 1595EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
This page took 2.432558 seconds and 5 git commands to generate.