ipv6: drop unused "dev" arg of icmpv6_send()
[deliverable/linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
1da177e4
LT
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
7bc570c8 56#include <linux/mroute6.h>
1da177e4
LT
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
ef76bc23
HX
60int __ip6_local_out(struct sk_buff *skb)
61{
62 int len;
63
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
68
adf30907 69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
ef76bc23
HX
70 dst_output);
71}
72
/*
 * Send a locally generated IPv6 packet: run the LOCAL_OUT hook via
 * __ip6_local_out() and, if the hook accepted the packet (return 1),
 * hand it to dst_output().  Returns the hook verdict or the
 * dst_output() result.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	if (likely(ret == 1))
		ret = dst_output(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
84
ad643a79 85static int ip6_output_finish(struct sk_buff *skb)
1da177e4 86{
adf30907 87 struct dst_entry *dst = skb_dst(skb);
1da177e4 88
3644f0ce
SH
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
1da177e4
LT
92 return dst->neighbour->output(skb);
93
483a47d2
DL
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
96 kfree_skb(skb);
97 return -EINVAL;
98
99}
100
101/* dev_loopback_xmit for use with netfilter. */
102static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
103{
459a98ed 104 skb_reset_mac_header(newskb);
bbe735e4 105 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 108 WARN_ON(!skb_dst(newskb));
1da177e4
LT
109
110 netif_rx(newskb);
111 return 0;
112}
113
114
115static int ip6_output2(struct sk_buff *skb)
116{
adf30907 117 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
118 struct net_device *dev = dst->dev;
119
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
122
0660e03f 123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 124 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 125
7ad6848c 126 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
bd91b8bf
BT
127 ((mroute6_socket(dev_net(dev)) &&
128 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
129 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
130 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
131 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
132
133 /* Do not check for IFF_ALLMULTI; multicast routing
134 is not supported in any case.
135 */
136 if (newskb)
6e23ae2a
PM
137 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
138 NULL, newskb->dev,
1da177e4
LT
139 ip6_dev_loopback_xmit);
140
0660e03f 141 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
142 IP6_INC_STATS(dev_net(dev), idev,
143 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
144 kfree_skb(skb);
145 return 0;
146 }
147 }
148
edf391ff
NH
149 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
150 skb->len);
1da177e4
LT
151 }
152
6e23ae2a
PM
153 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
154 ip6_output_finish);
1da177e4
LT
155}
156
628a5c56
JH
157static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
158{
159 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
160
161 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
adf30907 162 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
628a5c56
JH
163}
164
1da177e4
LT
165int ip6_output(struct sk_buff *skb)
166{
adf30907 167 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 168 if (unlikely(idev->cnf.disable_ipv6)) {
adf30907 169 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
3bd653c8 170 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
171 kfree_skb(skb);
172 return 0;
173 }
174
628a5c56 175 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
adf30907 176 dst_allfrag(skb_dst(skb)))
1da177e4
LT
177 return ip6_fragment(skb, ip6_output2);
178 else
179 return ip6_output2(skb);
180}
181
1da177e4
LT
182/*
183 * xmit an sk_buff (used by TCP)
184 */
185
186int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
187 struct ipv6_txoptions *opt, int ipfragok)
188{
3bd653c8 189 struct net *net = sock_net(sk);
b30bd282 190 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4 191 struct in6_addr *first_hop = &fl->fl6_dst;
adf30907 192 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
e651f03a
GR
196 int hlimit = -1;
197 int tclass = 0;
1da177e4
LT
198 u32 mtu;
199
200 if (opt) {
c2636b4d 201 unsigned int head_room;
1da177e4
LT
202
203 /* First: exthdrs may take lots of space (~8K for now)
204 MAX_HEADER is not enough.
205 */
206 head_room = opt->opt_nflen + opt->opt_flen;
207 seg_len += head_room;
208 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
209
210 if (skb_headroom(skb) < head_room) {
211 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 212 if (skb2 == NULL) {
adf30907 213 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
214 IPSTATS_MIB_OUTDISCARDS);
215 kfree_skb(skb);
1da177e4
LT
216 return -ENOBUFS;
217 }
a11d206d
YH
218 kfree_skb(skb);
219 skb = skb2;
1da177e4
LT
220 if (sk)
221 skb_set_owner_w(skb, sk);
222 }
223 if (opt->opt_flen)
224 ipv6_push_frag_opts(skb, opt, &proto);
225 if (opt->opt_nflen)
226 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
227 }
228
e2d1bca7
ACM
229 skb_push(skb, sizeof(struct ipv6hdr));
230 skb_reset_network_header(skb);
0660e03f 231 hdr = ipv6_hdr(skb);
1da177e4 232
77e2f14f
WY
233 /* Allow local fragmentation. */
234 if (ipfragok)
235 skb->local_df = 1;
236
1da177e4
LT
237 /*
238 * Fill in the IPv6 header
239 */
e651f03a
GR
240 if (np) {
241 tclass = np->tclass;
1da177e4 242 hlimit = np->hop_limit;
e651f03a 243 }
1da177e4 244 if (hlimit < 0)
6b75d090 245 hlimit = ip6_dst_hoplimit(dst);
1da177e4 246
90bcaf7b 247 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 248
1da177e4
LT
249 hdr->payload_len = htons(seg_len);
250 hdr->nexthdr = proto;
251 hdr->hop_limit = hlimit;
252
253 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
254 ipv6_addr_copy(&hdr->daddr, first_hop);
255
a2c2064f 256 skb->priority = sk->sk_priority;
4a19ec58 257 skb->mark = sk->sk_mark;
a2c2064f 258
1da177e4 259 mtu = dst_mtu(dst);
283d07ac 260 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 261 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 262 IPSTATS_MIB_OUT, skb->len);
6e23ae2a 263 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 264 dst_output);
1da177e4
LT
265 }
266
267 if (net_ratelimit())
268 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
269 skb->dev = dst->dev;
3ffe533c 270 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 271 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
272 kfree_skb(skb);
273 return -EMSGSIZE;
274}
275
7159039a
YH
276EXPORT_SYMBOL(ip6_xmit);
277
1da177e4
LT
278/*
279 * To avoid extra problems ND packets are send through this
280 * routine. It's code duplication but I really want to avoid
281 * extra checks since ipv6_build_header is used by TCP (which
282 * is for us performance critical)
283 */
284
285int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 286 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
287 int proto, int len)
288{
289 struct ipv6_pinfo *np = inet6_sk(sk);
290 struct ipv6hdr *hdr;
291 int totlen;
292
293 skb->protocol = htons(ETH_P_IPV6);
294 skb->dev = dev;
295
296 totlen = len + sizeof(struct ipv6hdr);
297
55f79cc0
ACM
298 skb_reset_network_header(skb);
299 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 300 hdr = ipv6_hdr(skb);
1da177e4 301
ae08e1f0 302 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
303
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
307
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
310
311 return 0;
312}
313
314static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
315{
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
318
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
0bd1b59b
AM
322 if (sk && ra->sel == sel &&
323 (!sk->sk_bound_dev_if ||
324 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
325 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2)
328 rawv6_rcv(last, skb2);
329 }
330 last = sk;
331 }
332 }
333
334 if (last) {
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
337 return 1;
338 }
339 read_unlock(&ip6_ra_lock);
340 return 0;
341}
342
e21e0b5f
VN
343static int ip6_forward_proxy_check(struct sk_buff *skb)
344{
0660e03f 345 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
346 u8 nexthdr = hdr->nexthdr;
347 int offset;
348
349 if (ipv6_ext_hdr(nexthdr)) {
350 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
351 if (offset < 0)
352 return 0;
353 } else
354 offset = sizeof(struct ipv6hdr);
355
356 if (nexthdr == IPPROTO_ICMPV6) {
357 struct icmp6hdr *icmp6;
358
d56f90a7
ACM
359 if (!pskb_may_pull(skb, (skb_network_header(skb) +
360 offset + 1 - skb->data)))
e21e0b5f
VN
361 return 0;
362
d56f90a7 363 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
364
365 switch (icmp6->icmp6_type) {
366 case NDISC_ROUTER_SOLICITATION:
367 case NDISC_ROUTER_ADVERTISEMENT:
368 case NDISC_NEIGHBOUR_SOLICITATION:
369 case NDISC_NEIGHBOUR_ADVERTISEMENT:
370 case NDISC_REDIRECT:
371 /* For reaction involving unicast neighbor discovery
372 * message destined to the proxied address, pass it to
373 * input function.
374 */
375 return 1;
376 default:
377 break;
378 }
379 }
380
74553b09
VN
381 /*
382 * The proxying router can't forward traffic sent to a link-local
383 * address, so signal the sender and discard the packet. This
384 * behavior is clarified by the MIPv6 specification.
385 */
386 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
387 dst_link_failure(skb);
388 return -1;
389 }
390
e21e0b5f
VN
391 return 0;
392}
393
/* okfn for the FORWARD hook: hand the packet to the output path. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
398
399int ip6_forward(struct sk_buff *skb)
400{
adf30907 401 struct dst_entry *dst = skb_dst(skb);
0660e03f 402 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 403 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 404 struct net *net = dev_net(dst->dev);
1ab1457c 405
53b7997f 406 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
407 goto error;
408
4497b076
BH
409 if (skb_warn_if_lro(skb))
410 goto drop;
411
1da177e4 412 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 413 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
414 goto drop;
415 }
416
35fc92a9 417 skb_forward_csum(skb);
1da177e4
LT
418
419 /*
420 * We DO NOT make any processing on
421 * RA packets, pushing them to user level AS IS
422 * without ane WARRANTY that application will be able
423 * to interpret them. The reason is that we
424 * cannot make anything clever here.
425 *
426 * We are not end-node, so that if packet contains
427 * AH/ESP, we cannot make anything.
428 * Defragmentation also would be mistake, RA packets
429 * cannot be fragmented, because there is no warranty
430 * that different fragments will go along one path. --ANK
431 */
432 if (opt->ra) {
d56f90a7 433 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
434 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
435 return 0;
436 }
437
438 /*
439 * check and decrement ttl
440 */
441 if (hdr->hop_limit <= 1) {
442 /* Force OUTPUT device used as source address */
443 skb->dev = dst->dev;
3ffe533c 444 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
483a47d2
DL
445 IP6_INC_STATS_BH(net,
446 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
447
448 kfree_skb(skb);
449 return -ETIMEDOUT;
450 }
451
fbea49e1 452 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 453 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 454 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
455 int proxied = ip6_forward_proxy_check(skb);
456 if (proxied > 0)
e21e0b5f 457 return ip6_input(skb);
74553b09 458 else if (proxied < 0) {
3bd653c8
DL
459 IP6_INC_STATS(net, ip6_dst_idev(dst),
460 IPSTATS_MIB_INDISCARDS);
74553b09
VN
461 goto drop;
462 }
e21e0b5f
VN
463 }
464
1da177e4 465 if (!xfrm6_route_forward(skb)) {
3bd653c8 466 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
467 goto drop;
468 }
adf30907 469 dst = skb_dst(skb);
1da177e4
LT
470
471 /* IPv6 specs say nothing about it, but it is clear that we cannot
472 send redirects to source routed frames.
1e5dc146 473 We don't send redirects to frames decapsulated from IPsec.
1da177e4 474 */
1e5dc146 475 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
def8b4fa 476 !skb_sec_path(skb)) {
1da177e4
LT
477 struct in6_addr *target = NULL;
478 struct rt6_info *rt;
479 struct neighbour *n = dst->neighbour;
480
481 /*
482 * incoming and outgoing devices are the same
483 * send a redirect.
484 */
485
486 rt = (struct rt6_info *) dst;
487 if ((rt->rt6i_flags & RTF_GATEWAY))
488 target = (struct in6_addr*)&n->primary_key;
489 else
490 target = &hdr->daddr;
491
492 /* Limit redirects both by destination (here)
493 and by source (inside ndisc_send_redirect)
494 */
495 if (xrlim_allow(dst, 1*HZ))
496 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
497 } else {
498 int addrtype = ipv6_addr_type(&hdr->saddr);
499
1da177e4 500 /* This check is security critical. */
f81b2e7d
YH
501 if (addrtype == IPV6_ADDR_ANY ||
502 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
503 goto error;
504 if (addrtype & IPV6_ADDR_LINKLOCAL) {
505 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 506 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
507 goto error;
508 }
1da177e4
LT
509 }
510
511 if (skb->len > dst_mtu(dst)) {
512 /* Again, force OUTPUT device used as source address */
513 skb->dev = dst->dev;
3ffe533c 514 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst));
483a47d2
DL
515 IP6_INC_STATS_BH(net,
516 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
517 IP6_INC_STATS_BH(net,
518 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
519 kfree_skb(skb);
520 return -EMSGSIZE;
521 }
522
523 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 524 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
525 goto drop;
526 }
527
0660e03f 528 hdr = ipv6_hdr(skb);
1da177e4
LT
529
530 /* Mangling hops number delayed to point after skb COW */
1ab1457c 531
1da177e4
LT
532 hdr->hop_limit--;
533
483a47d2 534 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
535 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
536 ip6_forward_finish);
1da177e4
LT
537
538error:
483a47d2 539 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
540drop:
541 kfree_skb(skb);
542 return -EINVAL;
543}
544
545static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
546{
547 to->pkt_type = from->pkt_type;
548 to->priority = from->priority;
549 to->protocol = from->protocol;
adf30907
ED
550 skb_dst_drop(to);
551 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 552 to->dev = from->dev;
82e91ffe 553 to->mark = from->mark;
1da177e4
LT
554
555#ifdef CONFIG_NET_SCHED
556 to->tc_index = from->tc_index;
557#endif
e7ac05f3 558 nf_copy(to, from);
ba9dda3a
JK
559#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
560 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
561 to->nf_trace = from->nf_trace;
562#endif
984bc16c 563 skb_copy_secmark(to, from);
1da177e4
LT
564}
565
566int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
567{
568 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
569 struct ipv6_opt_hdr *exthdr =
570 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 571 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 572 int found_rhdr = 0;
0660e03f 573 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
574
575 while (offset + 1 <= packet_len) {
576
577 switch (**nexthdr) {
578
579 case NEXTHDR_HOP:
27637df9 580 break;
1da177e4 581 case NEXTHDR_ROUTING:
27637df9
MN
582 found_rhdr = 1;
583 break;
1da177e4 584 case NEXTHDR_DEST:
59fbb3a6 585#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
586 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
587 break;
588#endif
589 if (found_rhdr)
590 return offset;
1da177e4
LT
591 break;
592 default :
593 return offset;
594 }
27637df9
MN
595
596 offset += ipv6_optlen(exthdr);
597 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
598 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
599 offset);
1da177e4
LT
600 }
601
602 return offset;
603}
604
605static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
606{
1da177e4 607 struct sk_buff *frag;
adf30907 608 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 609 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
610 struct ipv6hdr *tmp_hdr;
611 struct frag_hdr *fh;
612 unsigned int mtu, hlen, left, len;
ae08e1f0 613 __be32 frag_id = 0;
1da177e4
LT
614 int ptr, offset = 0, err=0;
615 u8 *prevhdr, nexthdr = 0;
adf30907 616 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 617
1da177e4
LT
618 hlen = ip6_find_1stfragopt(skb, &prevhdr);
619 nexthdr = *prevhdr;
620
628a5c56 621 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
622
623 /* We must not fragment if the socket is set to force MTU discovery
624 * or if the skb it not generated by a local socket. (This last
625 * check should be redundant, but it's free.)
626 */
b5c15fc0 627 if (!skb->local_df) {
adf30907 628 skb->dev = skb_dst(skb)->dev;
3ffe533c 629 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 630 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 631 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
632 kfree_skb(skb);
633 return -EMSGSIZE;
634 }
635
d91675f9
YH
636 if (np && np->frag_size < mtu) {
637 if (np->frag_size)
638 mtu = np->frag_size;
639 }
640 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 641
4d9092bb 642 if (skb_has_frags(skb)) {
1da177e4 643 int first_len = skb_pagelen(skb);
29ffe1a5 644 int truesizes = 0;
1da177e4
LT
645
646 if (first_len - hlen > mtu ||
647 ((first_len - hlen) & 7) ||
648 skb_cloned(skb))
649 goto slow_path;
650
4d9092bb 651 skb_walk_frags(skb, frag) {
1da177e4
LT
652 /* Correct geometry. */
653 if (frag->len > mtu ||
654 ((frag->len & 7) && frag->next) ||
655 skb_headroom(frag) < hlen)
656 goto slow_path;
657
1da177e4
LT
658 /* Partially cloned skb? */
659 if (skb_shared(frag))
660 goto slow_path;
2fdba6b0
HX
661
662 BUG_ON(frag->sk);
663 if (skb->sk) {
2fdba6b0
HX
664 frag->sk = skb->sk;
665 frag->destructor = sock_wfree;
29ffe1a5 666 truesizes += frag->truesize;
2fdba6b0 667 }
1da177e4
LT
668 }
669
670 err = 0;
671 offset = 0;
672 frag = skb_shinfo(skb)->frag_list;
4d9092bb 673 skb_frag_list_init(skb);
1da177e4
LT
674 /* BUILD HEADER */
675
9a217a1c 676 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 677 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 678 if (!tmp_hdr) {
adf30907 679 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 680 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
681 return -ENOMEM;
682 }
683
1da177e4
LT
684 __skb_pull(skb, hlen);
685 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
686 __skb_push(skb, hlen);
687 skb_reset_network_header(skb);
d56f90a7 688 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 689
7ea2f2c5 690 ipv6_select_ident(fh);
1da177e4
LT
691 fh->nexthdr = nexthdr;
692 fh->reserved = 0;
693 fh->frag_off = htons(IP6_MF);
694 frag_id = fh->identification;
695
696 first_len = skb_pagelen(skb);
697 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 698 skb->truesize -= truesizes;
1da177e4 699 skb->len = first_len;
0660e03f
ACM
700 ipv6_hdr(skb)->payload_len = htons(first_len -
701 sizeof(struct ipv6hdr));
a11d206d
YH
702
703 dst_hold(&rt->u.dst);
1da177e4
LT
704
705 for (;;) {
706 /* Prepare header of the next frame,
707 * before previous one went down. */
708 if (frag) {
709 frag->ip_summed = CHECKSUM_NONE;
badff6d0 710 skb_reset_transport_header(frag);
1da177e4 711 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
712 __skb_push(frag, hlen);
713 skb_reset_network_header(frag);
d56f90a7
ACM
714 memcpy(skb_network_header(frag), tmp_hdr,
715 hlen);
1da177e4
LT
716 offset += skb->len - hlen - sizeof(struct frag_hdr);
717 fh->nexthdr = nexthdr;
718 fh->reserved = 0;
719 fh->frag_off = htons(offset);
720 if (frag->next != NULL)
721 fh->frag_off |= htons(IP6_MF);
722 fh->identification = frag_id;
0660e03f
ACM
723 ipv6_hdr(frag)->payload_len =
724 htons(frag->len -
725 sizeof(struct ipv6hdr));
1da177e4
LT
726 ip6_copy_metadata(frag, skb);
727 }
1ab1457c 728
1da177e4 729 err = output(skb);
dafee490 730 if(!err)
3bd653c8
DL
731 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
732 IPSTATS_MIB_FRAGCREATES);
dafee490 733
1da177e4
LT
734 if (err || !frag)
735 break;
736
737 skb = frag;
738 frag = skb->next;
739 skb->next = NULL;
740 }
741
a51482bd 742 kfree(tmp_hdr);
1da177e4
LT
743
744 if (err == 0) {
3bd653c8
DL
745 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
746 IPSTATS_MIB_FRAGOKS);
a11d206d 747 dst_release(&rt->u.dst);
1da177e4
LT
748 return 0;
749 }
750
751 while (frag) {
752 skb = frag->next;
753 kfree_skb(frag);
754 frag = skb;
755 }
756
3bd653c8
DL
757 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
758 IPSTATS_MIB_FRAGFAILS);
a11d206d 759 dst_release(&rt->u.dst);
1da177e4
LT
760 return err;
761 }
762
763slow_path:
764 left = skb->len - hlen; /* Space per frame */
765 ptr = hlen; /* Where to start from */
766
767 /*
768 * Fragment the datagram.
769 */
770
771 *prevhdr = NEXTHDR_FRAGMENT;
772
773 /*
774 * Keep copying data until we run out.
775 */
776 while(left > 0) {
777 len = left;
778 /* IF: it doesn't fit, use 'mtu' - the data space left */
779 if (len > mtu)
780 len = mtu;
781 /* IF: we are not sending upto and including the packet end
782 then align the next start on an eight byte boundary */
783 if (len < left) {
784 len &= ~7;
785 }
786 /*
787 * Allocate buffer.
788 */
789
f5184d26 790 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 791 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 792 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 793 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
794 err = -ENOMEM;
795 goto fail;
796 }
797
798 /*
799 * Set up data on packet
800 */
801
802 ip6_copy_metadata(frag, skb);
803 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
804 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 805 skb_reset_network_header(frag);
badff6d0 806 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
807 frag->transport_header = (frag->network_header + hlen +
808 sizeof(struct frag_hdr));
1da177e4
LT
809
810 /*
811 * Charge the memory for the fragment to any owner
812 * it might possess
813 */
814 if (skb->sk)
815 skb_set_owner_w(frag, skb->sk);
816
817 /*
818 * Copy the packet header into the new buffer.
819 */
d626f62b 820 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
821
822 /*
823 * Build fragment header.
824 */
825 fh->nexthdr = nexthdr;
826 fh->reserved = 0;
f36d6ab1 827 if (!frag_id) {
7ea2f2c5 828 ipv6_select_ident(fh);
1da177e4
LT
829 frag_id = fh->identification;
830 } else
831 fh->identification = frag_id;
832
833 /*
834 * Copy a block of the IP datagram.
835 */
8984e41d 836 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
837 BUG();
838 left -= len;
839
840 fh->frag_off = htons(offset);
841 if (left > 0)
842 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
843 ipv6_hdr(frag)->payload_len = htons(frag->len -
844 sizeof(struct ipv6hdr));
1da177e4
LT
845
846 ptr += len;
847 offset += len;
848
849 /*
850 * Put this fragment into the sending queue.
851 */
1da177e4
LT
852 err = output(frag);
853 if (err)
854 goto fail;
dafee490 855
adf30907 856 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 857 IPSTATS_MIB_FRAGCREATES);
1da177e4 858 }
adf30907 859 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 860 IPSTATS_MIB_FRAGOKS);
1da177e4 861 kfree_skb(skb);
1da177e4
LT
862 return err;
863
864fail:
adf30907 865 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 866 IPSTATS_MIB_FRAGFAILS);
1ab1457c 867 kfree_skb(skb);
1da177e4
LT
868 return err;
869}
870
cf6b1982
YH
871static inline int ip6_rt_check(struct rt6key *rt_key,
872 struct in6_addr *fl_addr,
873 struct in6_addr *addr_cache)
874{
875 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
876 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
877}
878
497c615a
HX
879static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
880 struct dst_entry *dst,
881 struct flowi *fl)
1da177e4 882{
497c615a
HX
883 struct ipv6_pinfo *np = inet6_sk(sk);
884 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 885
497c615a
HX
886 if (!dst)
887 goto out;
888
889 /* Yes, checking route validity in not connected
890 * case is not very simple. Take into account,
891 * that we do not support routing by source, TOS,
892 * and MSG_DONTROUTE --ANK (980726)
893 *
cf6b1982
YH
894 * 1. ip6_rt_check(): If route was host route,
895 * check that cached destination is current.
497c615a
HX
896 * If it is network route, we still may
897 * check its validity using saved pointer
898 * to the last used address: daddr_cache.
899 * We do not want to save whole address now,
900 * (because main consumer of this service
901 * is tcp, which has not this problem),
902 * so that the last trick works only on connected
903 * sockets.
904 * 2. oif also should be the same.
905 */
cf6b1982 906 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
907#ifdef CONFIG_IPV6_SUBTREES
908 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
909#endif
cf6b1982 910 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
911 dst_release(dst);
912 dst = NULL;
1da177e4
LT
913 }
914
497c615a
HX
915out:
916 return dst;
917}
918
919static int ip6_dst_lookup_tail(struct sock *sk,
920 struct dst_entry **dst, struct flowi *fl)
921{
922 int err;
3b1e0a65 923 struct net *net = sock_net(sk);
497c615a 924
1da177e4 925 if (*dst == NULL)
8a3edd80 926 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
927
928 if ((err = (*dst)->error))
929 goto out_err_release;
930
931 if (ipv6_addr_any(&fl->fl6_src)) {
191cd582 932 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
7cbca67c
YH
933 &fl->fl6_dst,
934 sk ? inet6_sk(sk)->srcprefs : 0,
935 &fl->fl6_src);
44456d37 936 if (err)
1da177e4 937 goto out_err_release;
1da177e4
LT
938 }
939
95c385b4 940#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
941 /*
942 * Here if the dst entry we've looked up
943 * has a neighbour entry that is in the INCOMPLETE
944 * state and the src address from the flow is
945 * marked as OPTIMISTIC, we release the found
946 * dst entry and replace it instead with the
947 * dst entry of the nexthop router
948 */
949 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
950 struct inet6_ifaddr *ifp;
951 struct flowi fl_gw;
952 int redirect;
953
954 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
955 (*dst)->dev, 1);
956
957 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
958 if (ifp)
959 in6_ifa_put(ifp);
960
961 if (redirect) {
962 /*
963 * We need to get the dst entry for the
964 * default router instead
965 */
966 dst_release(*dst);
967 memcpy(&fl_gw, fl, sizeof(struct flowi));
968 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
969 *dst = ip6_route_output(net, sk, &fl_gw);
970 if ((err = (*dst)->error))
971 goto out_err_release;
95c385b4 972 }
e550dfb0 973 }
95c385b4
NH
974#endif
975
1da177e4
LT
976 return 0;
977
978out_err_release:
ca46f9c8 979 if (err == -ENETUNREACH)
483a47d2 980 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
981 dst_release(*dst);
982 *dst = NULL;
983 return err;
984}
34a0b3cd 985
497c615a
HX
986/**
987 * ip6_dst_lookup - perform route lookup on flow
988 * @sk: socket which provides route info
989 * @dst: pointer to dst_entry * for result
990 * @fl: flow to lookup
991 *
992 * This function performs a route lookup on the given flow.
993 *
994 * It returns zero on success, or a standard errno code on error.
995 */
996int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
997{
998 *dst = NULL;
999 return ip6_dst_lookup_tail(sk, dst, fl);
1000}
3cf3dc6c
ACM
1001EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1002
497c615a
HX
1003/**
1004 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1005 * @sk: socket which provides the dst cache and route info
1006 * @dst: pointer to dst_entry * for result
1007 * @fl: flow to lookup
1008 *
1009 * This function performs a route lookup on the given flow with the
1010 * possibility of using the cached route in the socket if it is valid.
1011 * It will take the socket dst lock when operating on the dst cache.
1012 * As a result, this function can only be used in process context.
1013 *
1014 * It returns zero on success, or a standard errno code on error.
1015 */
1016int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1017{
1018 *dst = NULL;
1019 if (sk) {
1020 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1021 *dst = ip6_sk_dst_check(sk, *dst, fl);
1022 }
1023
1024 return ip6_dst_lookup_tail(sk, dst, fl);
1025}
1026EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1027
/*
 * ip6_ufo_append_data - append data as one large UFO (UDP fragmentation
 * offload) skb on the socket's write queue, leaving fragmentation to the
 * device, instead of building per-fragment skbs in software.
 *
 * Returns 0 on success or a negative errno; on error the caller falls
 * back to the normal append path.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		/* NOTE(review): the "+ 20" is extra headroom beyond the
		 * link + IPv6/UDP headers; its exact origin is unclear from
		 * here — confirm before changing.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* checksum is finished by the device (UFO) */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	/* Copy user data straight into page frags of this single skb. */
	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* All device-built fragments must share one frag ID. */
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UPD LSO,
	 * so follow normal path
	 */
	/* NOTE(review): if skb was obtained via skb_peek_tail() above it is
	 * still linked on sk_write_queue when freed here — verify this error
	 * path can only be reached with a freshly allocated skb.
	 */
	kfree_skb(skb);

	return err;
}
1da177e4 1090
0178b695
HX
1091static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1092 gfp_t gfp)
1093{
1094 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1095}
1096
1097static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1098 gfp_t gfp)
1099{
1100 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1101}
1102
41a1f8ea
YH
/*
 * ip6_append_data - append user data to the socket's pending (corked)
 * output, splitting it into MTU-sized skbs queued on sk_write_queue.
 * The queued packets are later sent by ip6_push_pending_frames() or
 * discarded by ip6_flush_pending_frames().
 *
 * The first call on an empty write queue sets up the cork state
 * (duplicated ipv6_txoptions, dst, flow, hop limit, tclass, mtu);
 * subsequent calls reuse that state and ignore the passed-in
 * opt/fl/rt/hlimit/tclass.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* NOTE(review): on the -ENOBUFS paths below,
			 * np->cork.opt stays set with partially duplicated
			 * options; ip6_cork_release() frees them later —
			 * verify callers always flush/push on error.
			 */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* With PMTUDISC_PROBE, use the device MTU rather than the
		 * path MTU, so probes can exceed the learned path MTU.
		 */
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		/* A user-set frag_size caps mtu only if smaller. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Account for dst header room plus fragmentable ext hdrs,
		 * minus the non-fragmentable part already in nfheader_len.
		 */
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Write queue non-empty: reuse the cork state and ignore
		 * the rt/fl/opt arguments for this call.
		 */
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* Per-fragment header: IPv6 hdr + non-fragmentable ext hdrs. */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned payload end, leaving frag_hdr room. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	/* Oversized UDP on a UFO-capable device: hand off to the offload
	 * path instead of software fragmentation.
	 */
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MODE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb: may block per MSG_DONTWAIT. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up skbs: soft-limit write memory
				 * to twice sk_sndbuf.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the 8-byte-alignment overhang from
				 * the previous skb into this one, keeping
				 * both checksums consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Extension/transport headers only go in the
			 * first skb of the queue.
			 */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments,
			 * reusing the socket's current send page when the
			 * last frag already points at it.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Undo the optimistic length accounting for the unqueued rest. */
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1421
bf138862
PE
1422static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1423{
0178b695
HX
1424 if (np->cork.opt) {
1425 kfree(np->cork.opt->dst0opt);
1426 kfree(np->cork.opt->dst1opt);
1427 kfree(np->cork.opt->hopopt);
1428 kfree(np->cork.opt->srcrt);
1429 kfree(np->cork.opt);
1430 np->cork.opt = NULL;
1431 }
1432
c8cdaf99
YH
1433 if (inet->cork.dst) {
1434 dst_release(inet->cork.dst);
1435 inet->cork.dst = NULL;
bf138862
PE
1436 inet->cork.flags &= ~IPCORK_ALLFRAG;
1437 }
1438 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1439}
1440
1da177e4
LT
/*
 * ip6_push_pending_frames - coalesce the skbs queued by
 * ip6_append_data() into one packet chain, prepend extension headers
 * and the IPv6 header from the cork state, and hand it to
 * ip6_local_out(). Always releases the cork state before returning.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the head's frag_list,
	 * accumulating their lengths and detaching them from the socket
	 * (only the head charges sk_wmem from here on).
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* Routing-header options may rewrite the wire destination, so
	 * work on a copy of the flow's final destination.
	 */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* First 32 bits: version 6, traffic class, flow label. */
	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return: congestion-notification code from the
		 * qdisc layer, mapped to an errno (or swallowed).
		 */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
1524
1525void ip6_flush_pending_frames(struct sock *sk)
1526{
1da177e4
LT
1527 struct sk_buff *skb;
1528
1529 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1530 if (skb_dst(skb))
1531 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1532 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1533 kfree_skb(skb);
1534 }
1535
bf138862 1536 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1537}
This page took 0.956837 seconds and 5 git commands to generate.