[NETFILTER]: Introduce NF_INET_ hook values
[deliverable/linux.git] / net/ipv6/ip6_output.c
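Context for the hook constants used throughout this file: the commit replaces the old protocol-specific NF_IP6_* values with protocol-independent NF_INET_* values shared with IPv4. As a sketch (not the verbatim header), the enumeration added to <linux/netfilter.h> looks roughly like this:

enum nf_inet_hooks {
	NF_INET_PRE_ROUTING,	/* after sanity checks, before routing decision */
	NF_INET_LOCAL_IN,	/* packet addressed to this host */
	NF_INET_FORWARD,	/* packet being routed through this host */
	NF_INET_LOCAL_OUT,	/* locally generated packet, before routing */
	NF_INET_POST_ROUTING,	/* just before the packet hits the wire */
	NF_INET_NUMHOOKS
};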
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
31 #include <linux/errno.h>
32 #include <linux/kernel.h>
33 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/net.h>
36 #include <linux/netdevice.h>
37 #include <linux/if_arp.h>
38 #include <linux/in6.h>
39 #include <linux/tcp.h>
40 #include <linux/route.h>
41 #include <linux/module.h>
42
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45
46 #include <net/sock.h>
47 #include <net/snmp.h>
48
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57 #include <net/checksum.h>
58
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
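/*
 * Pick the identification value for a fragment header.  A single global
 * counter is shared by all flows and protected by a spinlock; the counter
 * skips zero, so an ID of 0 is never handed out.
 */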
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 {
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
65
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
71 }
72
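/*
 * Finalise a locally generated packet: set payload_len (clamped to 0 when
 * the payload exceeds IPV6_MAXPLEN) and traverse the NF_INET_LOCAL_OUT
 * hook.  A return value of 1 tells the caller to continue with
 * dst_output(), which is what ip6_local_out() below does.
 */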
73 int __ip6_local_out(struct sk_buff *skb)
74 {
75 int len;
76
77 len = skb->len - sizeof(struct ipv6hdr);
78 if (len > IPV6_MAXPLEN)
79 len = 0;
80 ipv6_hdr(skb)->payload_len = htons(len);
81
82 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
83 dst_output);
84 }
85
86 int ip6_local_out(struct sk_buff *skb)
87 {
88 int err;
89
90 err = __ip6_local_out(skb);
91 if (likely(err == 1))
92 err = dst_output(skb);
93
94 return err;
95 }
96 EXPORT_SYMBOL_GPL(ip6_local_out);
97
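/*
 * Final transmission step after POST_ROUTING: use the cached hardware
 * header if one exists, otherwise hand the packet to the neighbour's
 * output function; with neither available, drop the packet and bump
 * OUTNOROUTES.
 */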
98 static int ip6_output_finish(struct sk_buff *skb)
99 {
100 struct dst_entry *dst = skb->dst;
101
102 if (dst->hh)
103 return neigh_hh_output(dst->hh, skb);
104 else if (dst->neighbour)
105 return dst->neighbour->output(skb);
106
107 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
108 kfree_skb(skb);
109 return -EINVAL;
110
111 }
112
113 /* dev_loopback_xmit for use with netfilter. */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115 {
116 skb_reset_mac_header(newskb);
117 __skb_pull(newskb, skb_network_offset(newskb));
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 BUG_TRAP(newskb->dst);
121
122 netif_rx(newskb);
123 return 0;
124 }
125
126
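/*
 * Hand the packet to the NF_INET_POST_ROUTING hook.  For multicast
 * destinations, a clone is first looped back via ip6_dev_loopback_xmit()
 * when a local listener exists and mc_loop is enabled; if such a packet
 * carries a zero hop limit it is then dropped instead of also being put
 * on the wire.
 */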
127 static int ip6_output2(struct sk_buff *skb)
128 {
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
131
132 skb->protocol = htons(ETH_P_IPV6);
133 skb->dev = dev;
134
135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
138
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr)) {
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
143
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
146 */
147 if (newskb)
148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149 NULL, newskb->dev,
150 ip6_dev_loopback_xmit);
151
152 if (ipv6_hdr(skb)->hop_limit == 0) {
153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
154 kfree_skb(skb);
155 return 0;
156 }
157 }
158
159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
160 }
161
162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
164 }
165
166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167 {
168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169
170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 skb->dst->dev->mtu : dst_mtu(skb->dst);
172 }
173
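/*
 * Output entry point used as dst->output: fragment when the packet is
 * larger than the path MTU (and not GSO), or when the route demands
 * fragmentation of every packet (dst_allfrag); otherwise go straight to
 * ip6_output2().
 */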
174 int ip6_output(struct sk_buff *skb)
175 {
176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177 dst_allfrag(skb->dst))
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
181 }
182
183 /*
184 * xmit an sk_buff (used by TCP)
185 */
186
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
189 {
190 struct ipv6_pinfo *np = inet6_sk(sk);
191 struct in6_addr *first_hop = &fl->fl6_dst;
192 struct dst_entry *dst = skb->dst;
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
196 int hlimit, tclass;
197 u32 mtu;
198
199 if (opt) {
200 unsigned int head_room;
201
202 /* First: exthdrs may take lots of space (~8K for now);
203    MAX_HEADER is not enough.
204 */
205 head_room = opt->opt_nflen + opt->opt_flen;
206 seg_len += head_room;
207 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
208
209 if (skb_headroom(skb) < head_room) {
210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
211 if (skb2 == NULL) {
212 IP6_INC_STATS(ip6_dst_idev(skb->dst),
213 IPSTATS_MIB_OUTDISCARDS);
214 kfree_skb(skb);
215 return -ENOBUFS;
216 }
217 kfree_skb(skb);
218 skb = skb2;
219 if (sk)
220 skb_set_owner_w(skb, sk);
221 }
222 if (opt->opt_flen)
223 ipv6_push_frag_opts(skb, opt, &proto);
224 if (opt->opt_nflen)
225 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
226 }
227
228 skb_push(skb, sizeof(struct ipv6hdr));
229 skb_reset_network_header(skb);
230 hdr = ipv6_hdr(skb);
231
232 /*
233 * Fill in the IPv6 header
234 */
235
236 hlimit = -1;
237 if (np)
238 hlimit = np->hop_limit;
239 if (hlimit < 0)
240 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
241 if (hlimit < 0)
242 hlimit = ipv6_get_hoplimit(dst->dev);
243
244 tclass = -1;
245 if (np)
246 tclass = np->tclass;
247 if (tclass < 0)
248 tclass = 0;
249
250 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
251
252 hdr->payload_len = htons(seg_len);
253 hdr->nexthdr = proto;
254 hdr->hop_limit = hlimit;
255
256 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
257 ipv6_addr_copy(&hdr->daddr, first_hop);
258
259 skb->priority = sk->sk_priority;
260
261 mtu = dst_mtu(dst);
262 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
263 IP6_INC_STATS(ip6_dst_idev(skb->dst),
264 IPSTATS_MIB_OUTREQUESTS);
265 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
266 dst_output);
267 }
268
269 if (net_ratelimit())
270 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
271 skb->dev = dst->dev;
272 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
273 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
274 kfree_skb(skb);
275 return -EMSGSIZE;
276 }
277
278 EXPORT_SYMBOL(ip6_xmit);
279
280 /*
281 * To avoid extra problems ND packets are sent through this
282 * routine. It's code duplication but I really want to avoid
283 * extra checks since ipv6_build_header is used by TCP (which
284 * is performance critical for us)
285 */
286
287 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
288 struct in6_addr *saddr, struct in6_addr *daddr,
289 int proto, int len)
290 {
291 struct ipv6_pinfo *np = inet6_sk(sk);
292 struct ipv6hdr *hdr;
293 int totlen;
294
295 skb->protocol = htons(ETH_P_IPV6);
296 skb->dev = dev;
297
298 totlen = len + sizeof(struct ipv6hdr);
299
300 skb_reset_network_header(skb);
301 skb_put(skb, sizeof(struct ipv6hdr));
302 hdr = ipv6_hdr(skb);
303
304 *(__be32*)hdr = htonl(0x60000000);
305
306 hdr->payload_len = htons(len);
307 hdr->nexthdr = proto;
308 hdr->hop_limit = np->hop_limit;
309
310 ipv6_addr_copy(&hdr->saddr, saddr);
311 ipv6_addr_copy(&hdr->daddr, daddr);
312
313 return 0;
314 }
315
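/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value via IPV6_ROUTER_ALERT.  The last
 * matching socket gets the original skb, earlier ones receive clones.
 * Returns 1 if at least one socket consumed the packet.
 */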
316 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317 {
318 struct ip6_ra_chain *ra;
319 struct sock *last = NULL;
320
321 read_lock(&ip6_ra_lock);
322 for (ra = ip6_ra_chain; ra; ra = ra->next) {
323 struct sock *sk = ra->sk;
324 if (sk && ra->sel == sel &&
325 (!sk->sk_bound_dev_if ||
326 sk->sk_bound_dev_if == skb->dev->ifindex)) {
327 if (last) {
328 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
329 if (skb2)
330 rawv6_rcv(last, skb2);
331 }
332 last = sk;
333 }
334 }
335
336 if (last) {
337 rawv6_rcv(last, skb);
338 read_unlock(&ip6_ra_lock);
339 return 1;
340 }
341 read_unlock(&ip6_ra_lock);
342 return 0;
343 }
344
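/*
 * Decide what to do with a packet destined to an address we proxy NDP
 * for: 1 means divert it to local input (unicast neighbour discovery),
 * 0 means keep forwarding it, and -1 means the destination is link-local
 * and the packet must be dropped.
 */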
345 static int ip6_forward_proxy_check(struct sk_buff *skb)
346 {
347 struct ipv6hdr *hdr = ipv6_hdr(skb);
348 u8 nexthdr = hdr->nexthdr;
349 int offset;
350
351 if (ipv6_ext_hdr(nexthdr)) {
352 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
353 if (offset < 0)
354 return 0;
355 } else
356 offset = sizeof(struct ipv6hdr);
357
358 if (nexthdr == IPPROTO_ICMPV6) {
359 struct icmp6hdr *icmp6;
360
361 if (!pskb_may_pull(skb, (skb_network_header(skb) +
362 offset + 1 - skb->data)))
363 return 0;
364
365 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
366
367 switch (icmp6->icmp6_type) {
368 case NDISC_ROUTER_SOLICITATION:
369 case NDISC_ROUTER_ADVERTISEMENT:
370 case NDISC_NEIGHBOUR_SOLICITATION:
371 case NDISC_NEIGHBOUR_ADVERTISEMENT:
372 case NDISC_REDIRECT:
373 /* For unicast neighbour discovery messages destined
374 * to the proxied address, pass them to the input
375 * function.
376 */
377 return 1;
378 default:
379 break;
380 }
381 }
382
383 /*
384 * The proxying router can't forward traffic sent to a link-local
385 * address, so signal the sender and discard the packet. This
386 * behavior is clarified by the MIPv6 specification.
387 */
388 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
389 dst_link_failure(skb);
390 return -1;
391 }
392
393 return 0;
394 }
395
396 static inline int ip6_forward_finish(struct sk_buff *skb)
397 {
398 return dst_output(skb);
399 }
400
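/*
 * Forwarding path: check that forwarding is enabled and XFRM policy
 * allows it, divert Router Alert packets to interested sockets, enforce
 * the hop limit and path MTU (sending ICMPv6 errors back to the source),
 * handle proxy NDP and redirects, then decrement hop_limit and pass the
 * packet through the NF_INET_FORWARD hook to dst_output().
 */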
401 int ip6_forward(struct sk_buff *skb)
402 {
403 struct dst_entry *dst = skb->dst;
404 struct ipv6hdr *hdr = ipv6_hdr(skb);
405 struct inet6_skb_parm *opt = IP6CB(skb);
406
407 if (ipv6_devconf.forwarding == 0)
408 goto error;
409
410 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
411 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
412 goto drop;
413 }
414
415 skb_forward_csum(skb);
416
417 /*
418 * We DO NOT do any processing on
419 * RA packets, pushing them to user level AS IS
420 * without any WARRANTY that the application will be able
421 * to interpret them. The reason is that we
422 * cannot make anything clever here.
423 *
424 * We are not an end node, so if the packet contains
425 * AH/ESP we cannot do anything with it.
426 * Defragmentation would also be a mistake; RA packets
427 * cannot be fragmented, because there is no guarantee
428 * that different fragments will go along one path. --ANK
429 */
430 if (opt->ra) {
431 u8 *ptr = skb_network_header(skb) + opt->ra;
432 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
433 return 0;
434 }
435
436 /*
437 * check and decrement ttl
438 */
439 if (hdr->hop_limit <= 1) {
440 /* Force OUTPUT device used as source address */
441 skb->dev = dst->dev;
442 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
443 0, skb->dev);
444 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
445
446 kfree_skb(skb);
447 return -ETIMEDOUT;
448 }
449
450 /* XXX: idev->cnf.proxy_ndp? */
451 if (ipv6_devconf.proxy_ndp &&
452 pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
453 int proxied = ip6_forward_proxy_check(skb);
454 if (proxied > 0)
455 return ip6_input(skb);
456 else if (proxied < 0) {
457 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
458 goto drop;
459 }
460 }
461
462 if (!xfrm6_route_forward(skb)) {
463 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
464 goto drop;
465 }
466 dst = skb->dst;
467
468 /* IPv6 specs say nothing about it, but it is clear that we cannot
469 send redirects to source routed frames.
470 We don't send redirects to frames decapsulated from IPsec.
471 */
472 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
473 !skb->sp) {
474 struct in6_addr *target = NULL;
475 struct rt6_info *rt;
476 struct neighbour *n = dst->neighbour;
477
478 /*
479 * incoming and outgoing devices are the same;
480 * send a redirect.
481 */
482
483 rt = (struct rt6_info *) dst;
484 if ((rt->rt6i_flags & RTF_GATEWAY))
485 target = (struct in6_addr*)&n->primary_key;
486 else
487 target = &hdr->daddr;
488
489 /* Limit redirects both by destination (here)
490 and by source (inside ndisc_send_redirect)
491 */
492 if (xrlim_allow(dst, 1*HZ))
493 ndisc_send_redirect(skb, n, target);
494 } else {
495 int addrtype = ipv6_addr_type(&hdr->saddr);
496
497 /* This check is security critical. */
498 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
499 goto error;
500 if (addrtype & IPV6_ADDR_LINKLOCAL) {
501 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
502 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
503 goto error;
504 }
505 }
506
507 if (skb->len > dst_mtu(dst)) {
508 /* Again, force OUTPUT device used as source address */
509 skb->dev = dst->dev;
510 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
511 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
512 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
513 kfree_skb(skb);
514 return -EMSGSIZE;
515 }
516
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
518 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
519 goto drop;
520 }
521
522 hdr = ipv6_hdr(skb);
523
524 /* Mangling hops number delayed to point after skb COW */
525
526 hdr->hop_limit--;
527
528 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
529 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530 ip6_forward_finish);
531
532 error:
533 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
534 drop:
535 kfree_skb(skb);
536 return -EINVAL;
537 }
538
539 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
540 {
541 to->pkt_type = from->pkt_type;
542 to->priority = from->priority;
543 to->protocol = from->protocol;
544 dst_release(to->dst);
545 to->dst = dst_clone(from->dst);
546 to->dev = from->dev;
547 to->mark = from->mark;
548
549 #ifdef CONFIG_NET_SCHED
550 to->tc_index = from->tc_index;
551 #endif
552 nf_copy(to, from);
553 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
554 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
555 to->nf_trace = from->nf_trace;
556 #endif
557 skb_copy_secmark(to, from);
558 }
559
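/*
 * Walk the extension header chain and return the offset at which a
 * fragment header has to be inserted (i.e. after any hop-by-hop, routing
 * and destination options that must precede it).  *nexthdr is left
 * pointing at the nexthdr byte that will be overwritten with
 * NEXTHDR_FRAGMENT.
 */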
560 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
561 {
562 u16 offset = sizeof(struct ipv6hdr);
563 struct ipv6_opt_hdr *exthdr =
564 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
565 unsigned int packet_len = skb->tail - skb->network_header;
566 int found_rhdr = 0;
567 *nexthdr = &ipv6_hdr(skb)->nexthdr;
568
569 while (offset + 1 <= packet_len) {
570
571 switch (**nexthdr) {
572
573 case NEXTHDR_HOP:
574 break;
575 case NEXTHDR_ROUTING:
576 found_rhdr = 1;
577 break;
578 case NEXTHDR_DEST:
579 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
580 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
581 break;
582 #endif
583 if (found_rhdr)
584 return offset;
585 break;
586 default:
587 return offset;
588 }
589
590 offset += ipv6_optlen(exthdr);
591 *nexthdr = &exthdr->nexthdr;
592 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
593 offset);
594 }
595
596 return offset;
597 }
598 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
599
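/*
 * Split an oversized packet into fragments.  The fast path reuses an
 * existing frag_list (one fragment header is prepended to every element)
 * when the geometry already fits; otherwise the slow path allocates a
 * fresh skb per fragment and copies the data.  Packets whose socket
 * requested full PMTU discovery, or that did not originate from a local
 * IPv6 socket, are never fragmented here; an ICMPv6 Packet Too Big error
 * is generated instead.
 */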
600 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
601 {
602 struct net_device *dev;
603 struct sk_buff *frag;
604 struct rt6_info *rt = (struct rt6_info*)skb->dst;
605 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
606 struct ipv6hdr *tmp_hdr;
607 struct frag_hdr *fh;
608 unsigned int mtu, hlen, left, len;
609 __be32 frag_id = 0;
610 int ptr, offset = 0, err=0;
611 u8 *prevhdr, nexthdr = 0;
612
613 dev = rt->u.dst.dev;
614 hlen = ip6_find_1stfragopt(skb, &prevhdr);
615 nexthdr = *prevhdr;
616
617 mtu = ip6_skb_dst_mtu(skb);
618
619 /* We must not fragment if the socket is set to force MTU discovery
620 * or if the skb is not generated by a local socket. (This last
621 * check should be redundant, but it's free.)
622 */
623 if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
624 skb->dev = skb->dst->dev;
625 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
626 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
627 kfree_skb(skb);
628 return -EMSGSIZE;
629 }
630
631 if (np && np->frag_size < mtu) {
632 if (np->frag_size)
633 mtu = np->frag_size;
634 }
635 mtu -= hlen + sizeof(struct frag_hdr);
636
637 if (skb_shinfo(skb)->frag_list) {
638 int first_len = skb_pagelen(skb);
639
640 if (first_len - hlen > mtu ||
641 ((first_len - hlen) & 7) ||
642 skb_cloned(skb))
643 goto slow_path;
644
645 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
646 /* Correct geometry. */
647 if (frag->len > mtu ||
648 ((frag->len & 7) && frag->next) ||
649 skb_headroom(frag) < hlen)
650 goto slow_path;
651
652 /* Partially cloned skb? */
653 if (skb_shared(frag))
654 goto slow_path;
655
656 BUG_ON(frag->sk);
657 if (skb->sk) {
658 sock_hold(skb->sk);
659 frag->sk = skb->sk;
660 frag->destructor = sock_wfree;
661 skb->truesize -= frag->truesize;
662 }
663 }
664
665 err = 0;
666 offset = 0;
667 frag = skb_shinfo(skb)->frag_list;
668 skb_shinfo(skb)->frag_list = NULL;
669 /* BUILD HEADER */
670
671 *prevhdr = NEXTHDR_FRAGMENT;
672 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
673 if (!tmp_hdr) {
674 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
675 return -ENOMEM;
676 }
677
678 __skb_pull(skb, hlen);
679 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
680 __skb_push(skb, hlen);
681 skb_reset_network_header(skb);
682 memcpy(skb_network_header(skb), tmp_hdr, hlen);
683
684 ipv6_select_ident(skb, fh);
685 fh->nexthdr = nexthdr;
686 fh->reserved = 0;
687 fh->frag_off = htons(IP6_MF);
688 frag_id = fh->identification;
689
690 first_len = skb_pagelen(skb);
691 skb->data_len = first_len - skb_headlen(skb);
692 skb->len = first_len;
693 ipv6_hdr(skb)->payload_len = htons(first_len -
694 sizeof(struct ipv6hdr));
695
696 dst_hold(&rt->u.dst);
697
698 for (;;) {
699 /* Prepare the header of the next frame
700 * before the previous one goes out. */
701 if (frag) {
702 frag->ip_summed = CHECKSUM_NONE;
703 skb_reset_transport_header(frag);
704 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
705 __skb_push(frag, hlen);
706 skb_reset_network_header(frag);
707 memcpy(skb_network_header(frag), tmp_hdr,
708 hlen);
709 offset += skb->len - hlen - sizeof(struct frag_hdr);
710 fh->nexthdr = nexthdr;
711 fh->reserved = 0;
712 fh->frag_off = htons(offset);
713 if (frag->next != NULL)
714 fh->frag_off |= htons(IP6_MF);
715 fh->identification = frag_id;
716 ipv6_hdr(frag)->payload_len =
717 htons(frag->len -
718 sizeof(struct ipv6hdr));
719 ip6_copy_metadata(frag, skb);
720 }
721
722 err = output(skb);
723 if(!err)
724 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
725
726 if (err || !frag)
727 break;
728
729 skb = frag;
730 frag = skb->next;
731 skb->next = NULL;
732 }
733
734 kfree(tmp_hdr);
735
736 if (err == 0) {
737 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
738 dst_release(&rt->u.dst);
739 return 0;
740 }
741
742 while (frag) {
743 skb = frag->next;
744 kfree_skb(frag);
745 frag = skb;
746 }
747
748 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
749 dst_release(&rt->u.dst);
750 return err;
751 }
752
753 slow_path:
754 left = skb->len - hlen; /* Space per frame */
755 ptr = hlen; /* Where to start from */
756
757 /*
758 * Fragment the datagram.
759 */
760
761 *prevhdr = NEXTHDR_FRAGMENT;
762
763 /*
764 * Keep copying data until we run out.
765 */
766 while(left > 0) {
767 len = left;
768 /* IF: it doesn't fit, use 'mtu' - the data space left */
769 if (len > mtu)
770 len = mtu;
771 /* IF: we are not sending up to and including the packet end
772 then align the next start on an eight byte boundary */
773 if (len < left) {
774 len &= ~7;
775 }
776 /*
777 * Allocate buffer.
778 */
779
780 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
781 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
782 IP6_INC_STATS(ip6_dst_idev(skb->dst),
783 IPSTATS_MIB_FRAGFAILS);
784 err = -ENOMEM;
785 goto fail;
786 }
787
788 /*
789 * Set up data on packet
790 */
791
792 ip6_copy_metadata(frag, skb);
793 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
794 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
795 skb_reset_network_header(frag);
796 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
797 frag->transport_header = (frag->network_header + hlen +
798 sizeof(struct frag_hdr));
799
800 /*
801 * Charge the memory for the fragment to any owner
802 * it might possess
803 */
804 if (skb->sk)
805 skb_set_owner_w(frag, skb->sk);
806
807 /*
808 * Copy the packet header into the new buffer.
809 */
810 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
811
812 /*
813 * Build fragment header.
814 */
815 fh->nexthdr = nexthdr;
816 fh->reserved = 0;
817 if (!frag_id) {
818 ipv6_select_ident(skb, fh);
819 frag_id = fh->identification;
820 } else
821 fh->identification = frag_id;
822
823 /*
824 * Copy a block of the IP datagram.
825 */
826 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
827 BUG();
828 left -= len;
829
830 fh->frag_off = htons(offset);
831 if (left > 0)
832 fh->frag_off |= htons(IP6_MF);
833 ipv6_hdr(frag)->payload_len = htons(frag->len -
834 sizeof(struct ipv6hdr));
835
836 ptr += len;
837 offset += len;
838
839 /*
840 * Put this fragment into the sending queue.
841 */
842 err = output(frag);
843 if (err)
844 goto fail;
845
846 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
847 }
848 IP6_INC_STATS(ip6_dst_idev(skb->dst),
849 IPSTATS_MIB_FRAGOKS);
850 kfree_skb(skb);
851 return err;
852
853 fail:
854 IP6_INC_STATS(ip6_dst_idev(skb->dst),
855 IPSTATS_MIB_FRAGFAILS);
856 kfree_skb(skb);
857 return err;
858 }
859
860 static inline int ip6_rt_check(struct rt6key *rt_key,
861 struct in6_addr *fl_addr,
862 struct in6_addr *addr_cache)
863 {
864 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
865 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
866 }
867
868 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
869 struct dst_entry *dst,
870 struct flowi *fl)
871 {
872 struct ipv6_pinfo *np = inet6_sk(sk);
873 struct rt6_info *rt = (struct rt6_info *)dst;
874
875 if (!dst)
876 goto out;
877
878 * Yes, checking route validity in the not-connected
879 * case is not very simple. Take into account
880 * that we do not support routing by source, TOS,
881 * or MSG_DONTROUTE --ANK (980726)
882 *
883 * 1. ip6_rt_check(): If the route was a host route,
884 * check that the cached destination is current.
885 * If it is a network route, we can still
886 * check its validity using a saved pointer
887 * to the last used address: daddr_cache.
888 * We do not want to save the whole address now
889 * (because the main consumer of this service
890 * is tcp, which does not have this problem),
891 * so this last trick works only on connected
892 * sockets.
893 * 2. oif should also be the same.
894 */
895 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
896 #ifdef CONFIG_IPV6_SUBTREES
897 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
898 #endif
899 (fl->oif && fl->oif != dst->dev->ifindex)) {
900 dst_release(dst);
901 dst = NULL;
902 }
903
904 out:
905 return dst;
906 }
907
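/*
 * Common tail of the dst lookup helpers: resolve the route if the caller
 * did not supply one, pick a source address when the flow has none, and
 * (with optimistic DAD) fall back to the default router's dst entry when
 * the chosen source address is still optimistic and the next hop is
 * unresolved.
 */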
908 static int ip6_dst_lookup_tail(struct sock *sk,
909 struct dst_entry **dst, struct flowi *fl)
910 {
911 int err;
912
913 if (*dst == NULL)
914 *dst = ip6_route_output(sk, fl);
915
916 if ((err = (*dst)->error))
917 goto out_err_release;
918
919 if (ipv6_addr_any(&fl->fl6_src)) {
920 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
921 if (err)
922 goto out_err_release;
923 }
924
925 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
926 /*
927 * Here if the dst entry we've looked up
928 * has a neighbour entry that is in the INCOMPLETE
929 * state and the src address from the flow is
930 * marked as OPTIMISTIC, we release the found
931 * dst entry and replace it instead with the
932 * dst entry of the nexthop router
933 */
934 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
935 struct inet6_ifaddr *ifp;
936 struct flowi fl_gw;
937 int redirect;
938
939 ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
940
941 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
942 if (ifp)
943 in6_ifa_put(ifp);
944
945 if (redirect) {
946 /*
947 * We need to get the dst entry for the
948 * default router instead
949 */
950 dst_release(*dst);
951 memcpy(&fl_gw, fl, sizeof(struct flowi));
952 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
953 *dst = ip6_route_output(sk, &fl_gw);
954 if ((err = (*dst)->error))
955 goto out_err_release;
956 }
957 }
958 #endif
959
960 return 0;
961
962 out_err_release:
963 if (err == -ENETUNREACH)
964 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
965 dst_release(*dst);
966 *dst = NULL;
967 return err;
968 }
969
970 /**
971 * ip6_dst_lookup - perform route lookup on flow
972 * @sk: socket which provides route info
973 * @dst: pointer to dst_entry * for result
974 * @fl: flow to lookup
975 *
976 * This function performs a route lookup on the given flow.
977 *
978 * It returns zero on success, or a standard errno code on error.
979 */
980 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
981 {
982 *dst = NULL;
983 return ip6_dst_lookup_tail(sk, dst, fl);
984 }
985 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
986
987 /**
988 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
989 * @sk: socket which provides the dst cache and route info
990 * @dst: pointer to dst_entry * for result
991 * @fl: flow to lookup
992 *
993 * This function performs a route lookup on the given flow with the
994 * possibility of using the cached route in the socket if it is valid.
995 * It will take the socket dst lock when operating on the dst cache.
996 * As a result, this function can only be used in process context.
997 *
998 * It returns zero on success, or a standard errno code on error.
999 */
1000 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1001 {
1002 *dst = NULL;
1003 if (sk) {
1004 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1005 *dst = ip6_sk_dst_check(sk, *dst, fl);
1006 }
1007
1008 return ip6_dst_lookup_tail(sk, dst, fl);
1009 }
1010 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1011
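/*
 * UFO path of ip6_append_data(): build (or extend) one large GSO skb
 * that carries the whole UDP datagram in page fragments and let the
 * device segment it, recording the shared fragment ID in
 * skb_shinfo(skb)->ip6_frag_id.
 */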
1012 static inline int ip6_ufo_append_data(struct sock *sk,
1013 int getfrag(void *from, char *to, int offset, int len,
1014 int odd, struct sk_buff *skb),
1015 void *from, int length, int hh_len, int fragheaderlen,
1016 int transhdrlen, int mtu,unsigned int flags)
1017
1018 {
1019 struct sk_buff *skb;
1020 int err;
1021
1022 /* The network device supports UDP large send offload, so
1023 * create a single skb containing the complete
1024 * UDP datagram
1025 */
1026 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1027 skb = sock_alloc_send_skb(sk,
1028 hh_len + fragheaderlen + transhdrlen + 20,
1029 (flags & MSG_DONTWAIT), &err);
1030 if (skb == NULL)
1031 return -ENOMEM;
1032
1033 /* reserve space for Hardware header */
1034 skb_reserve(skb, hh_len);
1035
1036 /* create space for UDP/IP header */
1037 skb_put(skb,fragheaderlen + transhdrlen);
1038
1039 /* initialize network header pointer */
1040 skb_reset_network_header(skb);
1041
1042 /* initialize protocol header pointer */
1043 skb->transport_header = skb->network_header + fragheaderlen;
1044
1045 skb->ip_summed = CHECKSUM_PARTIAL;
1046 skb->csum = 0;
1047 sk->sk_sndmsg_off = 0;
1048 }
1049
1050 err = skb_append_datato_frags(sk,skb, getfrag, from,
1051 (length - transhdrlen));
1052 if (!err) {
1053 struct frag_hdr fhdr;
1054
1055 /* specify the length of each IP datagram fragment*/
1056 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1057 sizeof(struct frag_hdr);
1058 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1059 ipv6_select_ident(skb, &fhdr);
1060 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1061 __skb_queue_tail(&sk->sk_write_queue, skb);
1062
1063 return 0;
1064 }
1065 /* There is not enough support to do UDP LSO,
1066 * so follow the normal path
1067 */
1068 kfree_skb(skb);
1069
1070 return err;
1071 }
1072
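/*
 * Queue data on the socket's write queue for a corked/pending packet.
 * On the first call the cork state (options, route, hop limit, traffic
 * class, MTU) is set up; subsequent calls append to it.  Data is packed
 * into MTU-sized skbs (or handed to ip6_ufo_append_data() when the
 * device supports UFO) until ip6_push_pending_frames() builds the final
 * IPv6 header and sends the queue.
 */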
1073 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1074 int offset, int len, int odd, struct sk_buff *skb),
1075 void *from, int length, int transhdrlen,
1076 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1077 struct rt6_info *rt, unsigned int flags)
1078 {
1079 struct inet_sock *inet = inet_sk(sk);
1080 struct ipv6_pinfo *np = inet6_sk(sk);
1081 struct sk_buff *skb;
1082 unsigned int maxfraglen, fragheaderlen;
1083 int exthdrlen;
1084 int hh_len;
1085 int mtu;
1086 int copy;
1087 int err;
1088 int offset = 0;
1089 int csummode = CHECKSUM_NONE;
1090
1091 if (flags&MSG_PROBE)
1092 return 0;
1093 if (skb_queue_empty(&sk->sk_write_queue)) {
1094 /*
1095 * setup for corking
1096 */
1097 if (opt) {
1098 if (np->cork.opt == NULL) {
1099 np->cork.opt = kmalloc(opt->tot_len,
1100 sk->sk_allocation);
1101 if (unlikely(np->cork.opt == NULL))
1102 return -ENOBUFS;
1103 } else if (np->cork.opt->tot_len < opt->tot_len) {
1104 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1105 return -EINVAL;
1106 }
1107 memcpy(np->cork.opt, opt, opt->tot_len);
1108 inet->cork.flags |= IPCORK_OPT;
1109 /* need source address above miyazawa*/
1110 }
1111 dst_hold(&rt->u.dst);
1112 np->cork.rt = rt;
1113 inet->cork.fl = *fl;
1114 np->cork.hop_limit = hlimit;
1115 np->cork.tclass = tclass;
1116 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1117 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1118 if (np->frag_size < mtu) {
1119 if (np->frag_size)
1120 mtu = np->frag_size;
1121 }
1122 inet->cork.fragsize = mtu;
1123 if (dst_allfrag(rt->u.dst.path))
1124 inet->cork.flags |= IPCORK_ALLFRAG;
1125 inet->cork.length = 0;
1126 sk->sk_sndmsg_page = NULL;
1127 sk->sk_sndmsg_off = 0;
1128 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1129 rt->nfheader_len;
1130 length += exthdrlen;
1131 transhdrlen += exthdrlen;
1132 } else {
1133 rt = np->cork.rt;
1134 fl = &inet->cork.fl;
1135 if (inet->cork.flags & IPCORK_OPT)
1136 opt = np->cork.opt;
1137 transhdrlen = 0;
1138 exthdrlen = 0;
1139 mtu = inet->cork.fragsize;
1140 }
1141
1142 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1143
1144 fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len +
1145 (opt ? opt->opt_nflen : 0);
1146 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1147
1148 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1149 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1150 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1151 return -EMSGSIZE;
1152 }
1153 }
1154
1155 /*
1156 * Let's try using as much space as possible.
1157 * Use MTU if total length of the message fits into the MTU.
1158 * Otherwise, we need to reserve fragment header and
1159 * fragment alignment (= 8-15 octets, in total).
1160 *
1161 * Note that we may need to "move" the data from the tail
1162 * of the buffer to the new fragment when we split
1163 * the message.
1164 *
1165 * FIXME: It may be fragmented into multiple chunks
1166 * at once if non-fragmentable extension headers
1167 * are too large.
1168 * --yoshfuji
1169 */
1170
1171 inet->cork.length += length;
1172 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1173 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1174
1175 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1176 fragheaderlen, transhdrlen, mtu,
1177 flags);
1178 if (err)
1179 goto error;
1180 return 0;
1181 }
1182
1183 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1184 goto alloc_new_skb;
1185
1186 while (length > 0) {
1187 /* Check if the remaining data fits into current packet. */
1188 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1189 if (copy < length)
1190 copy = maxfraglen - skb->len;
1191
1192 if (copy <= 0) {
1193 char *data;
1194 unsigned int datalen;
1195 unsigned int fraglen;
1196 unsigned int fraggap;
1197 unsigned int alloclen;
1198 struct sk_buff *skb_prev;
1199 alloc_new_skb:
1200 skb_prev = skb;
1201
1202 /* There's no room in the current skb */
1203 if (skb_prev)
1204 fraggap = skb_prev->len - maxfraglen;
1205 else
1206 fraggap = 0;
1207
1208 /*
1209 * If remaining data exceeds the mtu,
1210 * we know we need more fragment(s).
1211 */
1212 datalen = length + fraggap;
1213 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1214 datalen = maxfraglen - fragheaderlen;
1215
1216 fraglen = datalen + fragheaderlen;
1217 if ((flags & MSG_MORE) &&
1218 !(rt->u.dst.dev->features&NETIF_F_SG))
1219 alloclen = mtu;
1220 else
1221 alloclen = datalen + fragheaderlen;
1222
1223 /*
1224 * The last fragment gets additional space at tail.
1225 * Note: we overallocate on fragments with MSG_MORE
1226 * because we have no idea if we're the last one.
1227 */
1228 if (datalen == length + fraggap)
1229 alloclen += rt->u.dst.trailer_len;
1230
1231 /*
1232 * We just reserve space for fragment header.
1233 * Note: this may be overallocation if the message
1234 * (without MSG_MORE) fits into the MTU.
1235 */
1236 alloclen += sizeof(struct frag_hdr);
1237
1238 if (transhdrlen) {
1239 skb = sock_alloc_send_skb(sk,
1240 alloclen + hh_len,
1241 (flags & MSG_DONTWAIT), &err);
1242 } else {
1243 skb = NULL;
1244 if (atomic_read(&sk->sk_wmem_alloc) <=
1245 2 * sk->sk_sndbuf)
1246 skb = sock_wmalloc(sk,
1247 alloclen + hh_len, 1,
1248 sk->sk_allocation);
1249 if (unlikely(skb == NULL))
1250 err = -ENOBUFS;
1251 }
1252 if (skb == NULL)
1253 goto error;
1254 /*
1255 * Fill in the control structures
1256 */
1257 skb->ip_summed = csummode;
1258 skb->csum = 0;
1259 /* reserve for fragmentation */
1260 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1261
1262 /*
1263 * Find where to start putting bytes
1264 */
1265 data = skb_put(skb, fraglen);
1266 skb_set_network_header(skb, exthdrlen);
1267 data += fragheaderlen;
1268 skb->transport_header = (skb->network_header +
1269 fragheaderlen);
1270 if (fraggap) {
1271 skb->csum = skb_copy_and_csum_bits(
1272 skb_prev, maxfraglen,
1273 data + transhdrlen, fraggap, 0);
1274 skb_prev->csum = csum_sub(skb_prev->csum,
1275 skb->csum);
1276 data += fraggap;
1277 pskb_trim_unique(skb_prev, maxfraglen);
1278 }
1279 copy = datalen - transhdrlen - fraggap;
1280 if (copy < 0) {
1281 err = -EINVAL;
1282 kfree_skb(skb);
1283 goto error;
1284 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1285 err = -EFAULT;
1286 kfree_skb(skb);
1287 goto error;
1288 }
1289
1290 offset += copy;
1291 length -= datalen - fraggap;
1292 transhdrlen = 0;
1293 exthdrlen = 0;
1294 csummode = CHECKSUM_NONE;
1295
1296 /*
1297 * Put the packet on the pending queue
1298 */
1299 __skb_queue_tail(&sk->sk_write_queue, skb);
1300 continue;
1301 }
1302
1303 if (copy > length)
1304 copy = length;
1305
1306 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1307 unsigned int off;
1308
1309 off = skb->len;
1310 if (getfrag(from, skb_put(skb, copy),
1311 offset, copy, off, skb) < 0) {
1312 __skb_trim(skb, off);
1313 err = -EFAULT;
1314 goto error;
1315 }
1316 } else {
1317 int i = skb_shinfo(skb)->nr_frags;
1318 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1319 struct page *page = sk->sk_sndmsg_page;
1320 int off = sk->sk_sndmsg_off;
1321 unsigned int left;
1322
1323 if (page && (left = PAGE_SIZE - off) > 0) {
1324 if (copy >= left)
1325 copy = left;
1326 if (page != frag->page) {
1327 if (i == MAX_SKB_FRAGS) {
1328 err = -EMSGSIZE;
1329 goto error;
1330 }
1331 get_page(page);
1332 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1333 frag = &skb_shinfo(skb)->frags[i];
1334 }
1335 } else if(i < MAX_SKB_FRAGS) {
1336 if (copy > PAGE_SIZE)
1337 copy = PAGE_SIZE;
1338 page = alloc_pages(sk->sk_allocation, 0);
1339 if (page == NULL) {
1340 err = -ENOMEM;
1341 goto error;
1342 }
1343 sk->sk_sndmsg_page = page;
1344 sk->sk_sndmsg_off = 0;
1345
1346 skb_fill_page_desc(skb, i, page, 0, 0);
1347 frag = &skb_shinfo(skb)->frags[i];
1348 } else {
1349 err = -EMSGSIZE;
1350 goto error;
1351 }
1352 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1353 err = -EFAULT;
1354 goto error;
1355 }
1356 sk->sk_sndmsg_off += copy;
1357 frag->size += copy;
1358 skb->len += copy;
1359 skb->data_len += copy;
1360 skb->truesize += copy;
1361 atomic_add(copy, &sk->sk_wmem_alloc);
1362 }
1363 offset += copy;
1364 length -= copy;
1365 }
1366 return 0;
1367 error:
1368 inet->cork.length -= length;
1369 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1370 return err;
1371 }
1372
1373 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1374 {
1375 inet->cork.flags &= ~IPCORK_OPT;
1376 kfree(np->cork.opt);
1377 np->cork.opt = NULL;
1378 if (np->cork.rt) {
1379 dst_release(&np->cork.rt->u.dst);
1380 np->cork.rt = NULL;
1381 inet->cork.flags &= ~IPCORK_ALLFRAG;
1382 }
1383 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1384 }
1385
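/*
 * Collapse the socket's pending write queue into a single skb (extra
 * skbs become the frag_list), push the extension headers and the IPv6
 * header, attach the cached route and send the result through
 * ip6_local_out(), releasing the cork state afterwards.
 */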
1386 int ip6_push_pending_frames(struct sock *sk)
1387 {
1388 struct sk_buff *skb, *tmp_skb;
1389 struct sk_buff **tail_skb;
1390 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1391 struct inet_sock *inet = inet_sk(sk);
1392 struct ipv6_pinfo *np = inet6_sk(sk);
1393 struct ipv6hdr *hdr;
1394 struct ipv6_txoptions *opt = np->cork.opt;
1395 struct rt6_info *rt = np->cork.rt;
1396 struct flowi *fl = &inet->cork.fl;
1397 unsigned char proto = fl->proto;
1398 int err = 0;
1399
1400 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1401 goto out;
1402 tail_skb = &(skb_shinfo(skb)->frag_list);
1403
1404 /* move skb->data to ip header from ext header */
1405 if (skb->data < skb_network_header(skb))
1406 __skb_pull(skb, skb_network_offset(skb));
1407 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1408 __skb_pull(tmp_skb, skb_network_header_len(skb));
1409 *tail_skb = tmp_skb;
1410 tail_skb = &(tmp_skb->next);
1411 skb->len += tmp_skb->len;
1412 skb->data_len += tmp_skb->len;
1413 skb->truesize += tmp_skb->truesize;
1414 __sock_put(tmp_skb->sk);
1415 tmp_skb->destructor = NULL;
1416 tmp_skb->sk = NULL;
1417 }
1418
1419 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1420 __skb_pull(skb, skb_network_header_len(skb));
1421 if (opt && opt->opt_flen)
1422 ipv6_push_frag_opts(skb, opt, &proto);
1423 if (opt && opt->opt_nflen)
1424 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1425
1426 skb_push(skb, sizeof(struct ipv6hdr));
1427 skb_reset_network_header(skb);
1428 hdr = ipv6_hdr(skb);
1429
1430 *(__be32*)hdr = fl->fl6_flowlabel |
1431 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1432
1433 hdr->hop_limit = np->cork.hop_limit;
1434 hdr->nexthdr = proto;
1435 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1436 ipv6_addr_copy(&hdr->daddr, final_dst);
1437
1438 skb->priority = sk->sk_priority;
1439
1440 skb->dst = dst_clone(&rt->u.dst);
1441 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1442 if (proto == IPPROTO_ICMPV6) {
1443 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1444
1445 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1446 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1447 }
1448
1449 err = ip6_local_out(skb);
1450 if (err) {
1451 if (err > 0)
1452 err = np->recverr ? net_xmit_errno(err) : 0;
1453 if (err)
1454 goto error;
1455 }
1456
1457 out:
1458 ip6_cork_release(inet, np);
1459 return err;
1460 error:
1461 goto out;
1462 }
1463
1464 void ip6_flush_pending_frames(struct sock *sk)
1465 {
1466 struct sk_buff *skb;
1467
1468 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1469 if (skb->dst)
1470 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1471 IPSTATS_MIB_OUTDISCARDS);
1472 kfree_skb(skb);
1473 }
1474
1475 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1476 }