/* net/ipv6/ip6_output.c */
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

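/*
 * Pick the Identification value for an outgoing fragment header (struct
 * frag_hdr: nexthdr, reserved byte, frag_off, 32-bit identification).
 * A single global counter is shared by all flows and protected by a
 * spinlock; on wraparound it is reset to 1, so the value 0 is never used.
 */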
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

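/*
 * Fill in the payload length of an already-built IPv6 header and run the
 * packet through the NF_INET_LOCAL_OUT netfilter hook.  If the payload
 * does not fit the 16-bit field (a jumbogram), payload_len is set to 0.
 * nf_hook() returns 1 when the packet is accepted, in which case
 * ip6_local_out() continues with dst_output().
 */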
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

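/*
 * Final transmit step: hand the packet to the neighbour layer.  A cached
 * hardware header (dst->hh) is used directly when available; otherwise
 * the neighbour's output function performs the resolution.  With neither,
 * the packet cannot be sent and is dropped.
 */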
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

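/*
 * Multicast-aware output: if the destination is a multicast group that
 * this host has itself joined (and mc_loop is enabled on the socket), a
 * clone of the packet is looped back locally before the original is
 * passed to the NF_INET_POST_ROUTING hook.
 */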
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

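/*
 * ip6_xmit() builds the IPv6 header in front of a transport payload and
 * sends the packet through NF_INET_LOCAL_OUT.  @opt may carry extension
 * headers; @ipfragok permits sending a packet larger than the MTU.  On an
 * MTU overflow a "packet too big" ICMPv6 error is sent back to the local
 * sender.
 */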
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

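/*
 * Deliver a copy of the packet to every raw socket that registered for
 * this Router Alert value (via the IPV6_ROUTER_ALERT socket option,
 * which links the socket into ip6_ra_chain).  Returns 1 if at least one
 * socket consumed the packet, 0 otherwise.
 */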
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

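/*
 * Decide what to do with a packet that arrived for an address we proxy
 * (proxy NDP).  Returns 1 when the packet is a neighbour discovery
 * message that should be handed to the local input path, -1 when it must
 * be dropped (link-local destination), and 0 to forward it normally.
 */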
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For a reaction involving a unicast neighbor
			 * discovery message destined to the proxied address,
			 * pass it to the input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do not do any processing on RA packets,
	 *	pushing them to user level AS IS without any
	 *	WARRANTY that the application will be able to
	 *	interpret them. The reason is that we cannot
	 *	make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

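/*
 * Walk the extension header chain to find where a fragment header must
 * be inserted: after the per-fragment ("unfragmentable") headers, i.e.
 * hop-by-hop, routing, and a destination options header carrying a home
 * address option (MIPv6).  Returns the offset and points *nexthdr at the
 * nexthdr byte that will precede the fragment header.
 */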
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

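/*
 * Fragment an IPv6 packet that exceeds the path MTU.  A fast path reuses
 * an existing frag_list (already split into suitably sized chunks) and
 * only prepends headers; otherwise the slow path allocates a fresh skb
 * for each fragment and copies the data.  Per RFC 2460, every fragment
 * except the last must carry a multiple of 8 bytes of payload.
 */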
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * If the dst entry we've looked up has a neighbour entry that
	 * is in the INCOMPLETE state and the source address from the
	 * flow is marked as OPTIMISTIC, we release the found dst entry
	 * and replace it instead with the dst entry of the nexthop
	 * router.
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @dst: pointer to dst_entry * for result
 * @fl: flow to lookup
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* The network device supports UDP large send offload, so build a
	 * single skb containing the complete UDP datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for the hardware header */
		skb_reserve(skb, hh_len);

		/* create space for the UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize the network header pointer */
		skb_reset_network_header(skb);

		/* initialize the protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}

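/*
 * Append data to the pending (corked) queue of a socket.  The first call
 * sets up the cork state (options, route, MTU); subsequent calls reuse
 * it.  Data is packed into MTU-sized skbs, using page frags when the
 * device supports scatter/gather, and the queue is finally turned into
 * real packets by ip6_push_pending_frames().
 */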
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above --miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

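/*
 * Turn the socket's pending write queue into a single packet: chain the
 * queued skbs onto the first skb's frag_list, prepend the extension
 * headers and the IPv6 header, and hand the result to ip6_local_out().
 * The cork state is released whether or not transmission succeeds.
 */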
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

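/*
 * Drop everything still queued on the socket's write queue and release
 * the cork state; used when an error aborts a corked send.
 */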
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}