[INET]: Move tcp_port_rover to inet_hashinfo
net/ipv6/tcp_ipv6.c
1 /*
2 * TCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
14 *
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/ipv6.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
57 #include <net/xfrm.h>
58 #include <net/addrconf.h>
59 #include <net/snmp.h>
60 #include <net/dsfield.h>
61
62 #include <asm/uaccess.h>
63
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
66
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
70 struct sk_buff *skb);
71
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
74
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
77
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
81 {
82 int hashent = (lport ^ fport);
83
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_hashinfo.ehash_size - 1));
88 }
89
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
91 {
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
99 }
100
101 static inline int tcp_v6_bind_conflict(const struct sock *sk,
102 const struct inet_bind_bucket *tb)
103 {
104 const struct sock *sk2;
105 const struct hlist_node *node;
106
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
109 if (sk != sk2 &&
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
116 break;
117 }
118
119 return node != NULL;
120 }
121
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
126 */
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
128 {
129 struct inet_bind_hashbucket *head;
130 struct inet_bind_bucket *tb;
131 struct hlist_node *node;
132 int ret;
133
134 local_bh_disable();
135 if (snum == 0) {
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
139 int rover;
140
141 spin_lock(&tcp_hashinfo.portalloc_lock);
142 if (tcp_hashinfo.port_rover < low)
143 rover = low;
144 else
145 rover = tcp_hashinfo.port_rover;
146 do { rover++;
147 if (rover > high)
148 rover = low;
149 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
150 spin_lock(&head->lock);
151 inet_bind_bucket_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
153 goto next;
154 break;
155 next:
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_hashinfo.port_rover = rover;
159 spin_unlock(&tcp_hashinfo.portalloc_lock);
160
161 /* Exhausted local port range during search? It is not
162 * possible for us to be holding one of the bind hash
163 * locks if this test triggers, because if 'remaining'
164 * drops to zero, we broke out of the do/while loop at
165 * the top level, not from the 'break;' statement.
166 */
167 ret = 1;
168 if (unlikely(remaining <= 0))
169 goto fail;
170
171 /* OK, here is the one we will use. */
172 snum = rover;
173 } else {
174 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
175 spin_lock(&head->lock);
176 inet_bind_bucket_for_each(tb, node, &head->chain)
177 if (tb->port == snum)
178 goto tb_found;
179 }
180 tb = NULL;
181 goto tb_not_found;
182 tb_found:
183 if (tb && !hlist_empty(&tb->owners)) {
184 if (tb->fastreuse > 0 && sk->sk_reuse &&
185 sk->sk_state != TCP_LISTEN) {
186 goto success;
187 } else {
188 ret = 1;
189 if (tcp_v6_bind_conflict(sk, tb))
190 goto fail_unlock;
191 }
192 }
193 tb_not_found:
194 ret = 1;
195 if (tb == NULL) {
196 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
197 if (tb == NULL)
198 goto fail_unlock;
199 }
200 if (hlist_empty(&tb->owners)) {
201 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
202 tb->fastreuse = 1;
203 else
204 tb->fastreuse = 0;
205 } else if (tb->fastreuse &&
206 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
207 tb->fastreuse = 0;
208
209 success:
210 if (!inet_sk(sk)->bind_hash)
211 inet_bind_hash(sk, tb, snum);
212 BUG_TRAP(inet_sk(sk)->bind_hash == tb);
213 ret = 0;
214
215 fail_unlock:
216 spin_unlock(&head->lock);
217 fail:
218 local_bh_enable();
219 return ret;
220 }
221
222 static __inline__ void __tcp_v6_hash(struct sock *sk)
223 {
224 struct hlist_head *list;
225 rwlock_t *lock;
226
227 BUG_TRAP(sk_unhashed(sk));
228
229 if (sk->sk_state == TCP_LISTEN) {
230 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
231 lock = &tcp_hashinfo.lhash_lock;
232 tcp_listen_wlock();
233 } else {
234 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
235 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
236 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
237 write_lock(lock);
238 }
239
240 __sk_add_node(sk, list);
241 sock_prot_inc_use(sk->sk_prot);
242 write_unlock(lock);
243 }
244
245
246 static void tcp_v6_hash(struct sock *sk)
247 {
248 if (sk->sk_state != TCP_CLOSE) {
249 struct tcp_sock *tp = tcp_sk(sk);
250
251 if (tp->af_specific == &ipv6_mapped) {
252 tcp_prot.hash(sk);
253 return;
254 }
255 local_bh_disable();
256 __tcp_v6_hash(sk);
257 local_bh_enable();
258 }
259 }
260
261 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
262 {
263 struct sock *sk;
264 struct hlist_node *node;
265 struct sock *result = NULL;
266 int score, hiscore;
267
268 hiscore=0;
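/* Each candidate listener is scored: 1 for a port/family match, +1 for an
 * exact rcv_saddr match, +1 for a matching bound device.  A score of 3 is a
 * perfect match and ends the walk early; otherwise the best score wins.
 */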
269 read_lock(&tcp_hashinfo.lhash_lock);
270 sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
271 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
272 struct ipv6_pinfo *np = inet6_sk(sk);
273
274 score = 1;
275 if (!ipv6_addr_any(&np->rcv_saddr)) {
276 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
277 continue;
278 score++;
279 }
280 if (sk->sk_bound_dev_if) {
281 if (sk->sk_bound_dev_if != dif)
282 continue;
283 score++;
284 }
285 if (score == 3) {
286 result = sk;
287 break;
288 }
289 if (score > hiscore) {
290 hiscore = score;
291 result = sk;
292 }
293 }
294 }
295 if (result)
296 sock_hold(result);
297 read_unlock(&tcp_hashinfo.lhash_lock);
298 return result;
299 }
300
301 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
302 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
303 *
304 * The sockhash lock must be held as a reader here.
305 */
306
307 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
308 struct in6_addr *daddr, u16 hnum,
309 int dif)
310 {
311 struct inet_ehash_bucket *head;
312 struct sock *sk;
313 struct hlist_node *node;
314 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
315 int hash;
316
317 /* Optimize here for direct hit, only listening connections can
318 * have wildcards anyways.
319 */
320 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
321 head = &tcp_hashinfo.ehash[hash];
322 read_lock(&head->lock);
323 sk_for_each(sk, node, &head->chain) {
324 /* For IPV6 do the cheaper port and family tests first. */
325 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
326 goto hit; /* You sunk my battleship! */
327 }
328 /* Must check for a TIME_WAIT'er before going to listener hash. */
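/* TIME_WAIT buckets live in the second half of the ehash table, hence the
 * (head + tcp_hashinfo.ehash_size) chain walked below.
 */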
329 sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
330 /* FIXME: acme: check this... */
331 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
332
333 if(*((__u32 *)&(tw->tw_dport)) == ports &&
334 sk->sk_family == PF_INET6) {
335 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
336 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
337 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
338 goto hit;
339 }
340 }
341 read_unlock(&head->lock);
342 return NULL;
343
344 hit:
345 sock_hold(sk);
346 read_unlock(&head->lock);
347 return sk;
348 }
349
350
351 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
352 struct in6_addr *daddr, u16 hnum,
353 int dif)
354 {
355 struct sock *sk;
356
357 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
358
359 if (sk)
360 return sk;
361
362 return tcp_v6_lookup_listener(daddr, hnum, dif);
363 }
364
365 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
366 struct in6_addr *daddr, u16 dport,
367 int dif)
368 {
369 struct sock *sk;
370
371 local_bh_disable();
372 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
373 local_bh_enable();
374
375 return sk;
376 }
377
378 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
379
380
381 /*
382 * Open request hash tables.
383 */
384
385 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
386 {
387 u32 a, b, c;
388
389 a = raddr->s6_addr32[0];
390 b = raddr->s6_addr32[1];
391 c = raddr->s6_addr32[2];
392
393 a += JHASH_GOLDEN_RATIO;
394 b += JHASH_GOLDEN_RATIO;
395 c += rnd;
396 __jhash_mix(a, b, c);
397
398 a += raddr->s6_addr32[3];
399 b += (u32) rport;
400 __jhash_mix(a, b, c);
401
402 return c & (TCP_SYNQ_HSIZE - 1);
403 }
404
405 static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
406 struct request_sock ***prevp,
407 __u16 rport,
408 struct in6_addr *raddr,
409 struct in6_addr *laddr,
410 int iif)
411 {
412 struct listen_sock *lopt = tp->accept_queue.listen_opt;
413 struct request_sock *req, **prev;
414
415 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
416 (req = *prev) != NULL;
417 prev = &req->dl_next) {
418 const struct tcp6_request_sock *treq = tcp6_rsk(req);
419
420 if (inet_rsk(req)->rmt_port == rport &&
421 req->rsk_ops->family == AF_INET6 &&
422 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
423 ipv6_addr_equal(&treq->loc_addr, laddr) &&
424 (!treq->iif || treq->iif == iif)) {
425 BUG_TRAP(req->sk == NULL);
426 *prevp = prev;
427 return req;
428 }
429 }
430
431 return NULL;
432 }
433
434 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
435 struct in6_addr *saddr,
436 struct in6_addr *daddr,
437 unsigned long base)
438 {
439 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
440 }
441
442 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
443 {
444 if (skb->protocol == htons(ETH_P_IPV6)) {
445 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
446 skb->nh.ipv6h->saddr.s6_addr32,
447 skb->h.th->dest,
448 skb->h.th->source);
449 } else {
450 return secure_tcp_sequence_number(skb->nh.iph->daddr,
451 skb->nh.iph->saddr,
452 skb->h.th->dest,
453 skb->h.th->source);
454 }
455 }
456
457 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
458 struct tcp_tw_bucket **twp)
459 {
460 struct inet_sock *inet = inet_sk(sk);
461 struct ipv6_pinfo *np = inet6_sk(sk);
462 struct in6_addr *daddr = &np->rcv_saddr;
463 struct in6_addr *saddr = &np->daddr;
464 int dif = sk->sk_bound_dev_if;
465 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
466 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
467 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
468 struct sock *sk2;
469 struct hlist_node *node;
470 struct tcp_tw_bucket *tw;
471
472 write_lock(&head->lock);
473
474 /* Check TIME-WAIT sockets first. */
475 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
476 tw = (struct tcp_tw_bucket*)sk2;
477
478 if(*((__u32 *)&(tw->tw_dport)) == ports &&
479 sk2->sk_family == PF_INET6 &&
480 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
481 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
482 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
483 struct tcp_sock *tp = tcp_sk(sk);
484
485 if (tw->tw_ts_recent_stamp &&
486 (!twp || (sysctl_tcp_tw_reuse &&
487 xtime.tv_sec -
488 tw->tw_ts_recent_stamp > 1))) {
489 /* See comment in tcp_ipv4.c */
490 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
491 if (!tp->write_seq)
492 tp->write_seq = 1;
493 tp->rx_opt.ts_recent = tw->tw_ts_recent;
494 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
495 sock_hold(sk2);
496 goto unique;
497 } else
498 goto not_unique;
499 }
500 }
501 tw = NULL;
502
503 /* And established part... */
504 sk_for_each(sk2, node, &head->chain) {
505 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
506 goto not_unique;
507 }
508
509 unique:
510 BUG_TRAP(sk_unhashed(sk));
511 __sk_add_node(sk, &head->chain);
512 sk->sk_hashent = hash;
513 sock_prot_inc_use(sk->sk_prot);
514 write_unlock(&head->lock);
515
516 if (twp) {
517 *twp = tw;
518 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
519 } else if (tw) {
520 /* Silly. Should hash-dance instead... */
521 tcp_tw_deschedule(tw);
522 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
523
524 tcp_tw_put(tw);
525 }
526 return 0;
527
528 not_unique:
529 write_unlock(&head->lock);
530 return -EADDRNOTAVAIL;
531 }
532
533 static inline u32 tcpv6_port_offset(const struct sock *sk)
534 {
535 const struct inet_sock *inet = inet_sk(sk);
536 const struct ipv6_pinfo *np = inet6_sk(sk);
537
538 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
539 np->daddr.s6_addr32,
540 inet->dport);
541 }
542
543 static int tcp_v6_hash_connect(struct sock *sk)
544 {
545 unsigned short snum = inet_sk(sk)->num;
546 struct inet_bind_hashbucket *head;
547 struct inet_bind_bucket *tb;
548 int ret;
549
550 if (!snum) {
551 int low = sysctl_local_port_range[0];
552 int high = sysctl_local_port_range[1];
553 int range = high - low;
554 int i;
555 int port;
556 static u32 hint;
557 u32 offset = hint + tcpv6_port_offset(sk);
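/* The ephemeral port search starts at a per-destination pseudo-random offset
 * (a secure hash of the addresses and destination port) plus a static hint
 * that is advanced after every successful bind, spreading allocations over
 * the local port range.
 */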
558 struct hlist_node *node;
559 struct tcp_tw_bucket *tw = NULL;
560
561 local_bh_disable();
562 for (i = 1; i <= range; i++) {
563 port = low + (i + offset) % range;
564 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
565 spin_lock(&head->lock);
566
567 /* Does not bother with rcv_saddr checks,
568 * because the established check is already
569 * unique enough.
570 */
571 inet_bind_bucket_for_each(tb, node, &head->chain) {
572 if (tb->port == port) {
573 BUG_TRAP(!hlist_empty(&tb->owners));
574 if (tb->fastreuse >= 0)
575 goto next_port;
576 if (!__tcp_v6_check_established(sk,
577 port,
578 &tw))
579 goto ok;
580 goto next_port;
581 }
582 }
583
584 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
585 if (!tb) {
586 spin_unlock(&head->lock);
587 break;
588 }
589 tb->fastreuse = -1;
590 goto ok;
591
592 next_port:
593 spin_unlock(&head->lock);
594 }
595 local_bh_enable();
596
597 return -EADDRNOTAVAIL;
598
599 ok:
600 hint += i;
601
602 /* Head lock still held and bh's disabled */
603 inet_bind_hash(sk, tb, port);
604 if (sk_unhashed(sk)) {
605 inet_sk(sk)->sport = htons(port);
606 __tcp_v6_hash(sk);
607 }
608 spin_unlock(&head->lock);
609
610 if (tw) {
611 tcp_tw_deschedule(tw);
612 tcp_tw_put(tw);
613 }
614
615 ret = 0;
616 goto out;
617 }
618
619 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
620 tb = inet_sk(sk)->bind_hash;
621 spin_lock_bh(&head->lock);
622
623 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
624 __tcp_v6_hash(sk);
625 spin_unlock_bh(&head->lock);
626 return 0;
627 } else {
628 spin_unlock(&head->lock);
629 /* No definite answer... Walk to established hash table */
630 ret = __tcp_v6_check_established(sk, snum, NULL);
631 out:
632 local_bh_enable();
633 return ret;
634 }
635 }
636
637 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
638 {
639 return IP6CB(skb)->iif;
640 }
641
642 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
643 int addr_len)
644 {
645 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
646 struct inet_sock *inet = inet_sk(sk);
647 struct ipv6_pinfo *np = inet6_sk(sk);
648 struct tcp_sock *tp = tcp_sk(sk);
649 struct in6_addr *saddr = NULL, *final_p = NULL, final;
650 struct flowi fl;
651 struct dst_entry *dst;
652 int addr_type;
653 int err;
654
655 if (addr_len < SIN6_LEN_RFC2133)
656 return -EINVAL;
657
658 if (usin->sin6_family != AF_INET6)
659 return(-EAFNOSUPPORT);
660
661 memset(&fl, 0, sizeof(fl));
662
663 if (np->sndflow) {
664 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
665 IP6_ECN_flow_init(fl.fl6_flowlabel);
666 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
667 struct ip6_flowlabel *flowlabel;
668 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
669 if (flowlabel == NULL)
670 return -EINVAL;
671 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
672 fl6_sock_release(flowlabel);
673 }
674 }
675
676 /*
677 * connect() to INADDR_ANY means loopback (BSD'ism).
678 */
679
680 if(ipv6_addr_any(&usin->sin6_addr))
681 usin->sin6_addr.s6_addr[15] = 0x1;
682
683 addr_type = ipv6_addr_type(&usin->sin6_addr);
684
685 if(addr_type & IPV6_ADDR_MULTICAST)
686 return -ENETUNREACH;
687
688 if (addr_type&IPV6_ADDR_LINKLOCAL) {
689 if (addr_len >= sizeof(struct sockaddr_in6) &&
690 usin->sin6_scope_id) {
691 /* If interface is set while binding, indices
692 * must coincide.
693 */
694 if (sk->sk_bound_dev_if &&
695 sk->sk_bound_dev_if != usin->sin6_scope_id)
696 return -EINVAL;
697
698 sk->sk_bound_dev_if = usin->sin6_scope_id;
699 }
700
701 /* Connect to link-local address requires an interface */
702 if (!sk->sk_bound_dev_if)
703 return -EINVAL;
704 }
705
706 if (tp->rx_opt.ts_recent_stamp &&
707 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
708 tp->rx_opt.ts_recent = 0;
709 tp->rx_opt.ts_recent_stamp = 0;
710 tp->write_seq = 0;
711 }
712
713 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
714 np->flow_label = fl.fl6_flowlabel;
715
716 /*
717 * TCP over IPv4
718 */
719
720 if (addr_type == IPV6_ADDR_MAPPED) {
721 u32 exthdrlen = tp->ext_header_len;
722 struct sockaddr_in sin;
723
724 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
725
726 if (__ipv6_only_sock(sk))
727 return -ENETUNREACH;
728
729 sin.sin_family = AF_INET;
730 sin.sin_port = usin->sin6_port;
731 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
732
733 tp->af_specific = &ipv6_mapped;
734 sk->sk_backlog_rcv = tcp_v4_do_rcv;
735
736 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
737
738 if (err) {
739 tp->ext_header_len = exthdrlen;
740 tp->af_specific = &ipv6_specific;
741 sk->sk_backlog_rcv = tcp_v6_do_rcv;
742 goto failure;
743 } else {
744 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
745 inet->saddr);
746 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
747 inet->rcv_saddr);
748 }
749
750 return err;
751 }
752
753 if (!ipv6_addr_any(&np->rcv_saddr))
754 saddr = &np->rcv_saddr;
755
756 fl.proto = IPPROTO_TCP;
757 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
758 ipv6_addr_copy(&fl.fl6_src,
759 (saddr ? saddr : &np->saddr));
760 fl.oif = sk->sk_bound_dev_if;
761 fl.fl_ip_dport = usin->sin6_port;
762 fl.fl_ip_sport = inet->sport;
763
764 if (np->opt && np->opt->srcrt) {
765 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
766 ipv6_addr_copy(&final, &fl.fl6_dst);
767 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
768 final_p = &final;
769 }
770
771 err = ip6_dst_lookup(sk, &dst, &fl);
772 if (err)
773 goto failure;
774 if (final_p)
775 ipv6_addr_copy(&fl.fl6_dst, final_p);
776
777 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
778 dst_release(dst);
779 goto failure;
780 }
781
782 if (saddr == NULL) {
783 saddr = &fl.fl6_src;
784 ipv6_addr_copy(&np->rcv_saddr, saddr);
785 }
786
787 /* set the source address */
788 ipv6_addr_copy(&np->saddr, saddr);
789 inet->rcv_saddr = LOOPBACK4_IPV6;
790
791 ip6_dst_store(sk, dst, NULL);
792 sk->sk_route_caps = dst->dev->features &
793 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
794
795 tp->ext_header_len = 0;
796 if (np->opt)
797 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
798
799 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
800
801 inet->dport = usin->sin6_port;
802
803 tcp_set_state(sk, TCP_SYN_SENT);
804 err = tcp_v6_hash_connect(sk);
805 if (err)
806 goto late_failure;
807
808 if (!tp->write_seq)
809 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
810 np->daddr.s6_addr32,
811 inet->sport,
812 inet->dport);
813
814 err = tcp_connect(sk);
815 if (err)
816 goto late_failure;
817
818 return 0;
819
820 late_failure:
821 tcp_set_state(sk, TCP_CLOSE);
822 __sk_dst_reset(sk);
823 failure:
824 inet->dport = 0;
825 sk->sk_route_caps = 0;
826 return err;
827 }
828
829 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
830 int type, int code, int offset, __u32 info)
831 {
832 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
833 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
834 struct ipv6_pinfo *np;
835 struct sock *sk;
836 int err;
837 struct tcp_sock *tp;
838 __u32 seq;
839
840 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
841
842 if (sk == NULL) {
843 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
844 return;
845 }
846
847 if (sk->sk_state == TCP_TIME_WAIT) {
848 tcp_tw_put((struct tcp_tw_bucket*)sk);
849 return;
850 }
851
852 bh_lock_sock(sk);
853 if (sock_owned_by_user(sk))
854 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
855
856 if (sk->sk_state == TCP_CLOSE)
857 goto out;
858
859 tp = tcp_sk(sk);
860 seq = ntohl(th->seq);
861 if (sk->sk_state != TCP_LISTEN &&
862 !between(seq, tp->snd_una, tp->snd_nxt)) {
863 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
864 goto out;
865 }
866
867 np = inet6_sk(sk);
868
869 if (type == ICMPV6_PKT_TOOBIG) {
870 struct dst_entry *dst = NULL;
871
872 if (sock_owned_by_user(sk))
873 goto out;
874 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
875 goto out;
876
877 /* icmp should have updated the destination cache entry */
878 dst = __sk_dst_check(sk, np->dst_cookie);
879
880 if (dst == NULL) {
881 struct inet_sock *inet = inet_sk(sk);
882 struct flowi fl;
883
884 /* BUGGG_FUTURE: Again, it is not clear how
885 to handle rthdr case. Ignore this complexity
886 for now.
887 */
888 memset(&fl, 0, sizeof(fl));
889 fl.proto = IPPROTO_TCP;
890 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
891 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
892 fl.oif = sk->sk_bound_dev_if;
893 fl.fl_ip_dport = inet->dport;
894 fl.fl_ip_sport = inet->sport;
895
896 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
897 sk->sk_err_soft = -err;
898 goto out;
899 }
900
901 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
902 sk->sk_err_soft = -err;
903 goto out;
904 }
905
906 } else
907 dst_hold(dst);
908
909 if (tp->pmtu_cookie > dst_mtu(dst)) {
910 tcp_sync_mss(sk, dst_mtu(dst));
911 tcp_simple_retransmit(sk);
912 } /* else let the usual retransmit timer handle it */
913 dst_release(dst);
914 goto out;
915 }
916
917 icmpv6_err_convert(type, code, &err);
918
919 /* Might be for a request_sock */
920 switch (sk->sk_state) {
921 struct request_sock *req, **prev;
922 case TCP_LISTEN:
923 if (sock_owned_by_user(sk))
924 goto out;
925
926 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
927 &hdr->saddr, tcp_v6_iif(skb));
928 if (!req)
929 goto out;
930
931 /* ICMPs are not backlogged, hence we cannot get
932 * an established socket here.
933 */
934 BUG_TRAP(req->sk == NULL);
935
936 if (seq != tcp_rsk(req)->snt_isn) {
937 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
938 goto out;
939 }
940
941 tcp_synq_drop(sk, req, prev);
942 goto out;
943
944 case TCP_SYN_SENT:
945 case TCP_SYN_RECV: /* Cannot happen.
946 It can, if SYNs are crossed. --ANK */
947 if (!sock_owned_by_user(sk)) {
948 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
949 sk->sk_err = err;
950 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
951
952 tcp_done(sk);
953 } else
954 sk->sk_err_soft = err;
955 goto out;
956 }
957
958 if (!sock_owned_by_user(sk) && np->recverr) {
959 sk->sk_err = err;
960 sk->sk_error_report(sk);
961 } else
962 sk->sk_err_soft = err;
963
964 out:
965 bh_unlock_sock(sk);
966 sock_put(sk);
967 }
968
969
970 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
971 struct dst_entry *dst)
972 {
973 struct tcp6_request_sock *treq = tcp6_rsk(req);
974 struct ipv6_pinfo *np = inet6_sk(sk);
975 struct sk_buff * skb;
976 struct ipv6_txoptions *opt = NULL;
977 struct in6_addr * final_p = NULL, final;
978 struct flowi fl;
979 int err = -1;
980
981 memset(&fl, 0, sizeof(fl));
982 fl.proto = IPPROTO_TCP;
983 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
984 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
985 fl.fl6_flowlabel = 0;
986 fl.oif = treq->iif;
987 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
988 fl.fl_ip_sport = inet_sk(sk)->sport;
989
990 if (dst == NULL) {
991 opt = np->opt;
992 if (opt == NULL &&
993 np->rxopt.bits.srcrt == 2 &&
994 treq->pktopts) {
995 struct sk_buff *pktopts = treq->pktopts;
996 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
997 if (rxopt->srcrt)
998 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
999 }
1000
1001 if (opt && opt->srcrt) {
1002 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1003 ipv6_addr_copy(&final, &fl.fl6_dst);
1004 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1005 final_p = &final;
1006 }
1007
1008 err = ip6_dst_lookup(sk, &dst, &fl);
1009 if (err)
1010 goto done;
1011 if (final_p)
1012 ipv6_addr_copy(&fl.fl6_dst, final_p);
1013 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1014 goto done;
1015 }
1016
1017 skb = tcp_make_synack(sk, dst, req);
1018 if (skb) {
1019 struct tcphdr *th = skb->h.th;
1020
1021 th->check = tcp_v6_check(th, skb->len,
1022 &treq->loc_addr, &treq->rmt_addr,
1023 csum_partial((char *)th, skb->len, skb->csum));
1024
1025 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1026 err = ip6_xmit(sk, skb, &fl, opt, 0);
1027 if (err == NET_XMIT_CN)
1028 err = 0;
1029 }
1030
1031 done:
1032 dst_release(dst);
1033 if (opt && opt != np->opt)
1034 sock_kfree_s(sk, opt, opt->tot_len);
1035 return err;
1036 }
1037
1038 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1039 {
1040 if (tcp6_rsk(req)->pktopts)
1041 kfree_skb(tcp6_rsk(req)->pktopts);
1042 }
1043
1044 static struct request_sock_ops tcp6_request_sock_ops = {
1045 .family = AF_INET6,
1046 .obj_size = sizeof(struct tcp6_request_sock),
1047 .rtx_syn_ack = tcp_v6_send_synack,
1048 .send_ack = tcp_v6_reqsk_send_ack,
1049 .destructor = tcp_v6_reqsk_destructor,
1050 .send_reset = tcp_v6_send_reset
1051 };
1052
1053 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1054 {
1055 struct ipv6_pinfo *np = inet6_sk(sk);
1056 struct inet6_skb_parm *opt = IP6CB(skb);
1057
1058 if (np->rxopt.all) {
1059 if ((opt->hop && np->rxopt.bits.hopopts) ||
1060 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1061 np->rxopt.bits.rxflow) ||
1062 (opt->srcrt && np->rxopt.bits.srcrt) ||
1063 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1064 return 1;
1065 }
1066 return 0;
1067 }
1068
1069
1070 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1071 struct sk_buff *skb)
1072 {
1073 struct ipv6_pinfo *np = inet6_sk(sk);
1074
1075 if (skb->ip_summed == CHECKSUM_HW) {
1076 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1077 skb->csum = offsetof(struct tcphdr, check);
1078 } else {
1079 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1080 csum_partial((char *)th, th->doff<<2,
1081 skb->csum));
1082 }
1083 }
1084
1085
1086 static void tcp_v6_send_reset(struct sk_buff *skb)
1087 {
1088 struct tcphdr *th = skb->h.th, *t1;
1089 struct sk_buff *buff;
1090 struct flowi fl;
1091
1092 if (th->rst)
1093 return;
1094
1095 if (!ipv6_unicast_destination(skb))
1096 return;
1097
1098 /*
1099 * We need to grab some memory, and put together an RST,
1100 * and then put it into the queue to be sent.
1101 */
1102
1103 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1104 GFP_ATOMIC);
1105 if (buff == NULL)
1106 return;
1107
1108 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1109
1110 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1111
1112 /* Swap the send and the receive. */
1113 memset(t1, 0, sizeof(*t1));
1114 t1->dest = th->source;
1115 t1->source = th->dest;
1116 t1->doff = sizeof(*t1)/4;
1117 t1->rst = 1;
1118
1119 if(th->ack) {
1120 t1->seq = th->ack_seq;
1121 } else {
1122 t1->ack = 1;
1123 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1124 + skb->len - (th->doff<<2));
1125 }
1126
1127 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1128
1129 memset(&fl, 0, sizeof(fl));
1130 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1131 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1132
1133 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1134 sizeof(*t1), IPPROTO_TCP,
1135 buff->csum);
1136
1137 fl.proto = IPPROTO_TCP;
1138 fl.oif = tcp_v6_iif(skb);
1139 fl.fl_ip_dport = t1->dest;
1140 fl.fl_ip_sport = t1->source;
1141
1142 /* sk = NULL, but it is safe for now. RST socket required. */
1143 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1144
1145 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1146 dst_release(buff->dst);
1147 return;
1148 }
1149
1150 ip6_xmit(NULL, buff, &fl, NULL, 0);
1151 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1152 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1153 return;
1154 }
1155
1156 kfree_skb(buff);
1157 }
1158
1159 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1160 {
1161 struct tcphdr *th = skb->h.th, *t1;
1162 struct sk_buff *buff;
1163 struct flowi fl;
1164 int tot_len = sizeof(struct tcphdr);
1165
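/* An echoed timestamp adds the aligned option block (two NOPs, kind/length,
 * TSval, TSecr), i.e. 3 * 4 = 12 bytes, to the TCP header.
 */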
1166 if (ts)
1167 tot_len += 3*4;
1168
1169 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1170 GFP_ATOMIC);
1171 if (buff == NULL)
1172 return;
1173
1174 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1175
1176 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1177
1178 /* Swap the send and the receive. */
1179 memset(t1, 0, sizeof(*t1));
1180 t1->dest = th->source;
1181 t1->source = th->dest;
1182 t1->doff = tot_len/4;
1183 t1->seq = htonl(seq);
1184 t1->ack_seq = htonl(ack);
1185 t1->ack = 1;
1186 t1->window = htons(win);
1187
1188 if (ts) {
1189 u32 *ptr = (u32*)(t1 + 1);
1190 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1191 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1192 *ptr++ = htonl(tcp_time_stamp);
1193 *ptr = htonl(ts);
1194 }
1195
1196 buff->csum = csum_partial((char *)t1, tot_len, 0);
1197
1198 memset(&fl, 0, sizeof(fl));
1199 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1200 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1201
1202 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1203 tot_len, IPPROTO_TCP,
1204 buff->csum);
1205
1206 fl.proto = IPPROTO_TCP;
1207 fl.oif = tcp_v6_iif(skb);
1208 fl.fl_ip_dport = t1->dest;
1209 fl.fl_ip_sport = t1->source;
1210
1211 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1212 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1213 dst_release(buff->dst);
1214 return;
1215 }
1216 ip6_xmit(NULL, buff, &fl, NULL, 0);
1217 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1218 return;
1219 }
1220
1221 kfree_skb(buff);
1222 }
1223
1224 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1225 {
1226 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1227
1228 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1229 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1230
1231 tcp_tw_put(tw);
1232 }
1233
1234 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1235 {
1236 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1237 }
1238
1239
1240 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1241 {
1242 struct request_sock *req, **prev;
1243 struct tcphdr *th = skb->h.th;
1244 struct tcp_sock *tp = tcp_sk(sk);
1245 struct sock *nsk;
1246
1247 /* Find possible connection requests. */
1248 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1249 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1250 if (req)
1251 return tcp_check_req(sk, skb, req, prev);
1252
1253 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1254 th->source,
1255 &skb->nh.ipv6h->daddr,
1256 ntohs(th->dest),
1257 tcp_v6_iif(skb));
1258
1259 if (nsk) {
1260 if (nsk->sk_state != TCP_TIME_WAIT) {
1261 bh_lock_sock(nsk);
1262 return nsk;
1263 }
1264 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1265 return NULL;
1266 }
1267
1268 #if 0 /*def CONFIG_SYN_COOKIES*/
1269 if (!th->rst && !th->syn && th->ack)
1270 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1271 #endif
1272 return sk;
1273 }
1274
1275 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1276 {
1277 struct tcp_sock *tp = tcp_sk(sk);
1278 struct listen_sock *lopt = tp->accept_queue.listen_opt;
1279 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1280
1281 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
1282 tcp_synq_added(sk);
1283 }
1284
1285
1286 /* FIXME: this is substantially similar to the ipv4 code.
1287 * Can some kind of merge be done? -- erics
1288 */
1289 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1290 {
1291 struct tcp6_request_sock *treq;
1292 struct ipv6_pinfo *np = inet6_sk(sk);
1293 struct tcp_options_received tmp_opt;
1294 struct tcp_sock *tp = tcp_sk(sk);
1295 struct request_sock *req = NULL;
1296 __u32 isn = TCP_SKB_CB(skb)->when;
1297
1298 if (skb->protocol == htons(ETH_P_IP))
1299 return tcp_v4_conn_request(sk, skb);
1300
1301 if (!ipv6_unicast_destination(skb))
1302 goto drop;
1303
1304 /*
1305 * There are no SYN attacks on IPv6, yet...
1306 */
1307 if (tcp_synq_is_full(sk) && !isn) {
1308 if (net_ratelimit())
1309 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1310 goto drop;
1311 }
1312
1313 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1314 goto drop;
1315
1316 req = reqsk_alloc(&tcp6_request_sock_ops);
1317 if (req == NULL)
1318 goto drop;
1319
1320 tcp_clear_options(&tmp_opt);
1321 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1322 tmp_opt.user_mss = tp->rx_opt.user_mss;
1323
1324 tcp_parse_options(skb, &tmp_opt, 0);
1325
1326 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1327 tcp_openreq_init(req, &tmp_opt, skb);
1328
1329 treq = tcp6_rsk(req);
1330 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1331 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1332 TCP_ECN_create_request(req, skb->h.th);
1333 treq->pktopts = NULL;
1334 if (ipv6_opt_accepted(sk, skb) ||
1335 np->rxopt.bits.rxinfo ||
1336 np->rxopt.bits.rxhlim) {
1337 atomic_inc(&skb->users);
1338 treq->pktopts = skb;
1339 }
1340 treq->iif = sk->sk_bound_dev_if;
1341
1342 /* So that link locals have meaning */
1343 if (!sk->sk_bound_dev_if &&
1344 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1345 treq->iif = tcp_v6_iif(skb);
1346
1347 if (isn == 0)
1348 isn = tcp_v6_init_sequence(sk,skb);
1349
1350 tcp_rsk(req)->snt_isn = isn;
1351
1352 if (tcp_v6_send_synack(sk, req, NULL))
1353 goto drop;
1354
1355 tcp_v6_synq_add(sk, req);
1356
1357 return 0;
1358
1359 drop:
1360 if (req)
1361 reqsk_free(req);
1362
1363 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1364 return 0; /* don't send reset */
1365 }
1366
1367 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1368 struct request_sock *req,
1369 struct dst_entry *dst)
1370 {
1371 struct tcp6_request_sock *treq = tcp6_rsk(req);
1372 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1373 struct tcp6_sock *newtcp6sk;
1374 struct inet_sock *newinet;
1375 struct tcp_sock *newtp;
1376 struct sock *newsk;
1377 struct ipv6_txoptions *opt;
1378
1379 if (skb->protocol == htons(ETH_P_IP)) {
1380 /*
1381 * v6 mapped
1382 */
1383
1384 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1385
1386 if (newsk == NULL)
1387 return NULL;
1388
1389 newtcp6sk = (struct tcp6_sock *)newsk;
1390 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1391
1392 newinet = inet_sk(newsk);
1393 newnp = inet6_sk(newsk);
1394 newtp = tcp_sk(newsk);
1395
1396 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1397
1398 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1399 newinet->daddr);
1400
1401 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1402 newinet->saddr);
1403
1404 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1405
1406 newtp->af_specific = &ipv6_mapped;
1407 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1408 newnp->pktoptions = NULL;
1409 newnp->opt = NULL;
1410 newnp->mcast_oif = tcp_v6_iif(skb);
1411 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1412
1413 /*
1414 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1415 * here, tcp_create_openreq_child now does this for us, see the comment in
1416 * that function for the gory details. -acme
1417 */
1418
1419 /* It is a tricky place. Until this moment IPv4 tcp
1420 worked with IPv6 af_tcp.af_specific.
1421 Sync it now.
1422 */
1423 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1424
1425 return newsk;
1426 }
1427
1428 opt = np->opt;
1429
1430 if (sk_acceptq_is_full(sk))
1431 goto out_overflow;
1432
1433 if (np->rxopt.bits.srcrt == 2 &&
1434 opt == NULL && treq->pktopts) {
1435 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1436 if (rxopt->srcrt)
1437 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1438 }
1439
1440 if (dst == NULL) {
1441 struct in6_addr *final_p = NULL, final;
1442 struct flowi fl;
1443
1444 memset(&fl, 0, sizeof(fl));
1445 fl.proto = IPPROTO_TCP;
1446 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1447 if (opt && opt->srcrt) {
1448 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1449 ipv6_addr_copy(&final, &fl.fl6_dst);
1450 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1451 final_p = &final;
1452 }
1453 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1454 fl.oif = sk->sk_bound_dev_if;
1455 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1456 fl.fl_ip_sport = inet_sk(sk)->sport;
1457
1458 if (ip6_dst_lookup(sk, &dst, &fl))
1459 goto out;
1460
1461 if (final_p)
1462 ipv6_addr_copy(&fl.fl6_dst, final_p);
1463
1464 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1465 goto out;
1466 }
1467
1468 newsk = tcp_create_openreq_child(sk, req, skb);
1469 if (newsk == NULL)
1470 goto out;
1471
1472 /*
1473 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1474 * count here, tcp_create_openreq_child now does this for us, see the
1475 * comment in that function for the gory details. -acme
1476 */
1477
1478 ip6_dst_store(newsk, dst, NULL);
1479 newsk->sk_route_caps = dst->dev->features &
1480 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1481
1482 newtcp6sk = (struct tcp6_sock *)newsk;
1483 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1484
1485 newtp = tcp_sk(newsk);
1486 newinet = inet_sk(newsk);
1487 newnp = inet6_sk(newsk);
1488
1489 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1490
1491 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1492 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1493 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1494 newsk->sk_bound_dev_if = treq->iif;
1495
1496 /* Now IPv6 options...
1497
1498 First: no IPv4 options.
1499 */
1500 newinet->opt = NULL;
1501
1502 /* Clone RX bits */
1503 newnp->rxopt.all = np->rxopt.all;
1504
1505 /* Clone pktoptions received with SYN */
1506 newnp->pktoptions = NULL;
1507 if (treq->pktopts != NULL) {
1508 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1509 kfree_skb(treq->pktopts);
1510 treq->pktopts = NULL;
1511 if (newnp->pktoptions)
1512 skb_set_owner_r(newnp->pktoptions, newsk);
1513 }
1514 newnp->opt = NULL;
1515 newnp->mcast_oif = tcp_v6_iif(skb);
1516 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1517
1518 /* Clone native IPv6 options from listening socket (if any)
1519
1520 Yes, keeping reference count would be much more clever,
1521 but we do one more thing here: reattach optmem
1522 to newsk.
1523 */
1524 if (opt) {
1525 newnp->opt = ipv6_dup_options(newsk, opt);
1526 if (opt != np->opt)
1527 sock_kfree_s(sk, opt, opt->tot_len);
1528 }
1529
1530 newtp->ext_header_len = 0;
1531 if (newnp->opt)
1532 newtp->ext_header_len = newnp->opt->opt_nflen +
1533 newnp->opt->opt_flen;
1534
1535 tcp_sync_mss(newsk, dst_mtu(dst));
1536 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1537 tcp_initialize_rcv_mss(newsk);
1538
1539 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1540
1541 __tcp_v6_hash(newsk);
1542 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1543
1544 return newsk;
1545
1546 out_overflow:
1547 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1548 out:
1549 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1550 if (opt && opt != np->opt)
1551 sock_kfree_s(sk, opt, opt->tot_len);
1552 dst_release(dst);
1553 return NULL;
1554 }
1555
1556 static int tcp_v6_checksum_init(struct sk_buff *skb)
1557 {
1558 if (skb->ip_summed == CHECKSUM_HW) {
1559 skb->ip_summed = CHECKSUM_UNNECESSARY;
1560 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1561 &skb->nh.ipv6h->daddr,skb->csum))
1562 return 0;
1563 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1564 }
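/* For short segments (<= 76 bytes) the checksum is verified in full here;
 * for longer ones only the pseudo-header sum is precomputed and full
 * verification is left to the later checksum-complete/copy paths.
 */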
1565 if (skb->len <= 76) {
1566 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1567 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1568 return -1;
1569 skb->ip_summed = CHECKSUM_UNNECESSARY;
1570 } else {
1571 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1572 &skb->nh.ipv6h->daddr,0);
1573 }
1574 return 0;
1575 }
1576
1577 /* The socket must have its spinlock held when we get
1578 * here.
1579 *
1580 * We have a potential double-lock case here, so even when
1581 * doing backlog processing we use the BH locking scheme.
1582 * This is because we cannot sleep with the original spinlock
1583 * held.
1584 */
1585 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1586 {
1587 struct ipv6_pinfo *np = inet6_sk(sk);
1588 struct tcp_sock *tp;
1589 struct sk_buff *opt_skb = NULL;
1590
1591 /* Imagine: socket is IPv6. IPv4 packet arrives,
1592 goes to the IPv4 receive handler and is backlogged.
1593 From backlog it always goes here. Kerboom...
1594 Fortunately, tcp_rcv_established and rcv_established
1595 handle them correctly, but that is not the case with
1596 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1597 */
1598
1599 if (skb->protocol == htons(ETH_P_IP))
1600 return tcp_v4_do_rcv(sk, skb);
1601
1602 if (sk_filter(sk, skb, 0))
1603 goto discard;
1604
1605 /*
1606 * socket locking is here for SMP purposes as backlog rcv
1607 * is currently called with bh processing disabled.
1608 */
1609
1610 /* Do Stevens' IPV6_PKTOPTIONS.
1611
1612 Yes, guys, it is the only place in our code where we
1613 can do this without affecting IPv4.
1614 The rest of the code is protocol independent,
1615 and I do not like the idea of uglifying IPv4.
1616 
1617 Actually, the whole idea behind IPV6_PKTOPTIONS
1618 does not look very well thought out. For now we latch
1619 the options received in the last packet enqueued
1620 by tcp. Feel free to propose a better solution.
1621 --ANK (980728)
1622 */
1623 if (np->rxopt.all)
1624 opt_skb = skb_clone(skb, GFP_ATOMIC);
1625
1626 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1627 TCP_CHECK_TIMER(sk);
1628 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1629 goto reset;
1630 TCP_CHECK_TIMER(sk);
1631 if (opt_skb)
1632 goto ipv6_pktoptions;
1633 return 0;
1634 }
1635
1636 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1637 goto csum_err;
1638
1639 if (sk->sk_state == TCP_LISTEN) {
1640 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1641 if (!nsk)
1642 goto discard;
1643
1644 /*
1645 * Queue it on the new socket if the new socket is active,
1646 * otherwise we just shortcircuit this and continue with
1647 * the new socket..
1648 */
1649 if(nsk != sk) {
1650 if (tcp_child_process(sk, nsk, skb))
1651 goto reset;
1652 if (opt_skb)
1653 __kfree_skb(opt_skb);
1654 return 0;
1655 }
1656 }
1657
1658 TCP_CHECK_TIMER(sk);
1659 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1660 goto reset;
1661 TCP_CHECK_TIMER(sk);
1662 if (opt_skb)
1663 goto ipv6_pktoptions;
1664 return 0;
1665
1666 reset:
1667 tcp_v6_send_reset(skb);
1668 discard:
1669 if (opt_skb)
1670 __kfree_skb(opt_skb);
1671 kfree_skb(skb);
1672 return 0;
1673 csum_err:
1674 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1675 goto discard;
1676
1677
1678 ipv6_pktoptions:
1679 /* Do you ask, what is it?
1680
1681 1. skb was enqueued by tcp.
1682 2. skb is added to tail of read queue, rather than out of order.
1683 3. socket is not in passive state.
1684 4. Finally, it really contains options, which user wants to receive.
1685 */
1686 tp = tcp_sk(sk);
1687 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1688 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1689 if (np->rxopt.bits.rxinfo)
1690 np->mcast_oif = tcp_v6_iif(opt_skb);
1691 if (np->rxopt.bits.rxhlim)
1692 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1693 if (ipv6_opt_accepted(sk, opt_skb)) {
1694 skb_set_owner_r(opt_skb, sk);
1695 opt_skb = xchg(&np->pktoptions, opt_skb);
1696 } else {
1697 __kfree_skb(opt_skb);
1698 opt_skb = xchg(&np->pktoptions, NULL);
1699 }
1700 }
1701
1702 if (opt_skb)
1703 kfree_skb(opt_skb);
1704 return 0;
1705 }
1706
1707 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1708 {
1709 struct sk_buff *skb = *pskb;
1710 struct tcphdr *th;
1711 struct sock *sk;
1712 int ret;
1713
1714 if (skb->pkt_type != PACKET_HOST)
1715 goto discard_it;
1716
1717 /*
1718 * Count it even if it's bad.
1719 */
1720 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1721
1722 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1723 goto discard_it;
1724
1725 th = skb->h.th;
1726
1727 if (th->doff < sizeof(struct tcphdr)/4)
1728 goto bad_packet;
1729 if (!pskb_may_pull(skb, th->doff*4))
1730 goto discard_it;
1731
1732 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1733 tcp_v6_checksum_init(skb) < 0))
1734 goto bad_packet;
1735
1736 th = skb->h.th;
1737 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1738 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1739 skb->len - th->doff*4);
1740 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1741 TCP_SKB_CB(skb)->when = 0;
1742 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1743 TCP_SKB_CB(skb)->sacked = 0;
1744
1745 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1746 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1747
1748 if (!sk)
1749 goto no_tcp_socket;
1750
1751 process:
1752 if (sk->sk_state == TCP_TIME_WAIT)
1753 goto do_time_wait;
1754
1755 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1756 goto discard_and_relse;
1757
1758 if (sk_filter(sk, skb, 0))
1759 goto discard_and_relse;
1760
1761 skb->dev = NULL;
1762
1763 bh_lock_sock(sk);
1764 ret = 0;
1765 if (!sock_owned_by_user(sk)) {
1766 if (!tcp_prequeue(sk, skb))
1767 ret = tcp_v6_do_rcv(sk, skb);
1768 } else
1769 sk_add_backlog(sk, skb);
1770 bh_unlock_sock(sk);
1771
1772 sock_put(sk);
1773 return ret ? -1 : 0;
1774
1775 no_tcp_socket:
1776 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1777 goto discard_it;
1778
1779 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1780 bad_packet:
1781 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1782 } else {
1783 tcp_v6_send_reset(skb);
1784 }
1785
1786 discard_it:
1787
1788 /*
1789 * Discard frame
1790 */
1791
1792 kfree_skb(skb);
1793 return 0;
1794
1795 discard_and_relse:
1796 sock_put(sk);
1797 goto discard_it;
1798
1799 do_time_wait:
1800 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801 tcp_tw_put((struct tcp_tw_bucket *) sk);
1802 goto discard_it;
1803 }
1804
1805 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1806 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1807 tcp_tw_put((struct tcp_tw_bucket *) sk);
1808 goto discard_it;
1809 }
1810
1811 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1812 skb, th, skb->len)) {
1813 case TCP_TW_SYN:
1814 {
1815 struct sock *sk2;
1816
1817 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1818 if (sk2 != NULL) {
1819 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1820 tcp_tw_put((struct tcp_tw_bucket *)sk);
1821 sk = sk2;
1822 goto process;
1823 }
1824 /* Fall through to ACK */
1825 }
1826 case TCP_TW_ACK:
1827 tcp_v6_timewait_ack(sk, skb);
1828 break;
1829 case TCP_TW_RST:
1830 goto no_tcp_socket;
1831 case TCP_TW_SUCCESS:;
1832 }
1833 goto discard_it;
1834 }
1835
1836 static int tcp_v6_rebuild_header(struct sock *sk)
1837 {
1838 int err;
1839 struct dst_entry *dst;
1840 struct ipv6_pinfo *np = inet6_sk(sk);
1841
1842 dst = __sk_dst_check(sk, np->dst_cookie);
1843
1844 if (dst == NULL) {
1845 struct inet_sock *inet = inet_sk(sk);
1846 struct in6_addr *final_p = NULL, final;
1847 struct flowi fl;
1848
1849 memset(&fl, 0, sizeof(fl));
1850 fl.proto = IPPROTO_TCP;
1851 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1852 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1853 fl.fl6_flowlabel = np->flow_label;
1854 fl.oif = sk->sk_bound_dev_if;
1855 fl.fl_ip_dport = inet->dport;
1856 fl.fl_ip_sport = inet->sport;
1857
1858 if (np->opt && np->opt->srcrt) {
1859 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1860 ipv6_addr_copy(&final, &fl.fl6_dst);
1861 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1862 final_p = &final;
1863 }
1864
1865 err = ip6_dst_lookup(sk, &dst, &fl);
1866 if (err) {
1867 sk->sk_route_caps = 0;
1868 return err;
1869 }
1870 if (final_p)
1871 ipv6_addr_copy(&fl.fl6_dst, final_p);
1872
1873 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1874 sk->sk_err_soft = -err;
1875 dst_release(dst);
1876 return err;
1877 }
1878
1879 ip6_dst_store(sk, dst, NULL);
1880 sk->sk_route_caps = dst->dev->features &
1881 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1882 }
1883
1884 return 0;
1885 }
1886
1887 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1888 {
1889 struct sock *sk = skb->sk;
1890 struct inet_sock *inet = inet_sk(sk);
1891 struct ipv6_pinfo *np = inet6_sk(sk);
1892 struct flowi fl;
1893 struct dst_entry *dst;
1894 struct in6_addr *final_p = NULL, final;
1895
1896 memset(&fl, 0, sizeof(fl));
1897 fl.proto = IPPROTO_TCP;
1898 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1899 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1900 fl.fl6_flowlabel = np->flow_label;
1901 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1902 fl.oif = sk->sk_bound_dev_if;
1903 fl.fl_ip_sport = inet->sport;
1904 fl.fl_ip_dport = inet->dport;
1905
1906 if (np->opt && np->opt->srcrt) {
1907 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1908 ipv6_addr_copy(&final, &fl.fl6_dst);
1909 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1910 final_p = &final;
1911 }
1912
1913 dst = __sk_dst_check(sk, np->dst_cookie);
1914
1915 if (dst == NULL) {
1916 int err = ip6_dst_lookup(sk, &dst, &fl);
1917
1918 if (err) {
1919 sk->sk_err_soft = -err;
1920 return err;
1921 }
1922
1923 if (final_p)
1924 ipv6_addr_copy(&fl.fl6_dst, final_p);
1925
1926 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1927 sk->sk_route_caps = 0;
1928 dst_release(dst);
1929 return err;
1930 }
1931
1932 ip6_dst_store(sk, dst, NULL);
1933 sk->sk_route_caps = dst->dev->features &
1934 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1935 }
1936
1937 skb->dst = dst_clone(dst);
1938
1939 /* Restore final destination back after routing done */
1940 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1941
1942 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1943 }
1944
1945 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1946 {
1947 struct ipv6_pinfo *np = inet6_sk(sk);
1948 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1949
1950 sin6->sin6_family = AF_INET6;
1951 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1952 sin6->sin6_port = inet_sk(sk)->dport;
1953 /* We do not store received flowlabel for TCP */
1954 sin6->sin6_flowinfo = 0;
1955 sin6->sin6_scope_id = 0;
1956 if (sk->sk_bound_dev_if &&
1957 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1958 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1959 }
1960
1961 static int tcp_v6_remember_stamp(struct sock *sk)
1962 {
1963 /* Alas, not yet... */
1964 return 0;
1965 }
1966
1967 static struct tcp_func ipv6_specific = {
1968 .queue_xmit = tcp_v6_xmit,
1969 .send_check = tcp_v6_send_check,
1970 .rebuild_header = tcp_v6_rebuild_header,
1971 .conn_request = tcp_v6_conn_request,
1972 .syn_recv_sock = tcp_v6_syn_recv_sock,
1973 .remember_stamp = tcp_v6_remember_stamp,
1974 .net_header_len = sizeof(struct ipv6hdr),
1975
1976 .setsockopt = ipv6_setsockopt,
1977 .getsockopt = ipv6_getsockopt,
1978 .addr2sockaddr = v6_addr2sockaddr,
1979 .sockaddr_len = sizeof(struct sockaddr_in6)
1980 };
1981
1982 /*
1983 * TCP over IPv4 via INET6 API
1984 */
1985
1986 static struct tcp_func ipv6_mapped = {
1987 .queue_xmit = ip_queue_xmit,
1988 .send_check = tcp_v4_send_check,
1989 .rebuild_header = inet_sk_rebuild_header,
1990 .conn_request = tcp_v6_conn_request,
1991 .syn_recv_sock = tcp_v6_syn_recv_sock,
1992 .remember_stamp = tcp_v4_remember_stamp,
1993 .net_header_len = sizeof(struct iphdr),
1994
1995 .setsockopt = ipv6_setsockopt,
1996 .getsockopt = ipv6_getsockopt,
1997 .addr2sockaddr = v6_addr2sockaddr,
1998 .sockaddr_len = sizeof(struct sockaddr_in6)
1999 };
2000
2001
2002
2003 /* NOTE: A lot of things are set to zero explicitly by the call to
2004 * sk_alloc(), so they need not be done here.
2005 */
2006 static int tcp_v6_init_sock(struct sock *sk)
2007 {
2008 struct tcp_sock *tp = tcp_sk(sk);
2009
2010 skb_queue_head_init(&tp->out_of_order_queue);
2011 tcp_init_xmit_timers(sk);
2012 tcp_prequeue_init(tp);
2013
2014 tp->rto = TCP_TIMEOUT_INIT;
2015 tp->mdev = TCP_TIMEOUT_INIT;
2016
2017 /* So many TCP implementations out there (incorrectly) count the
2018 * initial SYN frame in their delayed-ACK and congestion control
2019 * algorithms that we must have the following bandaid to talk
2020 * efficiently to them. -DaveM
2021 */
2022 tp->snd_cwnd = 2;
2023
2024 /* See draft-stevens-tcpca-spec-01 for discussion of the
2025 * initialization of these values.
2026 */
2027 tp->snd_ssthresh = 0x7fffffff;
2028 tp->snd_cwnd_clamp = ~0;
2029 tp->mss_cache = 536;
2030
2031 tp->reordering = sysctl_tcp_reordering;
2032
2033 sk->sk_state = TCP_CLOSE;
2034
2035 tp->af_specific = &ipv6_specific;
2036 tp->ca_ops = &tcp_init_congestion_ops;
2037 sk->sk_write_space = sk_stream_write_space;
2038 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2039
2040 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2041 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2042
2043 atomic_inc(&tcp_sockets_allocated);
2044
2045 return 0;
2046 }
2047
2048 static int tcp_v6_destroy_sock(struct sock *sk)
2049 {
2050 extern int tcp_v4_destroy_sock(struct sock *sk);
2051
2052 tcp_v4_destroy_sock(sk);
2053 return inet6_destroy_sock(sk);
2054 }
2055
2056 /* Proc filesystem TCPv6 sock list dumping. */
2057 static void get_openreq6(struct seq_file *seq,
2058 struct sock *sk, struct request_sock *req, int i, int uid)
2059 {
2060 struct in6_addr *dest, *src;
2061 int ttd = req->expires - jiffies;
2062
2063 if (ttd < 0)
2064 ttd = 0;
2065
2066 src = &tcp6_rsk(req)->loc_addr;
2067 dest = &tcp6_rsk(req)->rmt_addr;
2068 seq_printf(seq,
2069 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2070 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2071 i,
2072 src->s6_addr32[0], src->s6_addr32[1],
2073 src->s6_addr32[2], src->s6_addr32[3],
2074 ntohs(inet_sk(sk)->sport),
2075 dest->s6_addr32[0], dest->s6_addr32[1],
2076 dest->s6_addr32[2], dest->s6_addr32[3],
2077 ntohs(inet_rsk(req)->rmt_port),
2078 TCP_SYN_RECV,
2079 0,0, /* could print option size, but that is af dependent. */
2080 1, /* timers active (only the expire timer) */
2081 jiffies_to_clock_t(ttd),
2082 req->retrans,
2083 uid,
2084 0, /* non standard timer */
2085 0, /* open_requests have no inode */
2086 0, req);
2087 }
2088
2089 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2090 {
2091 struct in6_addr *dest, *src;
2092 __u16 destp, srcp;
2093 int timer_active;
2094 unsigned long timer_expires;
2095 struct inet_sock *inet = inet_sk(sp);
2096 struct tcp_sock *tp = tcp_sk(sp);
2097 struct ipv6_pinfo *np = inet6_sk(sp);
2098
2099 dest = &np->daddr;
2100 src = &np->rcv_saddr;
2101 destp = ntohs(inet->dport);
2102 srcp = ntohs(inet->sport);
2103 if (tp->pending == TCP_TIME_RETRANS) {
2104 timer_active = 1;
2105 timer_expires = tp->timeout;
2106 } else if (tp->pending == TCP_TIME_PROBE0) {
2107 timer_active = 4;
2108 timer_expires = tp->timeout;
2109 } else if (timer_pending(&sp->sk_timer)) {
2110 timer_active = 2;
2111 timer_expires = sp->sk_timer.expires;
2112 } else {
2113 timer_active = 0;
2114 timer_expires = jiffies;
2115 }
2116
2117 seq_printf(seq,
2118 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2119 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2120 i,
2121 src->s6_addr32[0], src->s6_addr32[1],
2122 src->s6_addr32[2], src->s6_addr32[3], srcp,
2123 dest->s6_addr32[0], dest->s6_addr32[1],
2124 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2125 sp->sk_state,
2126 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2127 timer_active,
2128 jiffies_to_clock_t(timer_expires - jiffies),
2129 tp->retransmits,
2130 sock_i_uid(sp),
2131 tp->probes_out,
2132 sock_i_ino(sp),
2133 atomic_read(&sp->sk_refcnt), sp,
2134 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2135 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2136 );
2137 }
2138
2139 static void get_timewait6_sock(struct seq_file *seq,
2140 struct tcp_tw_bucket *tw, int i)
2141 {
2142 struct in6_addr *dest, *src;
2143 __u16 destp, srcp;
2144 int ttd = tw->tw_ttd - jiffies;
2145
2146 if (ttd < 0)
2147 ttd = 0;
2148
2149 dest = &tw->tw_v6_daddr;
2150 src = &tw->tw_v6_rcv_saddr;
2151 destp = ntohs(tw->tw_dport);
2152 srcp = ntohs(tw->tw_sport);
2153
2154 seq_printf(seq,
2155 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2156 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2157 i,
2158 src->s6_addr32[0], src->s6_addr32[1],
2159 src->s6_addr32[2], src->s6_addr32[3], srcp,
2160 dest->s6_addr32[0], dest->s6_addr32[1],
2161 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2162 tw->tw_substate, 0, 0,
2163 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2164 atomic_read(&tw->tw_refcnt), tw);
2165 }
2166
2167 #ifdef CONFIG_PROC_FS
2168 static int tcp6_seq_show(struct seq_file *seq, void *v)
2169 {
2170 struct tcp_iter_state *st;
2171
2172 if (v == SEQ_START_TOKEN) {
2173 seq_puts(seq,
2174 " sl "
2175 "local_address "
2176 "remote_address "
2177 "st tx_queue rx_queue tr tm->when retrnsmt"
2178 " uid timeout inode\n");
2179 goto out;
2180 }
2181 st = seq->private;
2182
2183 switch (st->state) {
2184 case TCP_SEQ_STATE_LISTENING:
2185 case TCP_SEQ_STATE_ESTABLISHED:
2186 get_tcp6_sock(seq, v, st->num);
2187 break;
2188 case TCP_SEQ_STATE_OPENREQ:
2189 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2190 break;
2191 case TCP_SEQ_STATE_TIME_WAIT:
2192 get_timewait6_sock(seq, v, st->num);
2193 break;
2194 }
2195 out:
2196 return 0;
2197 }
2198
2199 static struct file_operations tcp6_seq_fops;
2200 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2201 .owner = THIS_MODULE,
2202 .name = "tcp6",
2203 .family = AF_INET6,
2204 .seq_show = tcp6_seq_show,
2205 .seq_fops = &tcp6_seq_fops,
2206 };
2207
2208 int __init tcp6_proc_init(void)
2209 {
2210 return tcp_proc_register(&tcp6_seq_afinfo);
2211 }
2212
2213 void tcp6_proc_exit(void)
2214 {
2215 tcp_proc_unregister(&tcp6_seq_afinfo);
2216 }
2217 #endif
2218
2219 struct proto tcpv6_prot = {
2220 .name = "TCPv6",
2221 .owner = THIS_MODULE,
2222 .close = tcp_close,
2223 .connect = tcp_v6_connect,
2224 .disconnect = tcp_disconnect,
2225 .accept = tcp_accept,
2226 .ioctl = tcp_ioctl,
2227 .init = tcp_v6_init_sock,
2228 .destroy = tcp_v6_destroy_sock,
2229 .shutdown = tcp_shutdown,
2230 .setsockopt = tcp_setsockopt,
2231 .getsockopt = tcp_getsockopt,
2232 .sendmsg = tcp_sendmsg,
2233 .recvmsg = tcp_recvmsg,
2234 .backlog_rcv = tcp_v6_do_rcv,
2235 .hash = tcp_v6_hash,
2236 .unhash = tcp_unhash,
2237 .get_port = tcp_v6_get_port,
2238 .enter_memory_pressure = tcp_enter_memory_pressure,
2239 .sockets_allocated = &tcp_sockets_allocated,
2240 .memory_allocated = &tcp_memory_allocated,
2241 .memory_pressure = &tcp_memory_pressure,
2242 .sysctl_mem = sysctl_tcp_mem,
2243 .sysctl_wmem = sysctl_tcp_wmem,
2244 .sysctl_rmem = sysctl_tcp_rmem,
2245 .max_header = MAX_TCP_HEADER,
2246 .obj_size = sizeof(struct tcp6_sock),
2247 .rsk_prot = &tcp6_request_sock_ops,
2248 };
2249
2250 static struct inet6_protocol tcpv6_protocol = {
2251 .handler = tcp_v6_rcv,
2252 .err_handler = tcp_v6_err,
2253 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2254 };
2255
2256 extern struct proto_ops inet6_stream_ops;
2257
2258 static struct inet_protosw tcpv6_protosw = {
2259 .type = SOCK_STREAM,
2260 .protocol = IPPROTO_TCP,
2261 .prot = &tcpv6_prot,
2262 .ops = &inet6_stream_ops,
2263 .capability = -1,
2264 .no_check = 0,
2265 .flags = INET_PROTOSW_PERMANENT,
2266 };
2267
2268 void __init tcpv6_init(void)
2269 {
2270 /* register inet6 protocol */
2271 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2272 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2273 inet6_register_protosw(&tcpv6_protosw);
2274 }