/spare/repo/netdev-2.6 branch 'master'
[deliverable/linux.git] / net / ipv6 / tcp_ipv6.c
CommitLineData
1da177e4
LT
1/*
2 * TCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
14 *
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28#include <linux/module.h>
29#include <linux/config.h>
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/jiffies.h>
36#include <linux/in.h>
37#include <linux/in6.h>
38#include <linux/netdevice.h>
39#include <linux/init.h>
40#include <linux/jhash.h>
41#include <linux/ipsec.h>
42#include <linux/times.h>
43
44#include <linux/ipv6.h>
45#include <linux/icmpv6.h>
46#include <linux/random.h>
47
48#include <net/tcp.h>
49#include <net/ndisc.h>
5324a040 50#include <net/inet6_hashtables.h>
1da177e4
LT
51#include <net/ipv6.h>
52#include <net/transp_v6.h>
53#include <net/addrconf.h>
54#include <net/ip6_route.h>
55#include <net/ip6_checksum.h>
56#include <net/inet_ecn.h>
57#include <net/protocol.h>
58#include <net/xfrm.h>
59#include <net/addrconf.h>
60#include <net/snmp.h>
61#include <net/dsfield.h>
62
63#include <asm/uaccess.h>
64
65#include <linux/proc_fs.h>
66#include <linux/seq_file.h>
67
68static void tcp_v6_send_reset(struct sk_buff *skb);
60236fdd 69static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
1da177e4
LT
70static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
71 struct sk_buff *skb);
72
73static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75
76static struct tcp_func ipv6_mapped;
77static struct tcp_func ipv6_specific;
78
/* Return non-zero when @sk conflicts with an existing owner of bind
 * bucket @tb: devices overlap (either side unbound, or both bound to
 * the same ifindex), reuse is not permitted by both sides (a listener
 * never permits it), and the IPv6 receive addresses clash.
 */
static inline int tcp_v6_bind_conflict(const struct sock *sk,
				       const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	/* node is non-NULL iff the loop broke on a conflicting socket. */
	return node != NULL;
}
99
/* Bind @sk to local port @snum; @snum == 0 requests an ephemeral port.
 * Returns 0 on success, non-zero on failure.
 *
 * Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		/* Ephemeral port: scan the configured local range,
		 * resuming from the shared rover position.
		 */
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_hashinfo.portalloc_lock);
		if (tcp_hashinfo.port_rover < low)
			rover = low;
		else
			rover = tcp_hashinfo.port_rover;
		do { rover++;
			if (rover > high)
				rover = low;	/* wrap around the range */
			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);
			/* Port free if no bucket in this chain owns it;
			 * on success we exit with head->lock still held.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_hashinfo.port_rover = rover;
		spin_unlock(&tcp_hashinfo.portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (unlikely(remaining <= 0))
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		/* Specific port requested: look up its bind bucket. */
		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		/* Port already owned: fast path when all owners agreed
		 * on reuse, otherwise run the full conflict walk.
		 */
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (tb == NULL) {
		tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
		if (tb == NULL)
			goto fail_unlock;
	}
	/* Maintain the bucket's reuse summary for the fast path above. */
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, snum);
	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
199
/* Insert an (unhashed) socket into the proper tcp_hashinfo table:
 * listening hash for listeners, established hash otherwise.  The
 * caller must have BHs disabled.
 */
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		/* Takes the listening lock (write side). */
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		/* Cache the ehash slot so later lookup/unhash is O(1). */
		sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}
222
223
224static void tcp_v6_hash(struct sock *sk)
225{
226 if (sk->sk_state != TCP_CLOSE) {
227 struct tcp_sock *tp = tcp_sk(sk);
228
229 if (tp->af_specific == &ipv6_mapped) {
230 tcp_prot.hash(sk);
231 return;
232 }
233 local_bh_disable();
234 __tcp_v6_hash(sk);
235 local_bh_enable();
236 }
237}
238
1da177e4
LT
/*
 * Open request hash tables.
 */

/* Hash a SYN-queue entry by peer address and port, mixed with the
 * per-listener random value @rnd (jhash-style mixing, which makes
 * bucket placement unpredictable to remote attackers).  Returns an
 * index into the SYN table; assumes TCP_SYNQ_HSIZE is a power of two
 * so the final mask works.
 */
static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	/* Fold in the last address word and the remote port. */
	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
262
/* Find a pending (SYN_RECV) connection request on listener @sk that
 * matches the given remote port/address, local address, and — when
 * the request recorded one — the incoming interface.  On success,
 * *prevp is set to the predecessor link so the caller can unlink the
 * entry; returns NULL when no request matches.
 */
static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;

	/* Walk the single SYN-table chain selected by the hash. */
	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			/* Requests in the SYN table have no child yet. */
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
292
/* Compute the TCP checksum over the IPv6 pseudo-header, folding in
 * @base (the partial checksum of the TCP header and payload).
 */
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
300
/* Pick a secure initial sequence number for an incoming connection,
 * dispatching on whether the SYN arrived over IPv6 or (v4-mapped)
 * IPv4.
 */
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}
315
/* Check that the connection four-tuple (local port @lport plus the
 * addresses/remote port already set on @sk) is unique in the
 * established hash.  A matching TIME-WAIT entry may be recycled —
 * unconditionally during ephemeral port search (@twp != NULL), or
 * per sysctl_tcp_tw_reuse otherwise.  On success the socket is
 * hashed while the bucket lock is held, closing the race against a
 * concurrent duplicate insert.  Returns 0 if unique, otherwise
 * -EADDRNOTAVAIL.
 */
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
				       tcp_hashinfo.ehash_size);
	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first.  They live in the upper half
	 * of the ehash table, at head + ehash_size.
	 */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		/* ports compares dport and sport as one 32-bit word. */
		if(*((__u32 *)&(tw->tw_dport)) == ports &&
		   sk2->sk_family == PF_INET6 &&
		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			/* Recyclable only when the old timestamp state
			 * lets the new connection's sequence space be
			 * distinguished from the old one's.
			 */
			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				/* Keep the tw entry alive past the unlock. */
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	/* Hash the socket under the same lock that proved uniqueness. */
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		/* Hand the recycled tw entry back to the caller. */
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
395
/* Per-destination offset into the ephemeral port sequence, derived
 * from the connection's addresses and remote port so that different
 * peers probe different port orders.
 */
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
405
/* Bind and hash a connecting socket.  When no local port is set yet,
 * walk the ephemeral range (offset by a per-destination secure hash)
 * and use the established-table uniqueness check, which also allows
 * safe takeover of TIME-WAIT ports.  Otherwise verify the pre-bound
 * port.  Returns 0 on success or a negative errno.
 */
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		/* hint advances on every success so later connects
		 * resume the scan where the previous one stopped.
		 */
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					/* fastreuse >= 0 marks ordinary
					 * bind() owners: skip the port.
					 */
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			/* -1 marks the bucket connect-only (see above). */
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			/* Evict the TIME-WAIT entry whose port we took. */
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		/* Sole owner of the port: no conflict possible. */
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		/* Drop only the lock; BHs stay disabled until out:. */
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
499
1da177e4
LT
/* Initiate an active open to @uaddr.  Handles flow labels, scope ids
 * for link-local destinations, and hands v4-mapped destinations off
 * to the IPv4 connect path.  Returns 0 or a negative errno.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return(-EAFNOSUPPORT);

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			/* A flow label pins the destination address. */
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Reconnecting to a different peer: drop stale timestamp state. */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 * TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Switch the socket to the v4-mapped method table. */
		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			/* Failed: restore the native IPv6 methods. */
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			/* Record the mapped form of the v4 addresses. */
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	/* With a source route, route to the first hop but remember the
	 * real destination for after the lookup.
	 */
	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		/* Source address was chosen by the route lookup. */
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
			    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
686
/* ICMPv6 error handler for TCP.  Locates the socket the error refers
 * to, performs PMTU discovery for PKT_TOOBIG, and reports other
 * errors to the socket or to its pending connection request.
 */
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	/* TIME-WAIT entries just drop the reference taken by lookup. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* Ignore errors that do not refer to in-flight data. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		/* Only shrink the MSS; growth is handled elsewhere. */
		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, it SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
827
828
/* Build and transmit a SYN|ACK for connection request @req.  When
 * @dst is NULL a route is looked up, honouring any (reversed) routing
 * header taken from the SYN's saved packet options.  Returns 0 on
 * success, negative on failure; always releases @dst.
 */
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			/* Reply along the reversed source route found
			 * in the SYN's saved packet options.
			 */
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			/* Route to the first hop; restore the real
			 * destination after the lookup.
			 */
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		/* Congestion notification on xmit is not an error. */
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	/* Free an options block built locally by ipv6_invert_rthdr(). */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
896
60236fdd 897static void tcp_v6_reqsk_destructor(struct request_sock *req)
1da177e4 898{
2e6599cb
ACM
899 if (tcp6_rsk(req)->pktopts)
900 kfree_skb(tcp6_rsk(req)->pktopts);
1da177e4
LT
901}
902
/* Operations for IPv6 connection requests in the listener SYN queue. */
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,	/* retransmit SYN|ACK */
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
911
912static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
913{
914 struct ipv6_pinfo *np = inet6_sk(sk);
915 struct inet6_skb_parm *opt = IP6CB(skb);
916
917 if (np->rxopt.all) {
918 if ((opt->hop && np->rxopt.bits.hopopts) ||
919 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
920 np->rxopt.bits.rxflow) ||
921 (opt->srcrt && np->rxopt.bits.srcrt) ||
922 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
923 return 1;
924 }
925 return 0;
926}
927
928
/* Fill in the TCP checksum of an outgoing segment.  With hardware
 * checksum offload (CHECKSUM_HW) only the pseudo-header sum is stored
 * and the device finishes the job; otherwise the full checksum is
 * computed here.
 */
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
		/* Tell the device where to place the final checksum. */
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
943
944
945static void tcp_v6_send_reset(struct sk_buff *skb)
946{
947 struct tcphdr *th = skb->h.th, *t1;
948 struct sk_buff *buff;
949 struct flowi fl;
950
951 if (th->rst)
952 return;
953
954 if (!ipv6_unicast_destination(skb))
955 return;
956
957 /*
958 * We need to grab some memory, and put together an RST,
959 * and then put it into the queue to be sent.
960 */
961
962 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
963 GFP_ATOMIC);
964 if (buff == NULL)
965 return;
966
967 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
968
969 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
970
971 /* Swap the send and the receive. */
972 memset(t1, 0, sizeof(*t1));
973 t1->dest = th->source;
974 t1->source = th->dest;
975 t1->doff = sizeof(*t1)/4;
976 t1->rst = 1;
977
978 if(th->ack) {
979 t1->seq = th->ack_seq;
980 } else {
981 t1->ack = 1;
982 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
983 + skb->len - (th->doff<<2));
984 }
985
986 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
987
988 memset(&fl, 0, sizeof(fl));
989 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
990 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
991
992 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
993 sizeof(*t1), IPPROTO_TCP,
994 buff->csum);
995
996 fl.proto = IPPROTO_TCP;
505cbfc5 997 fl.oif = inet6_iif(skb);
1da177e4
LT
998 fl.fl_ip_dport = t1->dest;
999 fl.fl_ip_sport = t1->source;
1000
1001 /* sk = NULL, but it is safe for now. RST socket required. */
1002 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1003
1004 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1005 dst_release(buff->dst);
1006 return;
1007 }
1008
1009 ip6_xmit(NULL, buff, &fl, NULL, 0);
1010 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1011 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1012 return;
1013 }
1014
1015 kfree_skb(buff);
1016}
1017
1018static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1019{
1020 struct tcphdr *th = skb->h.th, *t1;
1021 struct sk_buff *buff;
1022 struct flowi fl;
1023 int tot_len = sizeof(struct tcphdr);
1024
1025 if (ts)
1026 tot_len += 3*4;
1027
1028 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1029 GFP_ATOMIC);
1030 if (buff == NULL)
1031 return;
1032
1033 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1034
1035 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1036
1037 /* Swap the send and the receive. */
1038 memset(t1, 0, sizeof(*t1));
1039 t1->dest = th->source;
1040 t1->source = th->dest;
1041 t1->doff = tot_len/4;
1042 t1->seq = htonl(seq);
1043 t1->ack_seq = htonl(ack);
1044 t1->ack = 1;
1045 t1->window = htons(win);
1046
1047 if (ts) {
1048 u32 *ptr = (u32*)(t1 + 1);
1049 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1050 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1051 *ptr++ = htonl(tcp_time_stamp);
1052 *ptr = htonl(ts);
1053 }
1054
1055 buff->csum = csum_partial((char *)t1, tot_len, 0);
1056
1057 memset(&fl, 0, sizeof(fl));
1058 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1059 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1060
1061 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1062 tot_len, IPPROTO_TCP,
1063 buff->csum);
1064
1065 fl.proto = IPPROTO_TCP;
505cbfc5 1066 fl.oif = inet6_iif(skb);
1da177e4
LT
1067 fl.fl_ip_dport = t1->dest;
1068 fl.fl_ip_sport = t1->source;
1069
1070 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1071 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1072 dst_release(buff->dst);
1073 return;
1074 }
1075 ip6_xmit(NULL, buff, &fl, NULL, 0);
1076 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1077 return;
1078 }
1079
1080 kfree_skb(buff);
1081}
1082
/* Answer a segment that hit a TIME-WAIT socket with an ACK carrying
 * the remembered sequence/window/timestamp state, then drop the tw
 * reference taken by the lookup.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}
1094
60236fdd 1095static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1da177e4 1096{
2e6599cb 1097 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1da177e4
LT
1098}
1099
1100
/* For a segment arriving on listener @sk, find the socket that should
 * handle it: a pending SYN-queue request (handed to tcp_check_req),
 * an already-established child (returned locked), or @sk itself.
 * Returns NULL when the match is a TIME-WAIT entry.
 */
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		/* TIME-WAIT hit: drop the lookup reference. */
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
1132
/* Hash a new connection request into the listener's SYN queue and bump
 * the queue-length accounting, arming the SYN-ACK retransmit timeout.
 */
static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
        /* Bucket is derived from the peer's address/port plus the
         * per-listener random seed. */
        const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

        reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
        inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
}
1142
1143
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
/* Handle an incoming SYN on a listening socket: allocate and fill a
 * request sock, send the SYN-ACK, and queue the request.  v4-mapped
 * traffic is delegated to tcp_v4_conn_request().  Always returns 0 so
 * that no reset is generated by the caller.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp6_request_sock *treq;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_options_received tmp_opt;
        struct tcp_sock *tp = tcp_sk(sk);
        struct request_sock *req = NULL;
        /* Nonzero when a previously computed ISN was stashed in the cb. */
        __u32 isn = TCP_SKB_CB(skb)->when;

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        /*
         *      There are no SYN attacks on IPv6, yet...
         */
        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
                if (net_ratelimit())
                        printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                goto drop;
        }

        /* Accept queue full and more than one young request pending:
         * shed load early rather than complete handshakes we can't accept. */
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                goto drop;

        req = reqsk_alloc(&tcp6_request_sock_ops);
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tmp_opt);
        /* Clamp advertised MSS to what fits in the IPv6 minimum MTU. */
        tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tmp_opt.user_mss = tp->rx_opt.user_mss;

        tcp_parse_options(skb, &tmp_opt, 0);

        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb);

        treq = tcp6_rsk(req);
        ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
        TCP_ECN_create_request(req, skb->h.th);
        treq->pktopts = NULL;
        /* Keep the SYN skb alive if the user asked for any rx options
         * that must be replayed to the child (IPV6_PKTOPTIONS). */
        if (ipv6_opt_accepted(sk, skb) ||
            np->rxopt.bits.rxinfo ||
            np->rxopt.bits.rxhlim) {
                atomic_inc(&skb->users);
                treq->pktopts = skb;
        }
        treq->iif = sk->sk_bound_dev_if;

        /* So that link locals have meaning */
        if (!sk->sk_bound_dev_if &&
            ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
                treq->iif = inet6_iif(skb);

        if (isn == 0)
                isn = tcp_v6_init_sequence(sk,skb);

        tcp_rsk(req)->snt_isn = isn;

        if (tcp_v6_send_synack(sk, req, NULL))
                goto drop;

        tcp_v6_synq_add(sk, req);

        return 0;

drop:
        if (req)
                reqsk_free(req);

        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        return 0; /* don't send reset */
}
1224
/* Create the child socket once the three-way handshake completes.
 *
 * Two paths: a v4-mapped child is built by tcp_v4_syn_recv_sock() and
 * then patched to use the mapped af ops; a native IPv6 child is built
 * here, including route lookup, address/option setup and hashing into
 * the established table.  Returns the new socket or NULL on failure
 * (queue overflow, route/xfrm failure, or allocation failure).
 */
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst)
{
        struct tcp6_request_sock *treq = tcp6_rsk(req);
        struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
        struct tcp6_sock *newtcp6sk;
        struct inet_sock *newinet;
        struct tcp_sock *newtp;
        struct sock *newsk;
        struct ipv6_txoptions *opt;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

                if (newsk == NULL)
                        return NULL;

                newtcp6sk = (struct tcp6_sock *)newsk;
                inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

                newinet = inet_sk(newsk);
                newnp = inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                /* Synthesize ::ffff:a.b.c.d mapped addresses from the
                 * IPv4 endpoints chosen by the v4 code. */
                ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
                              newinet->daddr);

                ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
                              newinet->saddr);

                ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

                newtp->af_specific = &ipv6_mapped;
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet6_iif(skb);
                newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* It is tricky place. Until this moment IPv4 tcp
                   worked with IPv6 af_tcp.af_specific.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, newtp->pmtu_cookie);

                return newsk;
        }

        opt = np->opt;

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        /* srcrt == 2: build a reply routing header by inverting the one
         * received on the SYN, unless the listener already has options. */
        if (np->rxopt.bits.srcrt == 2 &&
            opt == NULL && treq->pktopts) {
                struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
                if (rxopt->srcrt)
                        opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
        }

        if (dst == NULL) {
                struct in6_addr *final_p = NULL, final;
                struct flowi fl;

                memset(&fl, 0, sizeof(fl));
                fl.proto = IPPROTO_TCP;
                ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                /* With a source route, look up via the first hop and
                 * restore the final destination afterwards. */
                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }
                ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = inet_rsk(req)->rmt_port;
                fl.fl_ip_sport = inet_sk(sk)->sport;

                if (ip6_dst_lookup(sk, &dst, &fl))
                        goto out;

                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);

                if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
                goto out;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        ip6_dst_store(newsk, dst, NULL);
        newsk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        newtcp6sk = (struct tcp6_sock *)newsk;
        inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
        ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
        ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
        newsk->sk_bound_dev_if = treq->iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->opt = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        /* Clone pktoptions received with SYN */
        newnp->pktoptions = NULL;
        if (treq->pktopts != NULL) {
                newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
                kfree_skb(treq->pktopts);
                treq->pktopts = NULL;
                if (newnp->pktoptions)
                        skb_set_owner_r(newnp->pktoptions, newsk);
        }
        newnp->opt        = NULL;
        newnp->mcast_oif  = inet6_iif(skb);
        newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

        /* Clone native IPv6 options from listening socket (if any)

           Yes, keeping reference count would be much more clever,
           but we make one more one thing there: reattach optmem
           to newsk.
         */
        if (opt) {
                newnp->opt = ipv6_dup_options(newsk, opt);
                /* An inverted rthdr built above is a local copy: free it. */
                if (opt != np->opt)
                        sock_kfree_s(sk, opt, opt->tot_len);
        }

        newtp->ext_header_len = 0;
        if (newnp->opt)
                newtp->ext_header_len = newnp->opt->opt_nflen +
                                        newnp->opt->opt_flen;

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
        tcp_initialize_rcv_mss(newsk);

        /* The v4 fields must hold something harmless for a pure v6 sock. */
        newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

        __tcp_v6_hash(newsk);
        inet_inherit_port(&tcp_hashinfo, sk, newsk);

        return newsk;

out_overflow:
        NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
        NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        dst_release(dst);
        return NULL;
}
1413
/* Validate or prepare the TCP checksum on receive.
 *
 * Hardware-verified packets are accepted after a pseudo-header check;
 * short packets are fully verified in software; longer ones only get
 * the pseudo-header folded in here, with the rest checked later during
 * copy.  Returns 0 on success, -1 when a short packet fails its check.
 */
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
        if (skb->ip_summed == CHECKSUM_HW) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
                                  &skb->nh.ipv6h->daddr,skb->csum))
                        return 0;
                LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
                /* Fall through and re-verify in software below. */
        }
        /* NOTE(review): 76 looks like a "cheap to checksum now" cutoff
         * shared with the v4 path — confirm before changing. */
        if (skb->len <= 76) {
                if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
                                 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
                        return -1;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else {
                /* Seed skb->csum with the pseudo-header only; payload is
                 * checksummed during the later copy to user space. */
                skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
                                          &skb->nh.ipv6h->daddr,0);
        }
        return 0;
}
1434
/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
/* Per-socket receive path: dispatch a segment to the established fast
 * path, the listen path, or the generic state machine, and latch the
 * most recent packet's options for IPV6_PKTOPTIONS.  Returns 0; resets
 * are sent on protocol errors.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_sock *tp;
        struct sk_buff *opt_skb = NULL;

        /* Imagine: socket is IPv6. IPv4 packet arrives,
           goes to IPv4 receive handler and backlogged.
           From backlog it always goes here. Kerboom...
           Fortunately, tcp_rcv_established and rcv_established
           handle them correctly, but it is not case with
           tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
         */

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);

        if (sk_filter(sk, skb, 0))
                goto discard;

        /*
         *      socket locking is here for SMP purposes as backlog rcv
         *      is currently called with bh processing disabled.
         */

        /* Do Stevens' IPV6_PKTOPTIONS.

           Yes, guys, it is the only place in our code, where we
           may make it not affecting IPv4.
           The rest of code is protocol independent,
           and I do not like idea to uglify IPv4.

           Actually, all the idea behind IPV6_PKTOPTIONS
           looks not very well thought. For now we latch
           options, received in the last packet, enqueued
           by tcp. Feel free to propose better solution.
                                       --ANK (980728)
         */
        if (np->rxopt.all)
                opt_skb = skb_clone(skb, GFP_ATOMIC);

        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
                TCP_CHECK_TIMER(sk);
                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
                        goto reset;
                TCP_CHECK_TIMER(sk);
                if (opt_skb)
                        goto ipv6_pktoptions;
                return 0;
        }

        if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
                goto csum_err;

        if (sk->sk_state == TCP_LISTEN) {
                struct sock *nsk = tcp_v6_hnd_req(sk, skb);
                if (!nsk)
                        goto discard;

                /*
                 * Queue it on the new socket if the new socket is active,
                 * otherwise we just shortcircuit this and continue with
                 * the new socket..
                 */
                if(nsk != sk) {
                        if (tcp_child_process(sk, nsk, skb))
                                goto reset;
                        if (opt_skb)
                                __kfree_skb(opt_skb);
                        return 0;
                }
        }

        TCP_CHECK_TIMER(sk);
        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
                goto reset;
        TCP_CHECK_TIMER(sk);
        if (opt_skb)
                goto ipv6_pktoptions;
        return 0;

reset:
        tcp_v6_send_reset(skb);
discard:
        if (opt_skb)
                __kfree_skb(opt_skb);
        kfree_skb(skb);
        return 0;
csum_err:
        TCP_INC_STATS_BH(TCP_MIB_INERRS);
        goto discard;


ipv6_pktoptions:
        /* Do you ask, what is it?

           1. skb was enqueued by tcp.
           2. skb is added to tail of read queue, rather than out of order.
           3. socket is not in passive state.
           4. Finally, it really contains options, which user wants to receive.
         */
        tp = tcp_sk(sk);
        if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
            !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
                if (np->rxopt.bits.rxinfo)
                        np->mcast_oif = inet6_iif(opt_skb);
                if (np->rxopt.bits.rxhlim)
                        np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
                if (ipv6_opt_accepted(sk, opt_skb)) {
                        skb_set_owner_r(opt_skb, sk);
                        /* Swap in the new latched options; the old ones
                         * fall out into opt_skb and are freed below. */
                        opt_skb = xchg(&np->pktoptions, opt_skb);
                } else {
                        __kfree_skb(opt_skb);
                        opt_skb = xchg(&np->pktoptions, NULL);
                }
        }

        if (opt_skb)
                kfree_skb(opt_skb);
        return 0;
}
1564
/* Protocol entry point for every inbound TCPv6 segment.
 *
 * Validates the header and checksum, fills the skb control block,
 * looks up the owning socket and delivers under the socket lock (or
 * onto the backlog), handling no-socket and TIME_WAIT cases.
 */
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
        struct sk_buff *skb = *pskb;
        struct tcphdr *th;
        struct sock *sk;
        int ret;

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        /*
         *      Count it even if it's bad.
         */
        TCP_INC_STATS_BH(TCP_MIB_INSEGS);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                goto discard_it;

        th = skb->h.th;

        if (th->doff < sizeof(struct tcphdr)/4)
                goto bad_packet;
        if (!pskb_may_pull(skb, th->doff*4))
                goto discard_it;

        if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
             tcp_v6_checksum_init(skb) < 0))
                goto bad_packet;

        /* pskb_may_pull() may have reallocated; reload the header. */
        th = skb->h.th;
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    skb->len - th->doff*4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
        TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
        TCP_SKB_CB(skb)->sacked = 0;

        sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
                            &skb->nh.ipv6h->daddr, ntohs(th->dest),
                            inet6_iif(skb));

        if (!sk)
                goto no_tcp_socket;

process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;

        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;

        if (sk_filter(sk, skb, 0))
                goto discard_and_relse;

        skb->dev = NULL;

        bh_lock_sock(sk);
        ret = 0;
        if (!sock_owned_by_user(sk)) {
                if (!tcp_prequeue(sk, skb))
                        ret = tcp_v6_do_rcv(sk, skb);
        } else
                /* Socket owned by user context: defer to the backlog. */
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);

        sock_put(sk);
        return ret ? -1 : 0;

no_tcp_socket:
        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto discard_it;

        if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
                TCP_INC_STATS_BH(TCP_MIB_INERRS);
        } else {
                tcp_v6_send_reset(skb);
        }

discard_it:

        /*
         *      Discard frame
         */

        kfree_skb(skb);
        return 0;

discard_and_relse:
        sock_put(sk);
        goto discard_it;

do_time_wait:
        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                inet_twsk_put((struct inet_timewait_sock *)sk);
                goto discard_it;
        }

        if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(TCP_MIB_INERRS);
                inet_twsk_put((struct inet_timewait_sock *)sk);
                goto discard_it;
        }

        switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
                                           skb, th)) {
        case TCP_TW_SYN:
        {
                /* A new SYN hit TIME_WAIT: if a listener exists, recycle
                 * the timewait slot and reprocess against the listener. */
                struct sock *sk2;

                sk2 = inet6_lookup_listener(&tcp_hashinfo,
                                            &skb->nh.ipv6h->daddr,
                                            ntohs(th->dest), inet6_iif(skb));
                if (sk2 != NULL) {
                        struct inet_timewait_sock *tw = inet_twsk(sk);
                        inet_twsk_deschedule(tw, &tcp_death_row);
                        inet_twsk_put(tw);
                        sk = sk2;
                        goto process;
                }
                /* Fall through to ACK */
        }
        case TCP_TW_ACK:
                tcp_v6_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
                goto no_tcp_socket;
        case TCP_TW_SUCCESS:;
        }
        goto discard_it;
}
1697
/* Revalidate (and if necessary re-create) the cached route for an
 * established socket, e.g. after a routing change.  Returns 0 on
 * success or a negative error from the route/xfrm lookup.
 */
static int tcp_v6_rebuild_header(struct sock *sk)
{
        int err;
        struct dst_entry *dst;
        struct ipv6_pinfo *np = inet6_sk(sk);

        dst = __sk_dst_check(sk, np->dst_cookie);

        if (dst == NULL) {
                struct inet_sock *inet = inet_sk(sk);
                struct in6_addr *final_p = NULL, final;
                struct flowi fl;

                memset(&fl, 0, sizeof(fl));
                fl.proto = IPPROTO_TCP;
                ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
                ipv6_addr_copy(&fl.fl6_src, &np->saddr);
                fl.fl6_flowlabel = np->flow_label;
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = inet->dport;
                fl.fl_ip_sport = inet->sport;

                /* With a source route, route via the first hop and keep
                 * the real destination for after the lookup. */
                if (np->opt && np->opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }

                err = ip6_dst_lookup(sk, &dst, &fl);
                if (err) {
                        sk->sk_route_caps = 0;
                        return err;
                }
                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);

                if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                        sk->sk_err_soft = -err;
                        dst_release(dst);
                        return err;
                }

                ip6_dst_store(sk, dst, NULL);
                sk->sk_route_caps = dst->dev->features &
                        ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
        }

        return 0;
}
1748
/* queue_xmit hook: route (using the cached dst when still valid) and
 * transmit one TCP segment over IPv6.  Returns the result of
 * ip6_xmit() or a negative routing/xfrm error.
 */
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
        struct sock *sk = skb->sk;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct flowi fl;
        struct dst_entry *dst;
        struct in6_addr *final_p = NULL, final;

        memset(&fl, 0, sizeof(fl));
        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
        ipv6_addr_copy(&fl.fl6_src, &np->saddr);
        fl.fl6_flowlabel = np->flow_label;
        IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
        fl.oif = sk->sk_bound_dev_if;
        fl.fl_ip_sport = inet->sport;
        fl.fl_ip_dport = inet->dport;

        /* Source routing: route via the first hop, remember the real
         * destination so it can be restored below. */
        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
                ipv6_addr_copy(&final, &fl.fl6_dst);
                ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                final_p = &final;
        }

        dst = __sk_dst_check(sk, np->dst_cookie);

        if (dst == NULL) {
                int err = ip6_dst_lookup(sk, &dst, &fl);

                if (err) {
                        sk->sk_err_soft = -err;
                        return err;
                }

                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);

                if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                        sk->sk_route_caps = 0;
                        dst_release(dst);
                        return err;
                }

                ip6_dst_store(sk, dst, NULL);
                sk->sk_route_caps = dst->dev->features &
                        ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
        }

        skb->dst = dst_clone(dst);

        /* Restore final destination back after routing done */
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

        return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
1806
1807static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1808{
1809 struct ipv6_pinfo *np = inet6_sk(sk);
1810 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1811
1812 sin6->sin6_family = AF_INET6;
1813 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1814 sin6->sin6_port = inet_sk(sk)->dport;
1815 /* We do not store received flowlabel for TCP */
1816 sin6->sin6_flowinfo = 0;
1817 sin6->sin6_scope_id = 0;
1818 if (sk->sk_bound_dev_if &&
1819 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1820 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1821}
1822
/* Peer timestamp caching is not implemented for IPv6 yet; always report
 * that nothing was stored.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
        return 0;
}
1828
/* Address-family operations for native IPv6 TCP sockets. */
static struct tcp_func ipv6_specific = {
        .queue_xmit     =       tcp_v6_xmit,
        .send_check     =       tcp_v6_send_check,
        .rebuild_header =       tcp_v6_rebuild_header,
        .conn_request   =       tcp_v6_conn_request,
        .syn_recv_sock  =       tcp_v6_syn_recv_sock,
        .remember_stamp =       tcp_v6_remember_stamp,
        .net_header_len =       sizeof(struct ipv6hdr),

        .setsockopt     =       ipv6_setsockopt,
        .getsockopt     =       ipv6_getsockopt,
        .addr2sockaddr  =       v6_addr2sockaddr,
        .sockaddr_len   =       sizeof(struct sockaddr_in6)
};
1843
/*
 *      TCP over IPv4 via INET6 API
 */

/* Address-family operations for v4-mapped sockets: transmit/checksum go
 * through the IPv4 routines while socket-option handling stays IPv6.
 */
static struct tcp_func ipv6_mapped = {
        .queue_xmit     =       ip_queue_xmit,
        .send_check     =       tcp_v4_send_check,
        .rebuild_header =       inet_sk_rebuild_header,
        .conn_request   =       tcp_v6_conn_request,
        .syn_recv_sock  =       tcp_v6_syn_recv_sock,
        .remember_stamp =       tcp_v4_remember_stamp,
        .net_header_len =       sizeof(struct iphdr),

        .setsockopt     =       ipv6_setsockopt,
        .getsockopt     =       ipv6_getsockopt,
        .addr2sockaddr  =       v6_addr2sockaddr,
        .sockaddr_len   =       sizeof(struct sockaddr_in6)
};
1862
1863
1864
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
/* proto->init hook: set the initial TCP state of a freshly allocated
 * IPv6 socket (timers, cwnd, buffers, af ops).  Always returns 0.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);

        skb_queue_head_init(&tp->out_of_order_queue);
        tcp_init_xmit_timers(sk);
        tcp_prequeue_init(tp);

        icsk->icsk_rto = TCP_TIMEOUT_INIT;
        tp->mdev = TCP_TIMEOUT_INIT;

        /* So many TCP implementations out there (incorrectly) count the
         * initial SYN frame in their delayed-ACK and congestion control
         * algorithms that we must have the following bandaid to talk
         * efficiently to them.  -DaveM
         */
        tp->snd_cwnd = 2;

        /* See draft-stevens-tcpca-spec-01 for discussion of the
         * initialization of these values.
         */
        tp->snd_ssthresh = 0x7fffffff;
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = 536;

        tp->reordering = sysctl_tcp_reordering;

        sk->sk_state = TCP_CLOSE;

        tp->af_specific = &ipv6_specific;
        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
        sk->sk_write_space = sk_stream_write_space;
        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

        sk->sk_sndbuf = sysctl_tcp_wmem[1];
        sk->sk_rcvbuf = sysctl_tcp_rmem[1];

        atomic_inc(&tcp_sockets_allocated);

        return 0;
}
1910
/* proto->destroy hook: tear down the shared TCP state first, then the
 * IPv6-specific part of the socket.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
        tcp_v4_destroy_sock(sk);

        return inet6_destroy_sock(sk);
}
1916
/* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 row for a pending (SYN_RECV) request sock. */
static void get_openreq6(struct seq_file *seq,
                         struct sock *sk, struct request_sock *req, int i, int uid)
{
        struct in6_addr *dest, *src;
        /* Remaining lifetime of the request, clamped at zero. */
        int ttd = req->expires - jiffies;

        if (ttd < 0)
                ttd = 0;

        src = &tcp6_rsk(req)->loc_addr;
        dest = &tcp6_rsk(req)->rmt_addr;
        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3],
                   ntohs(inet_sk(sk)->sport),
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3],
                   ntohs(inet_rsk(req)->rmt_port),
                   TCP_SYN_RECV,
                   0,0, /* could print option size, but that is af dependent. */
                   1,   /* timers active (only the expire timer) */
                   jiffies_to_clock_t(ttd),
                   req->retrans,
                   uid,
                   0,  /* non standard timer */
                   0, /* open_requests have no inode */
                   0, req);
}
1949
/* Emit one /proc/net/tcp6 row for a full socket (listening or
 * established), including which timer is pending and when it fires.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
        struct in6_addr *dest, *src;
        __u16 destp, srcp;
        int timer_active;
        unsigned long timer_expires;
        struct inet_sock *inet = inet_sk(sp);
        struct tcp_sock *tp = tcp_sk(sp);
        const struct inet_connection_sock *icsk = inet_csk(sp);
        struct ipv6_pinfo *np = inet6_sk(sp);

        dest  = &np->daddr;
        src   = &np->rcv_saddr;
        destp = ntohs(inet->dport);
        srcp  = ntohs(inet->sport);

        /* timer_active codes match the IPv4 proc format:
         * 1 = retransmit, 4 = zero-window probe, 2 = keepalive, 0 = none. */
        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
                timer_active    = 1;
                timer_expires   = icsk->icsk_timeout;
        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                timer_active    = 4;
                timer_expires   = icsk->icsk_timeout;
        } else if (timer_pending(&sp->sk_timer)) {
                timer_active    = 2;
                timer_expires   = sp->sk_timer.expires;
        } else {
                timer_active    = 0;
                timer_expires = jiffies;
        }

        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3], srcp,
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
                   sp->sk_state,
                   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
                   timer_active,
                   jiffies_to_clock_t(timer_expires - jiffies),
                   icsk->icsk_retransmits,
                   sock_i_uid(sp),
                   icsk->icsk_probes_out,
                   sock_i_ino(sp),
                   atomic_read(&sp->sk_refcnt), sp,
                   icsk->icsk_rto,
                   icsk->icsk_ack.ato,
                   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
                   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
                   );
}
2003
/* Emit one /proc/net/tcp6 row for a TIME_WAIT socket; most columns are
 * fixed since a timewait sock keeps only addresses, ports and its TTL.
 */
static void get_timewait6_sock(struct seq_file *seq,
                               struct inet_timewait_sock *tw, int i)
{
        struct in6_addr *dest, *src;
        __u16 destp, srcp;
        struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
        /* Remaining time-wait lifetime, clamped at zero. */
        int ttd = tw->tw_ttd - jiffies;

        if (ttd < 0)
                ttd = 0;

        dest = &tcp6tw->tw_v6_daddr;
        src  = &tcp6tw->tw_v6_rcv_saddr;
        destp = ntohs(tw->tw_dport);
        srcp  = ntohs(tw->tw_sport);

        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3], srcp,
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
                   tw->tw_substate, 0, 0,
                   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
                   atomic_read(&tw->tw_refcnt), tw);
}
2032
#ifdef CONFIG_PROC_FS
/* seq_file show callback for /proc/net/tcp6: print the header row for
 * the start token, otherwise dispatch on the iterator state to the
 * matching row formatter above.
 */
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
        struct tcp_iter_state *st;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         " sl "
                         "local_address "
                         "remote_address "
                         "st tx_queue rx_queue tr tm->when retrnsmt"
                         " uid timeout inode\n");
                goto out;
        }
        st = seq->private;

        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
        case TCP_SEQ_STATE_ESTABLISHED:
                get_tcp6_sock(seq, v, st->num);
                break;
        case TCP_SEQ_STATE_OPENREQ:
                get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
                break;
        case TCP_SEQ_STATE_TIME_WAIT:
                get_timewait6_sock(seq, v, st->num);
                break;
        }
out:
        return 0;
}

static struct file_operations tcp6_seq_fops;
/* Registration record tying the generic TCP proc iterator to AF_INET6. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
        .owner          = THIS_MODULE,
        .name           = "tcp6",
        .family         = AF_INET6,
        .seq_show       = tcp6_seq_show,
        .seq_fops       = &tcp6_seq_fops,
};

/* Create the /proc/net/tcp6 entry. */
int __init tcp6_proc_init(void)
{
        return tcp_proc_register(&tcp6_seq_afinfo);
}

/* Remove the /proc/net/tcp6 entry. */
void tcp6_proc_exit(void)
{
        tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
2084
/* Transport-protocol descriptor for AF_INET6/SOCK_STREAM sockets; most
 * operations are the address-family-independent TCP routines, with the
 * v6-specific init/destroy/receive/hash hooks plugged in.
 */
struct proto tcpv6_prot = {
        .name                   = "TCPv6",
        .owner                  = THIS_MODULE,
        .close                  = tcp_close,
        .connect                = tcp_v6_connect,
        .disconnect             = tcp_disconnect,
        .accept                 = inet_csk_accept,
        .ioctl                  = tcp_ioctl,
        .init                   = tcp_v6_init_sock,
        .destroy                = tcp_v6_destroy_sock,
        .shutdown               = tcp_shutdown,
        .setsockopt             = tcp_setsockopt,
        .getsockopt             = tcp_getsockopt,
        .sendmsg                = tcp_sendmsg,
        .recvmsg                = tcp_recvmsg,
        .backlog_rcv            = tcp_v6_do_rcv,
        .hash                   = tcp_v6_hash,
        .unhash                 = tcp_unhash,
        .get_port               = tcp_v6_get_port,
        .enter_memory_pressure  = tcp_enter_memory_pressure,
        .sockets_allocated      = &tcp_sockets_allocated,
        .memory_allocated       = &tcp_memory_allocated,
        .memory_pressure        = &tcp_memory_pressure,
        .orphan_count           = &tcp_orphan_count,
        .sysctl_mem             = sysctl_tcp_mem,
        .sysctl_wmem            = sysctl_tcp_wmem,
        .sysctl_rmem            = sysctl_tcp_rmem,
        .max_header             = MAX_TCP_HEADER,
        .obj_size               = sizeof(struct tcp6_sock),
        .twsk_obj_size          = sizeof(struct tcp6_timewait_sock),
        .rsk_prot               = &tcp6_request_sock_ops,
};
2117
/* inet6 protocol handler registered for IPPROTO_TCP packets. */
static struct inet6_protocol tcpv6_protocol = {
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
2123
/* socket() switch entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6_prot. */
static struct inet_protosw tcpv6_protosw = {
        .type           =       SOCK_STREAM,
        .protocol       =       IPPROTO_TCP,
        .prot           =       &tcpv6_prot,
        .ops            =       &inet6_stream_ops,
        .capability     =       -1,
        .no_check       =       0,
        .flags          =       INET_PROTOSW_PERMANENT,
};
2133
2134void __init tcpv6_init(void)
2135{
2136 /* register inet6 protocol */
2137 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2138 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2139 inet6_register_protosw(&tcpv6_protosw);
2140}
This page took 0.212288 seconds and 5 git commands to generate.