net/ipv6/tcp_ipv6.c (deliverable/linux.git, commit d693cb988b78f99407f529e0a956e099e468c6ef)
1 /*
2 * TCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
14 *
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and Alexey Kuznetsov:
18 * Support the IPV6_V6ONLY socket option, which allows both IPv4 and
19 * IPv6 sockets to bind to a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/addrconf.h>
60 #include <net/snmp.h>
61 #include <net/dsfield.h>
62
63 #include <asm/uaccess.h>
64
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
67
68 static void tcp_v6_send_reset(struct sk_buff *skb);
69 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
71 struct sk_buff *skb);
72
73 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75
76 static struct tcp_func ipv6_mapped;
77 static struct tcp_func ipv6_specific;
78
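/* Return non-zero if some other socket already bound to this port conflicts
 * with sk: the device bindings overlap, at least one side lacks SO_REUSEADDR
 * (or is listening), and the bound addresses overlap.
 */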
79 static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 const struct inet_bind_bucket *tb)
81 {
82 const struct sock *sk2;
83 const struct hlist_node *node;
84
85 /* We must walk the whole port owner list in this case. -DaveM */
86 sk_for_each_bound(sk2, node, &tb->owners) {
87 if (sk != sk2 &&
88 (!sk->sk_bound_dev_if ||
89 !sk2->sk_bound_dev_if ||
90 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 (!sk->sk_reuse || !sk2->sk_reuse ||
92 sk2->sk_state == TCP_LISTEN) &&
93 ipv6_rcv_saddr_equal(sk, sk2))
94 break;
95 }
96
97 return node != NULL;
98 }
99
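/* get_port callback: pick an ephemeral port when snum is 0, otherwise check
 * that snum can be (re)bound, and attach the socket to its bind bucket.
 */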
100 /* Grrr, addr_type already calculated by caller, but I don't want
101 * to add some silly "cookie" argument to this method just for that.
102 * But it doesn't matter, the recalculation is in the rarest path
103 * this function ever takes.
104 */
105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
106 {
107 struct inet_bind_hashbucket *head;
108 struct inet_bind_bucket *tb;
109 struct hlist_node *node;
110 int ret;
111
112 local_bh_disable();
113 if (snum == 0) {
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
117 int rover;
118
119 spin_lock(&tcp_hashinfo.portalloc_lock);
120 if (tcp_hashinfo.port_rover < low)
121 rover = low;
122 else
123 rover = tcp_hashinfo.port_rover;
124 do { rover++;
125 if (rover > high)
126 rover = low;
127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain)
130 if (tb->port == rover)
131 goto next;
132 break;
133 next:
134 spin_unlock(&head->lock);
135 } while (--remaining > 0);
136 tcp_hashinfo.port_rover = rover;
137 spin_unlock(&tcp_hashinfo.portalloc_lock);
138
139 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash
141 * locks if this test triggers, because if 'remaining'
142 * drops to zero, we broke out of the do/while loop at
143 * the top level, not from the 'break;' statement.
144 */
145 ret = 1;
146 if (unlikely(remaining <= 0))
147 goto fail;
148
149 /* OK, here is the one we will use. */
150 snum = rover;
151 } else {
152 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
153 spin_lock(&head->lock);
154 inet_bind_bucket_for_each(tb, node, &head->chain)
155 if (tb->port == snum)
156 goto tb_found;
157 }
158 tb = NULL;
159 goto tb_not_found;
160 tb_found:
161 if (tb && !hlist_empty(&tb->owners)) {
162 if (tb->fastreuse > 0 && sk->sk_reuse &&
163 sk->sk_state != TCP_LISTEN) {
164 goto success;
165 } else {
166 ret = 1;
167 if (tcp_v6_bind_conflict(sk, tb))
168 goto fail_unlock;
169 }
170 }
171 tb_not_found:
172 ret = 1;
173 if (tb == NULL) {
174 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
175 if (tb == NULL)
176 goto fail_unlock;
177 }
178 if (hlist_empty(&tb->owners)) {
179 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
180 tb->fastreuse = 1;
181 else
182 tb->fastreuse = 0;
183 } else if (tb->fastreuse &&
184 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
185 tb->fastreuse = 0;
186
187 success:
188 if (!inet_csk(sk)->icsk_bind_hash)
189 inet_bind_hash(sk, tb, snum);
190 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
191 ret = 0;
192
193 fail_unlock:
194 spin_unlock(&head->lock);
195 fail:
196 local_bh_enable();
197 return ret;
198 }
199
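/* Link the socket into the global TCP hash tables: listening sockets go into
 * the listening hash, everything else into the established hash.  The caller
 * must have bottom halves disabled; the bucket lock is taken here.
 */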
200 static __inline__ void __tcp_v6_hash(struct sock *sk)
201 {
202 struct hlist_head *list;
203 rwlock_t *lock;
204
205 BUG_TRAP(sk_unhashed(sk));
206
207 if (sk->sk_state == TCP_LISTEN) {
208 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
209 lock = &tcp_hashinfo.lhash_lock;
210 inet_listen_wlock(&tcp_hashinfo);
211 } else {
212 unsigned int hash;
213 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
214 hash &= (tcp_hashinfo.ehash_size - 1);
215 list = &tcp_hashinfo.ehash[hash].chain;
216 lock = &tcp_hashinfo.ehash[hash].lock;
217 write_lock(lock);
218 }
219
220 __sk_add_node(sk, list);
221 sock_prot_inc_use(sk->sk_prot);
222 write_unlock(lock);
223 }
224
225
226 static void tcp_v6_hash(struct sock *sk)
227 {
228 if (sk->sk_state != TCP_CLOSE) {
229 struct tcp_sock *tp = tcp_sk(sk);
230
231 if (tp->af_specific == &ipv6_mapped) {
232 tcp_prot.hash(sk);
233 return;
234 }
235 local_bh_disable();
236 __tcp_v6_hash(sk);
237 local_bh_enable();
238 }
239 }
240
241 /*
242 * Open request hash tables.
243 */
244
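/* Hash the remote address and port into a SYN queue bucket.  This is an
 * open-coded jhash over the IPv6 address words, keyed with the per-listener
 * random value; TCP_SYNQ_HSIZE must be a power of two for the final mask.
 */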
245 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
246 {
247 u32 a, b, c;
248
249 a = raddr->s6_addr32[0];
250 b = raddr->s6_addr32[1];
251 c = raddr->s6_addr32[2];
252
253 a += JHASH_GOLDEN_RATIO;
254 b += JHASH_GOLDEN_RATIO;
255 c += rnd;
256 __jhash_mix(a, b, c);
257
258 a += raddr->s6_addr32[3];
259 b += (u32) rport;
260 __jhash_mix(a, b, c);
261
262 return c & (TCP_SYNQ_HSIZE - 1);
263 }
264
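/* Walk the listener's SYN queue bucket looking for a pending request that
 * matches the remote port, both addresses and (if set) the incoming
 * interface.  On success *prevp is set to the slot pointing at the request
 * so the caller can unlink it.
 */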
265 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
266 struct request_sock ***prevp,
267 __u16 rport,
268 struct in6_addr *raddr,
269 struct in6_addr *laddr,
270 int iif)
271 {
272 const struct inet_connection_sock *icsk = inet_csk(sk);
273 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
274 struct request_sock *req, **prev;
275
276 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
277 (req = *prev) != NULL;
278 prev = &req->dl_next) {
279 const struct tcp6_request_sock *treq = tcp6_rsk(req);
280
281 if (inet_rsk(req)->rmt_port == rport &&
282 req->rsk_ops->family == AF_INET6 &&
283 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
284 ipv6_addr_equal(&treq->loc_addr, laddr) &&
285 (!treq->iif || treq->iif == iif)) {
286 BUG_TRAP(req->sk == NULL);
287 *prevp = prev;
288 return req;
289 }
290 }
291
292 return NULL;
293 }
294
295 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
296 struct in6_addr *saddr,
297 struct in6_addr *daddr,
298 unsigned long base)
299 {
300 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
301 }
302
303 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
304 {
305 if (skb->protocol == htons(ETH_P_IPV6)) {
306 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
307 skb->nh.ipv6h->saddr.s6_addr32,
308 skb->h.th->dest,
309 skb->h.th->source);
310 } else {
311 return secure_tcp_sequence_number(skb->nh.iph->daddr,
312 skb->nh.iph->saddr,
313 skb->h.th->dest,
314 skb->h.th->source);
315 }
316 }
317
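/* Check that the chosen 4-tuple is not already in use.  TIME-WAIT sockets
 * are examined first and may be recycled (per tcp_tw_reuse); if the tuple is
 * unique, the socket is hashed into the established table while the bucket
 * lock is still held.
 */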
318 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
319 struct inet_timewait_sock **twp)
320 {
321 struct inet_sock *inet = inet_sk(sk);
322 const struct ipv6_pinfo *np = inet6_sk(sk);
323 const struct in6_addr *daddr = &np->rcv_saddr;
324 const struct in6_addr *saddr = &np->daddr;
325 const int dif = sk->sk_bound_dev_if;
326 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
327 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
328 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
329 struct sock *sk2;
330 const struct hlist_node *node;
331 struct inet_timewait_sock *tw;
332
333 prefetch(head->chain.first);
334 write_lock(&head->lock);
335
336 /* Check TIME-WAIT sockets first. */
337 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
338 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
339
340 tw = inet_twsk(sk2);
341
342 if(*((__u32 *)&(tw->tw_dport)) == ports &&
343 sk2->sk_family == PF_INET6 &&
344 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
345 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
346 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
347 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
348 struct tcp_sock *tp = tcp_sk(sk);
349
350 if (tcptw->tw_ts_recent_stamp &&
351 (!twp ||
352 (sysctl_tcp_tw_reuse &&
353 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
354 /* See comment in tcp_ipv4.c */
355 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
356 if (!tp->write_seq)
357 tp->write_seq = 1;
358 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
359 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
360 sock_hold(sk2);
361 goto unique;
362 } else
363 goto not_unique;
364 }
365 }
366 tw = NULL;
367
368 /* And established part... */
369 sk_for_each(sk2, node, &head->chain) {
370 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
371 goto not_unique;
372 }
373
374 unique:
375 BUG_TRAP(sk_unhashed(sk));
376 __sk_add_node(sk, &head->chain);
377 sk->sk_hash = hash;
378 sock_prot_inc_use(sk->sk_prot);
379 write_unlock(&head->lock);
380
381 if (twp) {
382 *twp = tw;
383 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
384 } else if (tw) {
385 /* Silly. Should hash-dance instead... */
386 inet_twsk_deschedule(tw, &tcp_death_row);
387 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
388
389 inet_twsk_put(tw);
390 }
391 return 0;
392
393 not_unique:
394 write_unlock(&head->lock);
395 return -EADDRNOTAVAIL;
396 }
397
398 static inline u32 tcpv6_port_offset(const struct sock *sk)
399 {
400 const struct inet_sock *inet = inet_sk(sk);
401 const struct ipv6_pinfo *np = inet6_sk(sk);
402
403 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
404 np->daddr.s6_addr32,
405 inet->dport);
406 }
407
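/* Bind an ephemeral port for an outgoing connection (or reuse a port that
 * bind() already chose) and hash the socket.  Port collisions are resolved
 * through __tcp_v6_check_established(), mirroring tcp_v4_hash_connect().
 */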
408 static int tcp_v6_hash_connect(struct sock *sk)
409 {
410 unsigned short snum = inet_sk(sk)->num;
411 struct inet_bind_hashbucket *head;
412 struct inet_bind_bucket *tb;
413 int ret;
414
415 if (!snum) {
416 int low = sysctl_local_port_range[0];
417 int high = sysctl_local_port_range[1];
418 int range = high - low;
419 int i;
420 int port;
421 static u32 hint;
422 u32 offset = hint + tcpv6_port_offset(sk);
423 struct hlist_node *node;
424 struct inet_timewait_sock *tw = NULL;
425
426 local_bh_disable();
427 for (i = 1; i <= range; i++) {
428 port = low + (i + offset) % range;
429 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
430 spin_lock(&head->lock);
431
432 /* Does not bother with rcv_saddr checks,
433 * because the established check is already
434 * unique enough.
435 */
436 inet_bind_bucket_for_each(tb, node, &head->chain) {
437 if (tb->port == port) {
438 BUG_TRAP(!hlist_empty(&tb->owners));
439 if (tb->fastreuse >= 0)
440 goto next_port;
441 if (!__tcp_v6_check_established(sk,
442 port,
443 &tw))
444 goto ok;
445 goto next_port;
446 }
447 }
448
449 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
450 if (!tb) {
451 spin_unlock(&head->lock);
452 break;
453 }
454 tb->fastreuse = -1;
455 goto ok;
456
457 next_port:
458 spin_unlock(&head->lock);
459 }
460 local_bh_enable();
461
462 return -EADDRNOTAVAIL;
463
464 ok:
465 hint += i;
466
467 /* Head lock still held and bh's disabled */
468 inet_bind_hash(sk, tb, port);
469 if (sk_unhashed(sk)) {
470 inet_sk(sk)->sport = htons(port);
471 __tcp_v6_hash(sk);
472 }
473 spin_unlock(&head->lock);
474
475 if (tw) {
476 inet_twsk_deschedule(tw, &tcp_death_row);
477 inet_twsk_put(tw);
478 }
479
480 ret = 0;
481 goto out;
482 }
483
484 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
485 tb = inet_csk(sk)->icsk_bind_hash;
486 spin_lock_bh(&head->lock);
487
488 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
489 __tcp_v6_hash(sk);
490 spin_unlock_bh(&head->lock);
491 return 0;
492 } else {
493 spin_unlock(&head->lock);
494 /* No definite answer... Walk to established hash table */
495 ret = __tcp_v6_check_established(sk, snum, NULL);
496 out:
497 local_bh_enable();
498 return ret;
499 }
500 }
501
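/* Active connect.  Handles flow labels, scope ids and the v4-mapped case
 * (which hands the socket over to the IPv4 code via the ipv6_mapped ops),
 * performs the route and xfrm lookups, and finally sends the SYN.
 */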
502 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
503 int addr_len)
504 {
505 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
506 struct inet_sock *inet = inet_sk(sk);
507 struct ipv6_pinfo *np = inet6_sk(sk);
508 struct tcp_sock *tp = tcp_sk(sk);
509 struct in6_addr *saddr = NULL, *final_p = NULL, final;
510 struct flowi fl;
511 struct dst_entry *dst;
512 int addr_type;
513 int err;
514
515 if (addr_len < SIN6_LEN_RFC2133)
516 return -EINVAL;
517
518 if (usin->sin6_family != AF_INET6)
519 return(-EAFNOSUPPORT);
520
521 memset(&fl, 0, sizeof(fl));
522
523 if (np->sndflow) {
524 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
525 IP6_ECN_flow_init(fl.fl6_flowlabel);
526 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
527 struct ip6_flowlabel *flowlabel;
528 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
529 if (flowlabel == NULL)
530 return -EINVAL;
531 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
532 fl6_sock_release(flowlabel);
533 }
534 }
535
536 /*
537 * connect() to INADDR_ANY means loopback (BSD'ism).
538 */
539
540 if(ipv6_addr_any(&usin->sin6_addr))
541 usin->sin6_addr.s6_addr[15] = 0x1;
542
543 addr_type = ipv6_addr_type(&usin->sin6_addr);
544
545 if(addr_type & IPV6_ADDR_MULTICAST)
546 return -ENETUNREACH;
547
548 if (addr_type&IPV6_ADDR_LINKLOCAL) {
549 if (addr_len >= sizeof(struct sockaddr_in6) &&
550 usin->sin6_scope_id) {
551 /* If interface is set while binding, indices
552 * must coincide.
553 */
554 if (sk->sk_bound_dev_if &&
555 sk->sk_bound_dev_if != usin->sin6_scope_id)
556 return -EINVAL;
557
558 sk->sk_bound_dev_if = usin->sin6_scope_id;
559 }
560
561 /* Connect to link-local address requires an interface */
562 if (!sk->sk_bound_dev_if)
563 return -EINVAL;
564 }
565
566 if (tp->rx_opt.ts_recent_stamp &&
567 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
568 tp->rx_opt.ts_recent = 0;
569 tp->rx_opt.ts_recent_stamp = 0;
570 tp->write_seq = 0;
571 }
572
573 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
574 np->flow_label = fl.fl6_flowlabel;
575
576 /*
577 * TCP over IPv4
578 */
579
580 if (addr_type == IPV6_ADDR_MAPPED) {
581 u32 exthdrlen = tp->ext_header_len;
582 struct sockaddr_in sin;
583
584 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
585
586 if (__ipv6_only_sock(sk))
587 return -ENETUNREACH;
588
589 sin.sin_family = AF_INET;
590 sin.sin_port = usin->sin6_port;
591 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
592
593 tp->af_specific = &ipv6_mapped;
594 sk->sk_backlog_rcv = tcp_v4_do_rcv;
595
596 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
597
598 if (err) {
599 tp->ext_header_len = exthdrlen;
600 tp->af_specific = &ipv6_specific;
601 sk->sk_backlog_rcv = tcp_v6_do_rcv;
602 goto failure;
603 } else {
604 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
605 inet->saddr);
606 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
607 inet->rcv_saddr);
608 }
609
610 return err;
611 }
612
613 if (!ipv6_addr_any(&np->rcv_saddr))
614 saddr = &np->rcv_saddr;
615
616 fl.proto = IPPROTO_TCP;
617 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
618 ipv6_addr_copy(&fl.fl6_src,
619 (saddr ? saddr : &np->saddr));
620 fl.oif = sk->sk_bound_dev_if;
621 fl.fl_ip_dport = usin->sin6_port;
622 fl.fl_ip_sport = inet->sport;
623
624 if (np->opt && np->opt->srcrt) {
625 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
626 ipv6_addr_copy(&final, &fl.fl6_dst);
627 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
628 final_p = &final;
629 }
630
631 err = ip6_dst_lookup(sk, &dst, &fl);
632 if (err)
633 goto failure;
634 if (final_p)
635 ipv6_addr_copy(&fl.fl6_dst, final_p);
636
637 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
638 goto failure;
639
640 if (saddr == NULL) {
641 saddr = &fl.fl6_src;
642 ipv6_addr_copy(&np->rcv_saddr, saddr);
643 }
644
645 /* set the source address */
646 ipv6_addr_copy(&np->saddr, saddr);
647 inet->rcv_saddr = LOOPBACK4_IPV6;
648
649 ip6_dst_store(sk, dst, NULL);
650 sk->sk_route_caps = dst->dev->features &
651 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
652
653 tp->ext_header_len = 0;
654 if (np->opt)
655 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
656
657 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
658
659 inet->dport = usin->sin6_port;
660
661 tcp_set_state(sk, TCP_SYN_SENT);
662 err = tcp_v6_hash_connect(sk);
663 if (err)
664 goto late_failure;
665
666 if (!tp->write_seq)
667 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
668 np->daddr.s6_addr32,
669 inet->sport,
670 inet->dport);
671
672 err = tcp_connect(sk);
673 if (err)
674 goto late_failure;
675
676 return 0;
677
678 late_failure:
679 tcp_set_state(sk, TCP_CLOSE);
680 __sk_dst_reset(sk);
681 failure:
682 inet->dport = 0;
683 sk->sk_route_caps = 0;
684 return err;
685 }
686
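/* ICMPv6 error handler.  PKT_TOOBIG refreshes the cached route and shrinks
 * the MSS; other errors are converted with icmpv6_err_convert() and either
 * reported to the socket or, for a pending request_sock, cause the request
 * to be dropped.
 */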
687 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
688 int type, int code, int offset, __u32 info)
689 {
690 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
691 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
692 struct ipv6_pinfo *np;
693 struct sock *sk;
694 int err;
695 struct tcp_sock *tp;
696 __u32 seq;
697
698 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
699 th->source, skb->dev->ifindex);
700
701 if (sk == NULL) {
702 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
703 return;
704 }
705
706 if (sk->sk_state == TCP_TIME_WAIT) {
707 inet_twsk_put((struct inet_timewait_sock *)sk);
708 return;
709 }
710
711 bh_lock_sock(sk);
712 if (sock_owned_by_user(sk))
713 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
714
715 if (sk->sk_state == TCP_CLOSE)
716 goto out;
717
718 tp = tcp_sk(sk);
719 seq = ntohl(th->seq);
720 if (sk->sk_state != TCP_LISTEN &&
721 !between(seq, tp->snd_una, tp->snd_nxt)) {
722 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
723 goto out;
724 }
725
726 np = inet6_sk(sk);
727
728 if (type == ICMPV6_PKT_TOOBIG) {
729 struct dst_entry *dst = NULL;
730
731 if (sock_owned_by_user(sk))
732 goto out;
733 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
734 goto out;
735
736 /* icmp should have updated the destination cache entry */
737 dst = __sk_dst_check(sk, np->dst_cookie);
738
739 if (dst == NULL) {
740 struct inet_sock *inet = inet_sk(sk);
741 struct flowi fl;
742
743 /* BUGGG_FUTURE: Again, it is not clear how
744 to handle rthdr case. Ignore this complexity
745 for now.
746 */
747 memset(&fl, 0, sizeof(fl));
748 fl.proto = IPPROTO_TCP;
749 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
750 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
751 fl.oif = sk->sk_bound_dev_if;
752 fl.fl_ip_dport = inet->dport;
753 fl.fl_ip_sport = inet->sport;
754
755 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
756 sk->sk_err_soft = -err;
757 goto out;
758 }
759
760 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
761 sk->sk_err_soft = -err;
762 goto out;
763 }
764
765 } else
766 dst_hold(dst);
767
768 if (tp->pmtu_cookie > dst_mtu(dst)) {
769 tcp_sync_mss(sk, dst_mtu(dst));
770 tcp_simple_retransmit(sk);
771 } /* else let the usual retransmit timer handle it */
772 dst_release(dst);
773 goto out;
774 }
775
776 icmpv6_err_convert(type, code, &err);
777
778 /* Might be for a request_sock */
779 switch (sk->sk_state) {
780 struct request_sock *req, **prev;
781 case TCP_LISTEN:
782 if (sock_owned_by_user(sk))
783 goto out;
784
785 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
786 &hdr->saddr, inet6_iif(skb));
787 if (!req)
788 goto out;
789
790 /* ICMPs are not backlogged, hence we cannot get
791 * an established socket here.
792 */
793 BUG_TRAP(req->sk == NULL);
794
795 if (seq != tcp_rsk(req)->snt_isn) {
796 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
797 goto out;
798 }
799
800 inet_csk_reqsk_queue_drop(sk, req, prev);
801 goto out;
802
803 case TCP_SYN_SENT:
804 case TCP_SYN_RECV: /* Cannot happen.
805 It can, if SYNs are crossed. --ANK */
806 if (!sock_owned_by_user(sk)) {
807 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
808 sk->sk_err = err;
809 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
810
811 tcp_done(sk);
812 } else
813 sk->sk_err_soft = err;
814 goto out;
815 }
816
817 if (!sock_owned_by_user(sk) && np->recverr) {
818 sk->sk_err = err;
819 sk->sk_error_report(sk);
820 } else
821 sk->sk_err_soft = err;
822
823 out:
824 bh_unlock_sock(sk);
825 sock_put(sk);
826 }
827
828
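/* Build and transmit a SYN|ACK for a pending connection request, doing the
 * route and xfrm lookups here if the caller did not supply a dst.
 */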
829 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
830 struct dst_entry *dst)
831 {
832 struct tcp6_request_sock *treq = tcp6_rsk(req);
833 struct ipv6_pinfo *np = inet6_sk(sk);
834 struct sk_buff * skb;
835 struct ipv6_txoptions *opt = NULL;
836 struct in6_addr * final_p = NULL, final;
837 struct flowi fl;
838 int err = -1;
839
840 memset(&fl, 0, sizeof(fl));
841 fl.proto = IPPROTO_TCP;
842 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
843 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
844 fl.fl6_flowlabel = 0;
845 fl.oif = treq->iif;
846 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
847 fl.fl_ip_sport = inet_sk(sk)->sport;
848
849 if (dst == NULL) {
850 opt = np->opt;
851 if (opt == NULL &&
852 np->rxopt.bits.osrcrt == 2 &&
853 treq->pktopts) {
854 struct sk_buff *pktopts = treq->pktopts;
855 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
856 if (rxopt->srcrt)
857 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
858 }
859
860 if (opt && opt->srcrt) {
861 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
862 ipv6_addr_copy(&final, &fl.fl6_dst);
863 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
864 final_p = &final;
865 }
866
867 err = ip6_dst_lookup(sk, &dst, &fl);
868 if (err)
869 goto done;
870 if (final_p)
871 ipv6_addr_copy(&fl.fl6_dst, final_p);
872 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
873 goto done;
874 }
875
876 skb = tcp_make_synack(sk, dst, req);
877 if (skb) {
878 struct tcphdr *th = skb->h.th;
879
880 th->check = tcp_v6_check(th, skb->len,
881 &treq->loc_addr, &treq->rmt_addr,
882 csum_partial((char *)th, skb->len, skb->csum));
883
884 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
885 err = ip6_xmit(sk, skb, &fl, opt, 0);
886 if (err == NET_XMIT_CN)
887 err = 0;
888 }
889
890 done:
891 if (opt && opt != np->opt)
892 sock_kfree_s(sk, opt, opt->tot_len);
893 return err;
894 }
895
896 static void tcp_v6_reqsk_destructor(struct request_sock *req)
897 {
898 if (tcp6_rsk(req)->pktopts)
899 kfree_skb(tcp6_rsk(req)->pktopts);
900 }
901
902 static struct request_sock_ops tcp6_request_sock_ops = {
903 .family = AF_INET6,
904 .obj_size = sizeof(struct tcp6_request_sock),
905 .rtx_syn_ack = tcp_v6_send_synack,
906 .send_ack = tcp_v6_reqsk_send_ack,
907 .destructor = tcp_v6_reqsk_destructor,
908 .send_reset = tcp_v6_send_reset
909 };
910
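/* Does this skb carry any of the ancillary data the socket asked to receive
 * via the IPV6_PKTOPTIONS-style rxopt bits?
 */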
911 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
912 {
913 struct ipv6_pinfo *np = inet6_sk(sk);
914 struct inet6_skb_parm *opt = IP6CB(skb);
915
916 if (np->rxopt.all) {
917 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
918 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
919 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
920 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
921 return 1;
922 }
923 return 0;
924 }
925
926
927 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
928 struct sk_buff *skb)
929 {
930 struct ipv6_pinfo *np = inet6_sk(sk);
931
932 if (skb->ip_summed == CHECKSUM_HW) {
933 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
934 skb->csum = offsetof(struct tcphdr, check);
935 } else {
936 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
937 csum_partial((char *)th, th->doff<<2,
938 skb->csum));
939 }
940 }
941
942
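/* Send a RST in response to a segment that has no socket.  The reply is
 * built from the offending packet alone, so it can be transmitted with a
 * NULL sock.
 */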
943 static void tcp_v6_send_reset(struct sk_buff *skb)
944 {
945 struct tcphdr *th = skb->h.th, *t1;
946 struct sk_buff *buff;
947 struct flowi fl;
948
949 if (th->rst)
950 return;
951
952 if (!ipv6_unicast_destination(skb))
953 return;
954
955 /*
956 * We need to grab some memory, and put together an RST,
957 * and then put it into the queue to be sent.
958 */
959
960 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
961 GFP_ATOMIC);
962 if (buff == NULL)
963 return;
964
965 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
966
967 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
968
969 /* Swap the send and the receive. */
970 memset(t1, 0, sizeof(*t1));
971 t1->dest = th->source;
972 t1->source = th->dest;
973 t1->doff = sizeof(*t1)/4;
974 t1->rst = 1;
975
976 if(th->ack) {
977 t1->seq = th->ack_seq;
978 } else {
979 t1->ack = 1;
980 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
981 + skb->len - (th->doff<<2));
982 }
983
984 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
985
986 memset(&fl, 0, sizeof(fl));
987 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
988 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
989
990 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
991 sizeof(*t1), IPPROTO_TCP,
992 buff->csum);
993
994 fl.proto = IPPROTO_TCP;
995 fl.oif = inet6_iif(skb);
996 fl.fl_ip_dport = t1->dest;
997 fl.fl_ip_sport = t1->source;
998
999 /* sk = NULL, but it is safe for now. RST socket required. */
1000 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1001
1002 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1003 return;
1004
1005 ip6_xmit(NULL, buff, &fl, NULL, 0);
1006 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1007 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1008 return;
1009 }
1010
1011 kfree_skb(buff);
1012 }
1013
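/* Send a bare ACK, optionally carrying a timestamp option; used for the
 * TIME-WAIT and SYN-RECV replies below.
 */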
1014 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1015 {
1016 struct tcphdr *th = skb->h.th, *t1;
1017 struct sk_buff *buff;
1018 struct flowi fl;
1019 int tot_len = sizeof(struct tcphdr);
1020
1021 if (ts)
1022 tot_len += 3*4;
1023
1024 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1025 GFP_ATOMIC);
1026 if (buff == NULL)
1027 return;
1028
1029 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1030
1031 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1032
1033 /* Swap the send and the receive. */
1034 memset(t1, 0, sizeof(*t1));
1035 t1->dest = th->source;
1036 t1->source = th->dest;
1037 t1->doff = tot_len/4;
1038 t1->seq = htonl(seq);
1039 t1->ack_seq = htonl(ack);
1040 t1->ack = 1;
1041 t1->window = htons(win);
1042
1043 if (ts) {
1044 u32 *ptr = (u32*)(t1 + 1);
1045 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1046 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1047 *ptr++ = htonl(tcp_time_stamp);
1048 *ptr = htonl(ts);
1049 }
1050
1051 buff->csum = csum_partial((char *)t1, tot_len, 0);
1052
1053 memset(&fl, 0, sizeof(fl));
1054 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1055 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1056
1057 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1058 tot_len, IPPROTO_TCP,
1059 buff->csum);
1060
1061 fl.proto = IPPROTO_TCP;
1062 fl.oif = inet6_iif(skb);
1063 fl.fl_ip_dport = t1->dest;
1064 fl.fl_ip_sport = t1->source;
1065
1066 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1067 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1068 return;
1069 ip6_xmit(NULL, buff, &fl, NULL, 0);
1070 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1071 return;
1072 }
1073
1074 kfree_skb(buff);
1075 }
1076
1077 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1078 {
1079 struct inet_timewait_sock *tw = inet_twsk(sk);
1080 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1081
1082 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1083 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1084 tcptw->tw_ts_recent);
1085
1086 inet_twsk_put(tw);
1087 }
1088
1089 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1090 {
1091 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1092 }
1093
1094
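/* A listening socket received a segment that is not a plain SYN: look for a
 * matching request_sock or an already established child before falling back
 * to the listener itself.
 */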
1095 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1096 {
1097 struct request_sock *req, **prev;
1098 const struct tcphdr *th = skb->h.th;
1099 struct sock *nsk;
1100
1101 /* Find possible connection requests. */
1102 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1103 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1104 if (req)
1105 return tcp_check_req(sk, skb, req, prev);
1106
1107 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1108 th->source, &skb->nh.ipv6h->daddr,
1109 ntohs(th->dest), inet6_iif(skb));
1110
1111 if (nsk) {
1112 if (nsk->sk_state != TCP_TIME_WAIT) {
1113 bh_lock_sock(nsk);
1114 return nsk;
1115 }
1116 inet_twsk_put((struct inet_timewait_sock *)nsk);
1117 return NULL;
1118 }
1119
1120 #if 0 /*def CONFIG_SYN_COOKIES*/
1121 if (!th->rst && !th->syn && th->ack)
1122 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1123 #endif
1124 return sk;
1125 }
1126
1127 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1128 {
1129 struct inet_connection_sock *icsk = inet_csk(sk);
1130 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1131 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1132
1133 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1134 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1135 }
1136
1137
1138 /* FIXME: this is substantially similar to the ipv4 code.
1139 * Can some kind of merge be done? -- erics
1140 */
1141 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1142 {
1143 struct tcp6_request_sock *treq;
1144 struct ipv6_pinfo *np = inet6_sk(sk);
1145 struct tcp_options_received tmp_opt;
1146 struct tcp_sock *tp = tcp_sk(sk);
1147 struct request_sock *req = NULL;
1148 __u32 isn = TCP_SKB_CB(skb)->when;
1149
1150 if (skb->protocol == htons(ETH_P_IP))
1151 return tcp_v4_conn_request(sk, skb);
1152
1153 if (!ipv6_unicast_destination(skb))
1154 goto drop;
1155
1156 /*
1157 * There are no SYN attacks on IPv6, yet...
1158 */
1159 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1160 if (net_ratelimit())
1161 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1162 goto drop;
1163 }
1164
1165 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1166 goto drop;
1167
1168 req = reqsk_alloc(&tcp6_request_sock_ops);
1169 if (req == NULL)
1170 goto drop;
1171
1172 tcp_clear_options(&tmp_opt);
1173 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1174 tmp_opt.user_mss = tp->rx_opt.user_mss;
1175
1176 tcp_parse_options(skb, &tmp_opt, 0);
1177
1178 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1179 tcp_openreq_init(req, &tmp_opt, skb);
1180
1181 treq = tcp6_rsk(req);
1182 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1183 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1184 TCP_ECN_create_request(req, skb->h.th);
1185 treq->pktopts = NULL;
1186 if (ipv6_opt_accepted(sk, skb) ||
1187 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1188 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1189 atomic_inc(&skb->users);
1190 treq->pktopts = skb;
1191 }
1192 treq->iif = sk->sk_bound_dev_if;
1193
1194 /* So that link locals have meaning */
1195 if (!sk->sk_bound_dev_if &&
1196 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1197 treq->iif = inet6_iif(skb);
1198
1199 if (isn == 0)
1200 isn = tcp_v6_init_sequence(sk,skb);
1201
1202 tcp_rsk(req)->snt_isn = isn;
1203
1204 if (tcp_v6_send_synack(sk, req, NULL))
1205 goto drop;
1206
1207 tcp_v6_synq_add(sk, req);
1208
1209 return 0;
1210
1211 drop:
1212 if (req)
1213 reqsk_free(req);
1214
1215 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1216 return 0; /* don't send reset */
1217 }
1218
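/* The final ACK of the three-way handshake arrived: create the child socket.
 * The v4-mapped case is delegated to tcp_v4_syn_recv_sock() and then patched
 * up to look like an IPv6 socket.
 */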
1219 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1220 struct request_sock *req,
1221 struct dst_entry *dst)
1222 {
1223 struct tcp6_request_sock *treq = tcp6_rsk(req);
1224 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1225 struct tcp6_sock *newtcp6sk;
1226 struct inet_sock *newinet;
1227 struct tcp_sock *newtp;
1228 struct sock *newsk;
1229 struct ipv6_txoptions *opt;
1230
1231 if (skb->protocol == htons(ETH_P_IP)) {
1232 /*
1233 * v6 mapped
1234 */
1235
1236 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1237
1238 if (newsk == NULL)
1239 return NULL;
1240
1241 newtcp6sk = (struct tcp6_sock *)newsk;
1242 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1243
1244 newinet = inet_sk(newsk);
1245 newnp = inet6_sk(newsk);
1246 newtp = tcp_sk(newsk);
1247
1248 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1249
1250 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1251 newinet->daddr);
1252
1253 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1254 newinet->saddr);
1255
1256 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1257
1258 newtp->af_specific = &ipv6_mapped;
1259 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1260 newnp->pktoptions = NULL;
1261 newnp->opt = NULL;
1262 newnp->mcast_oif = inet6_iif(skb);
1263 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1264
1265 /*
1266 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1267 * here, tcp_create_openreq_child now does this for us, see the comment in
1268 * that function for the gory details. -acme
1269 */
1270
1271 /* This is a tricky place. Until this moment IPv4 tcp
1272 worked with IPv6 af_tcp.af_specific.
1273 Sync it now.
1274 */
1275 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1276
1277 return newsk;
1278 }
1279
1280 opt = np->opt;
1281
1282 if (sk_acceptq_is_full(sk))
1283 goto out_overflow;
1284
1285 if (np->rxopt.bits.osrcrt == 2 &&
1286 opt == NULL && treq->pktopts) {
1287 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1288 if (rxopt->srcrt)
1289 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1290 }
1291
1292 if (dst == NULL) {
1293 struct in6_addr *final_p = NULL, final;
1294 struct flowi fl;
1295
1296 memset(&fl, 0, sizeof(fl));
1297 fl.proto = IPPROTO_TCP;
1298 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1299 if (opt && opt->srcrt) {
1300 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1301 ipv6_addr_copy(&final, &fl.fl6_dst);
1302 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1303 final_p = &final;
1304 }
1305 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1306 fl.oif = sk->sk_bound_dev_if;
1307 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1308 fl.fl_ip_sport = inet_sk(sk)->sport;
1309
1310 if (ip6_dst_lookup(sk, &dst, &fl))
1311 goto out;
1312
1313 if (final_p)
1314 ipv6_addr_copy(&fl.fl6_dst, final_p);
1315
1316 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1317 goto out;
1318 }
1319
1320 newsk = tcp_create_openreq_child(sk, req, skb);
1321 if (newsk == NULL)
1322 goto out;
1323
1324 /*
1325 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1326 * count here, tcp_create_openreq_child now does this for us, see the
1327 * comment in that function for the gory details. -acme
1328 */
1329
1330 ip6_dst_store(newsk, dst, NULL);
1331 newsk->sk_route_caps = dst->dev->features &
1332 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1333
1334 newtcp6sk = (struct tcp6_sock *)newsk;
1335 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1336
1337 newtp = tcp_sk(newsk);
1338 newinet = inet_sk(newsk);
1339 newnp = inet6_sk(newsk);
1340
1341 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1342
1343 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1344 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1345 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1346 newsk->sk_bound_dev_if = treq->iif;
1347
1348 /* Now IPv6 options...
1349
1350 First: no IPv4 options.
1351 */
1352 newinet->opt = NULL;
1353
1354 /* Clone RX bits */
1355 newnp->rxopt.all = np->rxopt.all;
1356
1357 /* Clone pktoptions received with SYN */
1358 newnp->pktoptions = NULL;
1359 if (treq->pktopts != NULL) {
1360 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1361 kfree_skb(treq->pktopts);
1362 treq->pktopts = NULL;
1363 if (newnp->pktoptions)
1364 skb_set_owner_r(newnp->pktoptions, newsk);
1365 }
1366 newnp->opt = NULL;
1367 newnp->mcast_oif = inet6_iif(skb);
1368 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1369
1370 /* Clone native IPv6 options from listening socket (if any)
1371
1372 Yes, keeping reference count would be much more clever,
1373 but we do one more thing here: reattach optmem
1374 to newsk.
1375 */
1376 if (opt) {
1377 newnp->opt = ipv6_dup_options(newsk, opt);
1378 if (opt != np->opt)
1379 sock_kfree_s(sk, opt, opt->tot_len);
1380 }
1381
1382 newtp->ext_header_len = 0;
1383 if (newnp->opt)
1384 newtp->ext_header_len = newnp->opt->opt_nflen +
1385 newnp->opt->opt_flen;
1386
1387 tcp_sync_mss(newsk, dst_mtu(dst));
1388 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1389 tcp_initialize_rcv_mss(newsk);
1390
1391 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1392
1393 __tcp_v6_hash(newsk);
1394 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1395
1396 return newsk;
1397
1398 out_overflow:
1399 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1400 out:
1401 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1402 if (opt && opt != np->opt)
1403 sock_kfree_s(sk, opt, opt->tot_len);
1404 dst_release(dst);
1405 return NULL;
1406 }
1407
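/* Verify (or prepare for deferred verification of) the TCP checksum.  Short
 * packets are checksummed right away; longer ones store the pseudo-header
 * sum so tcp_checksum_complete() can finish the job later.
 */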
1408 static int tcp_v6_checksum_init(struct sk_buff *skb)
1409 {
1410 if (skb->ip_summed == CHECKSUM_HW) {
1411 skb->ip_summed = CHECKSUM_UNNECESSARY;
1412 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1413 &skb->nh.ipv6h->daddr,skb->csum))
1414 return 0;
1415 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1416 }
1417 if (skb->len <= 76) {
1418 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1419 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1420 return -1;
1421 skb->ip_summed = CHECKSUM_UNNECESSARY;
1422 } else {
1423 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1424 &skb->nh.ipv6h->daddr,0);
1425 }
1426 return 0;
1427 }
1428
1429 /* The socket must have its spinlock held when we get
1430 * here.
1431 *
1432 * We have a potential double-lock case here, so even when
1433 * doing backlog processing we use the BH locking scheme.
1434 * This is because we cannot sleep with the original spinlock
1435 * held.
1436 */
1437 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1438 {
1439 struct ipv6_pinfo *np = inet6_sk(sk);
1440 struct tcp_sock *tp;
1441 struct sk_buff *opt_skb = NULL;
1442
1443 /* Imagine: socket is IPv6. IPv4 packet arrives,
1444 goes to the IPv4 receive handler and is backlogged.
1445 From the backlog it always goes here. Kerboom...
1446 Fortunately, tcp_rcv_established and rcv_established
1447 handle them correctly, but that is not the case with
1448 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1449 */
1450
1451 if (skb->protocol == htons(ETH_P_IP))
1452 return tcp_v4_do_rcv(sk, skb);
1453
1454 if (sk_filter(sk, skb, 0))
1455 goto discard;
1456
1457 /*
1458 * socket locking is here for SMP purposes as backlog rcv
1459 * is currently called with bh processing disabled.
1460 */
1461
1462 /* Do Stevens' IPV6_PKTOPTIONS.
1463
1464 Yes, guys, this is the only place in our code where we
1465 can do this without affecting IPv4.
1466 The rest of the code is protocol independent,
1467 and I do not like the idea of uglifying IPv4.
1468
1469 Actually, the whole idea behind IPV6_PKTOPTIONS
1470 does not look very well thought out. For now we latch
1471 the options received in the last packet enqueued
1472 by tcp. Feel free to propose a better solution.
1473 --ANK (980728)
1474 */
1475 if (np->rxopt.all)
1476 opt_skb = skb_clone(skb, GFP_ATOMIC);
1477
1478 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1479 TCP_CHECK_TIMER(sk);
1480 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1481 goto reset;
1482 TCP_CHECK_TIMER(sk);
1483 if (opt_skb)
1484 goto ipv6_pktoptions;
1485 return 0;
1486 }
1487
1488 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1489 goto csum_err;
1490
1491 if (sk->sk_state == TCP_LISTEN) {
1492 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1493 if (!nsk)
1494 goto discard;
1495
1496 /*
1497 * Queue it on the new socket if the new socket is active,
1498 * otherwise we just short-circuit this and continue with
1499 * the new socket.
1500 */
1501 if(nsk != sk) {
1502 if (tcp_child_process(sk, nsk, skb))
1503 goto reset;
1504 if (opt_skb)
1505 __kfree_skb(opt_skb);
1506 return 0;
1507 }
1508 }
1509
1510 TCP_CHECK_TIMER(sk);
1511 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1512 goto reset;
1513 TCP_CHECK_TIMER(sk);
1514 if (opt_skb)
1515 goto ipv6_pktoptions;
1516 return 0;
1517
1518 reset:
1519 tcp_v6_send_reset(skb);
1520 discard:
1521 if (opt_skb)
1522 __kfree_skb(opt_skb);
1523 kfree_skb(skb);
1524 return 0;
1525 csum_err:
1526 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1527 goto discard;
1528
1529
1530 ipv6_pktoptions:
1531 /* You may ask: what is this for?
1532
1533 1. skb was enqueued by tcp.
1534 2. skb is added to tail of read queue, rather than out of order.
1535 3. socket is not in passive state.
1536 4. Finally, it really contains options, which user wants to receive.
1537 */
1538 tp = tcp_sk(sk);
1539 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1540 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1541 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1542 np->mcast_oif = inet6_iif(opt_skb);
1543 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1544 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1545 if (ipv6_opt_accepted(sk, opt_skb)) {
1546 skb_set_owner_r(opt_skb, sk);
1547 opt_skb = xchg(&np->pktoptions, opt_skb);
1548 } else {
1549 __kfree_skb(opt_skb);
1550 opt_skb = xchg(&np->pktoptions, NULL);
1551 }
1552 }
1553
1554 if (opt_skb)
1555 kfree_skb(opt_skb);
1556 return 0;
1557 }
1558
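/* Main receive entry point, registered as the IPPROTO_TCP handler for IPv6.
 * Validates the header and checksum, looks up the socket and either
 * processes the segment directly, prequeues it, backlogs it, or handles the
 * TIME-WAIT cases.
 */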
1559 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1560 {
1561 struct sk_buff *skb = *pskb;
1562 struct tcphdr *th;
1563 struct sock *sk;
1564 int ret;
1565
1566 if (skb->pkt_type != PACKET_HOST)
1567 goto discard_it;
1568
1569 /*
1570 * Count it even if it's bad.
1571 */
1572 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1573
1574 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1575 goto discard_it;
1576
1577 th = skb->h.th;
1578
1579 if (th->doff < sizeof(struct tcphdr)/4)
1580 goto bad_packet;
1581 if (!pskb_may_pull(skb, th->doff*4))
1582 goto discard_it;
1583
1584 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1585 tcp_v6_checksum_init(skb) < 0))
1586 goto bad_packet;
1587
1588 th = skb->h.th;
1589 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1590 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1591 skb->len - th->doff*4);
1592 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1593 TCP_SKB_CB(skb)->when = 0;
1594 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1595 TCP_SKB_CB(skb)->sacked = 0;
1596
1597 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1598 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1599 inet6_iif(skb));
1600
1601 if (!sk)
1602 goto no_tcp_socket;
1603
1604 process:
1605 if (sk->sk_state == TCP_TIME_WAIT)
1606 goto do_time_wait;
1607
1608 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1609 goto discard_and_relse;
1610
1611 if (sk_filter(sk, skb, 0))
1612 goto discard_and_relse;
1613
1614 skb->dev = NULL;
1615
1616 bh_lock_sock(sk);
1617 ret = 0;
1618 if (!sock_owned_by_user(sk)) {
1619 if (!tcp_prequeue(sk, skb))
1620 ret = tcp_v6_do_rcv(sk, skb);
1621 } else
1622 sk_add_backlog(sk, skb);
1623 bh_unlock_sock(sk);
1624
1625 sock_put(sk);
1626 return ret ? -1 : 0;
1627
1628 no_tcp_socket:
1629 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1630 goto discard_it;
1631
1632 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1633 bad_packet:
1634 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1635 } else {
1636 tcp_v6_send_reset(skb);
1637 }
1638
1639 discard_it:
1640
1641 /*
1642 * Discard frame
1643 */
1644
1645 kfree_skb(skb);
1646 return 0;
1647
1648 discard_and_relse:
1649 sock_put(sk);
1650 goto discard_it;
1651
1652 do_time_wait:
1653 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1654 inet_twsk_put((struct inet_timewait_sock *)sk);
1655 goto discard_it;
1656 }
1657
1658 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1659 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1660 inet_twsk_put((struct inet_timewait_sock *)sk);
1661 goto discard_it;
1662 }
1663
1664 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1665 skb, th)) {
1666 case TCP_TW_SYN:
1667 {
1668 struct sock *sk2;
1669
1670 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1671 &skb->nh.ipv6h->daddr,
1672 ntohs(th->dest), inet6_iif(skb));
1673 if (sk2 != NULL) {
1674 struct inet_timewait_sock *tw = inet_twsk(sk);
1675 inet_twsk_deschedule(tw, &tcp_death_row);
1676 inet_twsk_put(tw);
1677 sk = sk2;
1678 goto process;
1679 }
1680 /* Fall through to ACK */
1681 }
1682 case TCP_TW_ACK:
1683 tcp_v6_timewait_ack(sk, skb);
1684 break;
1685 case TCP_TW_RST:
1686 goto no_tcp_socket;
1687 case TCP_TW_SUCCESS:;
1688 }
1689 goto discard_it;
1690 }
1691
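/* Re-validate the cached route for the socket, recreating it (including the
 * xfrm lookup) if the old dst entry has been obsoleted.
 */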
1692 static int tcp_v6_rebuild_header(struct sock *sk)
1693 {
1694 int err;
1695 struct dst_entry *dst;
1696 struct ipv6_pinfo *np = inet6_sk(sk);
1697
1698 dst = __sk_dst_check(sk, np->dst_cookie);
1699
1700 if (dst == NULL) {
1701 struct inet_sock *inet = inet_sk(sk);
1702 struct in6_addr *final_p = NULL, final;
1703 struct flowi fl;
1704
1705 memset(&fl, 0, sizeof(fl));
1706 fl.proto = IPPROTO_TCP;
1707 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1708 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1709 fl.fl6_flowlabel = np->flow_label;
1710 fl.oif = sk->sk_bound_dev_if;
1711 fl.fl_ip_dport = inet->dport;
1712 fl.fl_ip_sport = inet->sport;
1713
1714 if (np->opt && np->opt->srcrt) {
1715 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1716 ipv6_addr_copy(&final, &fl.fl6_dst);
1717 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1718 final_p = &final;
1719 }
1720
1721 err = ip6_dst_lookup(sk, &dst, &fl);
1722 if (err) {
1723 sk->sk_route_caps = 0;
1724 return err;
1725 }
1726 if (final_p)
1727 ipv6_addr_copy(&fl.fl6_dst, final_p);
1728
1729 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1730 sk->sk_err_soft = -err;
1731 return err;
1732 }
1733
1734 ip6_dst_store(sk, dst, NULL);
1735 sk->sk_route_caps = dst->dev->features &
1736 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1737 }
1738
1739 return 0;
1740 }
1741
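/* queue_xmit hook of the IPv6 af-specific ops: refresh the cached route if
 * necessary and hand the segment to ip6_xmit().
 */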
1742 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1743 {
1744 struct sock *sk = skb->sk;
1745 struct inet_sock *inet = inet_sk(sk);
1746 struct ipv6_pinfo *np = inet6_sk(sk);
1747 struct flowi fl;
1748 struct dst_entry *dst;
1749 struct in6_addr *final_p = NULL, final;
1750
1751 memset(&fl, 0, sizeof(fl));
1752 fl.proto = IPPROTO_TCP;
1753 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1754 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1755 fl.fl6_flowlabel = np->flow_label;
1756 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1757 fl.oif = sk->sk_bound_dev_if;
1758 fl.fl_ip_sport = inet->sport;
1759 fl.fl_ip_dport = inet->dport;
1760
1761 if (np->opt && np->opt->srcrt) {
1762 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1763 ipv6_addr_copy(&final, &fl.fl6_dst);
1764 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1765 final_p = &final;
1766 }
1767
1768 dst = __sk_dst_check(sk, np->dst_cookie);
1769
1770 if (dst == NULL) {
1771 int err = ip6_dst_lookup(sk, &dst, &fl);
1772
1773 if (err) {
1774 sk->sk_err_soft = -err;
1775 return err;
1776 }
1777
1778 if (final_p)
1779 ipv6_addr_copy(&fl.fl6_dst, final_p);
1780
1781 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1782 sk->sk_route_caps = 0;
1783 return err;
1784 }
1785
1786 ip6_dst_store(sk, dst, NULL);
1787 sk->sk_route_caps = dst->dev->features &
1788 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1789 }
1790
1791 skb->dst = dst_clone(dst);
1792
1793 /* Restore final destination back after routing done */
1794 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1795
1796 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1797 }
1798
1799 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1800 {
1801 struct ipv6_pinfo *np = inet6_sk(sk);
1802 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1803
1804 sin6->sin6_family = AF_INET6;
1805 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1806 sin6->sin6_port = inet_sk(sk)->dport;
1807 /* We do not store received flowlabel for TCP */
1808 sin6->sin6_flowinfo = 0;
1809 sin6->sin6_scope_id = 0;
1810 if (sk->sk_bound_dev_if &&
1811 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1812 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1813 }
1814
1815 static int tcp_v6_remember_stamp(struct sock *sk)
1816 {
1817 /* Alas, not yet... */
1818 return 0;
1819 }
1820
1821 static struct tcp_func ipv6_specific = {
1822 .queue_xmit = tcp_v6_xmit,
1823 .send_check = tcp_v6_send_check,
1824 .rebuild_header = tcp_v6_rebuild_header,
1825 .conn_request = tcp_v6_conn_request,
1826 .syn_recv_sock = tcp_v6_syn_recv_sock,
1827 .remember_stamp = tcp_v6_remember_stamp,
1828 .net_header_len = sizeof(struct ipv6hdr),
1829
1830 .setsockopt = ipv6_setsockopt,
1831 .getsockopt = ipv6_getsockopt,
1832 .addr2sockaddr = v6_addr2sockaddr,
1833 .sockaddr_len = sizeof(struct sockaddr_in6)
1834 };
1835
1836 /*
1837 * TCP over IPv4 via INET6 API
1838 */
1839
1840 static struct tcp_func ipv6_mapped = {
1841 .queue_xmit = ip_queue_xmit,
1842 .send_check = tcp_v4_send_check,
1843 .rebuild_header = inet_sk_rebuild_header,
1844 .conn_request = tcp_v6_conn_request,
1845 .syn_recv_sock = tcp_v6_syn_recv_sock,
1846 .remember_stamp = tcp_v4_remember_stamp,
1847 .net_header_len = sizeof(struct iphdr),
1848
1849 .setsockopt = ipv6_setsockopt,
1850 .getsockopt = ipv6_getsockopt,
1851 .addr2sockaddr = v6_addr2sockaddr,
1852 .sockaddr_len = sizeof(struct sockaddr_in6)
1853 };
1854
1855
1856
1857 /* NOTE: A lot of things are set to zero explicitly by the call to
1858 * sk_alloc(), so they need not be done here.
1859 */
1860 static int tcp_v6_init_sock(struct sock *sk)
1861 {
1862 struct inet_connection_sock *icsk = inet_csk(sk);
1863 struct tcp_sock *tp = tcp_sk(sk);
1864
1865 skb_queue_head_init(&tp->out_of_order_queue);
1866 tcp_init_xmit_timers(sk);
1867 tcp_prequeue_init(tp);
1868
1869 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1870 tp->mdev = TCP_TIMEOUT_INIT;
1871
1872 /* So many TCP implementations out there (incorrectly) count the
1873 * initial SYN frame in their delayed-ACK and congestion control
1874 * algorithms that we must have the following bandaid to talk
1875 * efficiently to them. -DaveM
1876 */
1877 tp->snd_cwnd = 2;
1878
1879 /* See draft-stevens-tcpca-spec-01 for discussion of the
1880 * initialization of these values.
1881 */
1882 tp->snd_ssthresh = 0x7fffffff;
1883 tp->snd_cwnd_clamp = ~0;
1884 tp->mss_cache = 536;
1885
1886 tp->reordering = sysctl_tcp_reordering;
1887
1888 sk->sk_state = TCP_CLOSE;
1889
1890 tp->af_specific = &ipv6_specific;
1891 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1892 sk->sk_write_space = sk_stream_write_space;
1893 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1894
1895 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1896 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1897
1898 atomic_inc(&tcp_sockets_allocated);
1899
1900 return 0;
1901 }
1902
1903 static int tcp_v6_destroy_sock(struct sock *sk)
1904 {
1905 tcp_v4_destroy_sock(sk);
1906 return inet6_destroy_sock(sk);
1907 }
1908
1909 /* Proc filesystem TCPv6 sock list dumping. */
1910 static void get_openreq6(struct seq_file *seq,
1911 struct sock *sk, struct request_sock *req, int i, int uid)
1912 {
1913 struct in6_addr *dest, *src;
1914 int ttd = req->expires - jiffies;
1915
1916 if (ttd < 0)
1917 ttd = 0;
1918
1919 src = &tcp6_rsk(req)->loc_addr;
1920 dest = &tcp6_rsk(req)->rmt_addr;
1921 seq_printf(seq,
1922 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1923 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1924 i,
1925 src->s6_addr32[0], src->s6_addr32[1],
1926 src->s6_addr32[2], src->s6_addr32[3],
1927 ntohs(inet_sk(sk)->sport),
1928 dest->s6_addr32[0], dest->s6_addr32[1],
1929 dest->s6_addr32[2], dest->s6_addr32[3],
1930 ntohs(inet_rsk(req)->rmt_port),
1931 TCP_SYN_RECV,
1932 0,0, /* could print option size, but that is af dependent. */
1933 1, /* timers active (only the expire timer) */
1934 jiffies_to_clock_t(ttd),
1935 req->retrans,
1936 uid,
1937 0, /* non standard timer */
1938 0, /* open_requests have no inode */
1939 0, req);
1940 }
1941
1942 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1943 {
1944 struct in6_addr *dest, *src;
1945 __u16 destp, srcp;
1946 int timer_active;
1947 unsigned long timer_expires;
1948 struct inet_sock *inet = inet_sk(sp);
1949 struct tcp_sock *tp = tcp_sk(sp);
1950 const struct inet_connection_sock *icsk = inet_csk(sp);
1951 struct ipv6_pinfo *np = inet6_sk(sp);
1952
1953 dest = &np->daddr;
1954 src = &np->rcv_saddr;
1955 destp = ntohs(inet->dport);
1956 srcp = ntohs(inet->sport);
1957
1958 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1959 timer_active = 1;
1960 timer_expires = icsk->icsk_timeout;
1961 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1962 timer_active = 4;
1963 timer_expires = icsk->icsk_timeout;
1964 } else if (timer_pending(&sp->sk_timer)) {
1965 timer_active = 2;
1966 timer_expires = sp->sk_timer.expires;
1967 } else {
1968 timer_active = 0;
1969 timer_expires = jiffies;
1970 }
1971
1972 seq_printf(seq,
1973 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1974 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1975 i,
1976 src->s6_addr32[0], src->s6_addr32[1],
1977 src->s6_addr32[2], src->s6_addr32[3], srcp,
1978 dest->s6_addr32[0], dest->s6_addr32[1],
1979 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1980 sp->sk_state,
1981 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1982 timer_active,
1983 jiffies_to_clock_t(timer_expires - jiffies),
1984 icsk->icsk_retransmits,
1985 sock_i_uid(sp),
1986 icsk->icsk_probes_out,
1987 sock_i_ino(sp),
1988 atomic_read(&sp->sk_refcnt), sp,
1989 icsk->icsk_rto,
1990 icsk->icsk_ack.ato,
1991 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1992 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1993 );
1994 }
1995
1996 static void get_timewait6_sock(struct seq_file *seq,
1997 struct inet_timewait_sock *tw, int i)
1998 {
1999 struct in6_addr *dest, *src;
2000 __u16 destp, srcp;
2001 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2002 int ttd = tw->tw_ttd - jiffies;
2003
2004 if (ttd < 0)
2005 ttd = 0;
2006
2007 dest = &tcp6tw->tw_v6_daddr;
2008 src = &tcp6tw->tw_v6_rcv_saddr;
2009 destp = ntohs(tw->tw_dport);
2010 srcp = ntohs(tw->tw_sport);
2011
2012 seq_printf(seq,
2013 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2014 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2015 i,
2016 src->s6_addr32[0], src->s6_addr32[1],
2017 src->s6_addr32[2], src->s6_addr32[3], srcp,
2018 dest->s6_addr32[0], dest->s6_addr32[1],
2019 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2020 tw->tw_substate, 0, 0,
2021 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2022 atomic_read(&tw->tw_refcnt), tw);
2023 }
2024
2025 #ifdef CONFIG_PROC_FS
2026 static int tcp6_seq_show(struct seq_file *seq, void *v)
2027 {
2028 struct tcp_iter_state *st;
2029
2030 if (v == SEQ_START_TOKEN) {
2031 seq_puts(seq,
2032 " sl "
2033 "local_address "
2034 "remote_address "
2035 "st tx_queue rx_queue tr tm->when retrnsmt"
2036 " uid timeout inode\n");
2037 goto out;
2038 }
2039 st = seq->private;
2040
2041 switch (st->state) {
2042 case TCP_SEQ_STATE_LISTENING:
2043 case TCP_SEQ_STATE_ESTABLISHED:
2044 get_tcp6_sock(seq, v, st->num);
2045 break;
2046 case TCP_SEQ_STATE_OPENREQ:
2047 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2048 break;
2049 case TCP_SEQ_STATE_TIME_WAIT:
2050 get_timewait6_sock(seq, v, st->num);
2051 break;
2052 }
2053 out:
2054 return 0;
2055 }
2056
2057 static struct file_operations tcp6_seq_fops;
2058 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2059 .owner = THIS_MODULE,
2060 .name = "tcp6",
2061 .family = AF_INET6,
2062 .seq_show = tcp6_seq_show,
2063 .seq_fops = &tcp6_seq_fops,
2064 };
2065
2066 int __init tcp6_proc_init(void)
2067 {
2068 return tcp_proc_register(&tcp6_seq_afinfo);
2069 }
2070
2071 void tcp6_proc_exit(void)
2072 {
2073 tcp_proc_unregister(&tcp6_seq_afinfo);
2074 }
2075 #endif
2076
2077 struct proto tcpv6_prot = {
2078 .name = "TCPv6",
2079 .owner = THIS_MODULE,
2080 .close = tcp_close,
2081 .connect = tcp_v6_connect,
2082 .disconnect = tcp_disconnect,
2083 .accept = inet_csk_accept,
2084 .ioctl = tcp_ioctl,
2085 .init = tcp_v6_init_sock,
2086 .destroy = tcp_v6_destroy_sock,
2087 .shutdown = tcp_shutdown,
2088 .setsockopt = tcp_setsockopt,
2089 .getsockopt = tcp_getsockopt,
2090 .sendmsg = tcp_sendmsg,
2091 .recvmsg = tcp_recvmsg,
2092 .backlog_rcv = tcp_v6_do_rcv,
2093 .hash = tcp_v6_hash,
2094 .unhash = tcp_unhash,
2095 .get_port = tcp_v6_get_port,
2096 .enter_memory_pressure = tcp_enter_memory_pressure,
2097 .sockets_allocated = &tcp_sockets_allocated,
2098 .memory_allocated = &tcp_memory_allocated,
2099 .memory_pressure = &tcp_memory_pressure,
2100 .orphan_count = &tcp_orphan_count,
2101 .sysctl_mem = sysctl_tcp_mem,
2102 .sysctl_wmem = sysctl_tcp_wmem,
2103 .sysctl_rmem = sysctl_tcp_rmem,
2104 .max_header = MAX_TCP_HEADER,
2105 .obj_size = sizeof(struct tcp6_sock),
2106 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2107 .rsk_prot = &tcp6_request_sock_ops,
2108 };
2109
2110 static struct inet6_protocol tcpv6_protocol = {
2111 .handler = tcp_v6_rcv,
2112 .err_handler = tcp_v6_err,
2113 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2114 };
2115
2116 static struct inet_protosw tcpv6_protosw = {
2117 .type = SOCK_STREAM,
2118 .protocol = IPPROTO_TCP,
2119 .prot = &tcpv6_prot,
2120 .ops = &inet6_stream_ops,
2121 .capability = -1,
2122 .no_check = 0,
2123 .flags = INET_PROTOSW_PERMANENT,
2124 };
2125
2126 void __init tcpv6_init(void)
2127 {
2128 /* register inet6 protocol */
2129 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2130 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2131 inet6_register_protosw(&tcpv6_protosw);
2132 }