/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller	:	New socket lookup architecture.
 *				This code is dedicated to John Dyson.
 *	David S. Miller :	Change semantics of established hash,
 *				half is devoted to TIME_WAIT sockets
 *				and the rest go in the other half.
 *	Andi Kleen :		Add support for syncookies and fixed
 *				some bugs: ip options weren't passed to
 *				the TCP layer, missed a check for an
 *				ACK bit.
 *	Andi Kleen :		Implemented fast path mtu discovery.
 *				Fixed many serious bugs in the
 *				request_sock handling and moved
 *				most of it into the af independent code.
 *				Added tail drop and some other bugfixes.
 *				Added new listen semantics.
 *	Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:	ip_dynaddr bits
 *	Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
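	/* A rough sketch of the arithmetic below: the reused connection
	 * starts at write_seq = tw_snd_nxt + 65535 + 2, i.e. just past the
	 * largest unscaled window the old incarnation could still have in
	 * flight, so the two sequence spaces cannot overlap.
	 */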
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the
	 * hash tables and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

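	/* Seed the IP ID counter from the initial sequence number, mixed
	 * with jiffies so two connections to the same peer do not start
	 * from the same point (an informal rationale, not from the
	 * original comments).
	 */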
	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

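/* The quoted packet inside an ICMP error carries the original IP header
 * plus at least 8 bytes of the TCP header - just enough for the port
 * numbers and the sequence number that the lookup below relies on.
 */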
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, f.e., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else { /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *	So that we build reply only basing on parameters
 *	arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not loosen security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. using iif for oif to
	 * make sure we can deliver it
	 */
	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is ugly, certainly. What can I do?
 */

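/* (In both states there is no full socket to hang the reply off, so the
 * ACK is built purely from fields of the incoming skb.)
 */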
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

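		/* offset 3: when a timestamp option was emitted above it
		 * filled rep.opt[0..2], so the MD5 option starts after it.
		 */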
		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return 1 if a syncookie should be sent
 */
int tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	int want_cookie = 0;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = 1;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

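/* Typical flow, as a sketch: a key is installed per peer address via
 * setsockopt(TCP_MD5SIG) -> tcp_v4_parse_md5_keys() -> tcp_md5_do_add(),
 * and segments then look the key up with tcp_md5_do_lookup() under
 * rcu_read_lock() or the socket lock.
 */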
/* Find the Key structure for an address. */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos, *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				&iph->saddr, ntohs(th->source),
				&iph->daddr, ntohs(th->dest),
				genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	int want_cookie = 0;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr &&
		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

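	/* For short packets it is cheaper to verify the checksum right
	 * away than to defer the work (the 76-byte cutoff is a heuristic).
	 */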
1da177e4 1580 if (skb->len <= 76) {
fb286bb2 1581 return __skb_checksum_complete(skb);
1da177e4
LT
1582 }
1583 return 0;
1584}
1585
1586
1587/* The socket must have it's spinlock held when we get
1588 * here.
1589 *
1590 * We have a potential double-lock case here, so even when
1591 * doing backlog processing we use the BH locking scheme.
1592 * This is because we cannot sleep with the original spinlock
1593 * held.
1594 */
1595int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1596{
cfb6eeb4
YH
1597 struct sock *rsk;
1598#ifdef CONFIG_TCP_MD5SIG
1599 /*
1600 * We really want to reject the packet as early as possible
1601 * if:
1602 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1603 * o There is an MD5 option and we're not expecting one
1604 */
7174259e 1605 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1606 goto discard;
1607#endif
1608
1da177e4 1609 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
bdeab991 1610 sock_rps_save_rxhash(sk, skb);
aa8223c7 1611 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1612 rsk = sk;
1da177e4 1613 goto reset;
cfb6eeb4 1614 }
1da177e4
LT
1615 return 0;
1616 }
1617
ab6a5bb6 1618 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1619 goto csum_err;
1620
1621 if (sk->sk_state == TCP_LISTEN) {
1622 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1623 if (!nsk)
1624 goto discard;
1625
1626 if (nsk != sk) {
bdeab991 1627 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1628 if (tcp_child_process(sk, nsk, skb)) {
1629 rsk = nsk;
1da177e4 1630 goto reset;
cfb6eeb4 1631 }
1da177e4
LT
1632 return 0;
1633 }
ca55158c 1634 } else
bdeab991 1635 sock_rps_save_rxhash(sk, skb);
ca55158c 1636
aa8223c7 1637 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1638 rsk = sk;
1da177e4 1639 goto reset;
cfb6eeb4 1640 }
1da177e4
LT
1641 return 0;
1642
1643reset:
cfb6eeb4 1644 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1645discard:
1646 kfree_skb(skb);
1647 /* Be careful here. If this function gets more complicated and
1648 * gcc suffers from register pressure on the x86, sk (in %ebx)
1649 * might be destroyed here. This current version compiles correctly,
1650 * but you have been warned.
1651 */
1652 return 0;
1653
1654csum_err:
63231bdd 1655 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1656 goto discard;
1657}
4bc2f18b 1658EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4
LT
1659
1660/*
1661 * From tcp_input.c
1662 */
1663
1664int tcp_v4_rcv(struct sk_buff *skb)
1665{
eddc9ec5 1666 const struct iphdr *iph;
cf533ea5 1667 const struct tcphdr *th;
1da177e4
LT
1668 struct sock *sk;
1669 int ret;
a86b1e30 1670 struct net *net = dev_net(skb->dev);
1da177e4
LT
1671
1672 if (skb->pkt_type != PACKET_HOST)
1673 goto discard_it;
1674
1675 /* Count it even if it's bad */
63231bdd 1676 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1677
1678 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1679 goto discard_it;
1680
aa8223c7 1681 th = tcp_hdr(skb);
1da177e4
LT
1682
1683 if (th->doff < sizeof(struct tcphdr) / 4)
1684 goto bad_packet;
1685 if (!pskb_may_pull(skb, th->doff * 4))
1686 goto discard_it;
1687
1688 /* An explanation is required here, I think.
1689 * Packet length and doff are validated by header prediction,
caa20d9a 1690 * provided case of th->doff==0 is eliminated.
1da177e4 1691 * So, we defer the checks. */
60476372 1692 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1da177e4
LT
1693 goto bad_packet;
1694
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;

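	/* Note that end_seq counts one unit of sequence space for SYN and
	 * one for FIN in addition to the payload. Illustrative example:
	 * seq = 1000, th->doff = 10 (40-byte header), skb->len = 140
	 * (100 bytes of payload) and FIN set gives
	 * end_seq = 1000 + 0 + 1 + 140 - 40 = 1101.
	 */
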
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
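	/* Three delivery paths from here: if no user task owns the socket,
	 * the segment is either queued to the prequeue for processing in
	 * the receiver's context or handled immediately by
	 * tcp_v4_do_rcv(); if the socket is owned, the segment goes to the
	 * backlog and is replayed when the owner calls release_sock().
	 */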
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
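
/* A SYN arriving for a four-tuple still in TIME_WAIT (TCP_TW_SYN above)
 * is redirected to a matching listener when one exists, allowing the
 * port pair to be reused for a fresh connection without waiting out the
 * full TIME_WAIT period.
 */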

struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
{
	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct inet_peer *peer;

	if (!rt ||
	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
		peer = inet_getpeer_v4(inet->inet_daddr, 1);
		*release_it = true;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, inet->inet_daddr, 1);
		peer = rt->peer;
		*release_it = false;
	}

	return peer;
}
EXPORT_SYMBOL(tcp_v4_get_peer);

void *tcp_v4_tw_get_peer(struct sock *sk)
{
	const struct inet_timewait_sock *tw = inet_twsk(sk);

	return inet_getpeer_v4(tw->tw_daddr, 1);
}
EXPORT_SYMBOL(tcp_v4_tw_get_peer);

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v4_tw_get_peer,
};
1da177e4 1843
3b401a81 1844const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1845 .queue_xmit = ip_queue_xmit,
1846 .send_check = tcp_v4_send_check,
1847 .rebuild_header = inet_sk_rebuild_header,
1848 .conn_request = tcp_v4_conn_request,
1849 .syn_recv_sock = tcp_v4_syn_recv_sock,
3f419d2d 1850 .get_peer = tcp_v4_get_peer,
543d9cfe
ACM
1851 .net_header_len = sizeof(struct iphdr),
1852 .setsockopt = ip_setsockopt,
1853 .getsockopt = ip_getsockopt,
1854 .addr2sockaddr = inet_csk_addr2sockaddr,
1855 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1856 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1857#ifdef CONFIG_COMPAT
543d9cfe
ACM
1858 .compat_setsockopt = compat_ip_setsockopt,
1859 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1860#endif
1da177e4 1861};
4bc2f18b 1862EXPORT_SYMBOL(ipv4_specific);
1da177e4 1863
cfb6eeb4 1864#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1865static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1866 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1867 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1868 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1869};
b6332e6c 1870#endif
cfb6eeb4 1871
1da177e4
LT
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = TCP_INIT_CWND;

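	/* TCP_INIT_CWND above is 10 segments in this kernel (the IW10
	 * change, later standardized as RFC 6928), up from the earlier
	 * RFC 3390 value of at most 4 segments.
	 */
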
	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	sock_update_memcg(sk);
	sk_sockets_allocated_inc(sk);
	local_bh_enable();

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
	       list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
	       hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

/*
 * Get the next listening socket following cur. If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

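/* The listener walk above is three-level: listening hash bucket ->
 * socket in that bucket -> pending request_socks (SYN_RECV entries)
 * hanging off each socket's accept queue; st->bucket and st->sbucket
 * track the outer and inner hash positions respectively.
 */
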
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for next non empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

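/* The dump order is therefore: all listening sockets (including their
 * pending open requests) first, then the established hash, whose
 * buckets also carry TIME_WAIT sockets on a separate twchain. A single
 * loff_t position indexes across both tables.
 */
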
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

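/* tcp_seek_last_pos() resumes a sequential read at the remembered
 * bucket and in-bucket offset instead of re-walking every entry from
 * position zero, keeping large /proc/net/tcp reads roughly linear
 * rather than quadratic in the number of sockets.
 */
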
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	s->last_pos = 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start	= tcp_seq_start;
	afinfo->seq_ops.next	= tcp_seq_next;
	afinfo->seq_ops.stop	= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

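/* In the output above, the "tr" column encodes the pending timer:
 * 0 = none, 1 = retransmit, 2 = keepalive (sk_timer), 3 = TIME_WAIT
 * (see get_timewait4_sock() below), 4 = zero-window probe; "tm->when"
 * is the remaining time, in clock ticks.
 */
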
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

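/* Every record, including the header, is padded to TMPSZ - 1 characters
 * plus a newline, so /proc/net/tcp presents fixed-size lines that
 * userspace can seek through by record number.
 */
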
static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

int tcp4_gro_complete(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}

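/* tcp4_gro_complete() runs when a merged GRO super-packet is handed up
 * the stack: it seeds th->check with the pseudo-header checksum for the
 * combined length and marks the skb as TCPv4 GSO so it can be
 * re-segmented if it is forwarded rather than delivered locally.
 */
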
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}