/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

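/* Called at connect() time when the chosen (saddr, sport, daddr, dport)
 * tuple collides with a TIME-WAIT bucket.  Returns 1 if the bucket may
 * be recycled for the new connection.  The new write_seq is placed
 * 65535 + 2 bytes past the old tw_snd_nxt, so segments of the old
 * incarnation can never fall inside the new sequence space.
 */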
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

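/* TCP_REPAIR path: bring the socket straight to the connected state
 * without emitting a SYN, as used by checkpoint/restore tools that
 * rebuild a previously established connection from saved state.
 */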
static int tcp_repair_connect(struct sock *sk)
{
	tcp_connect_init(sk);
	tcp_finish_connect(sk, NULL);

	return 0;
}

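/* tcp_v4_connect() is wired up as the .connect handler of tcp_prot, so a
 * connect(2) on an IPv4 TCP socket reaches it via inet_stream_connect()
 * and sk->sk_prot->connect().
 */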
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the
	 * hash tables and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	if (likely(!tp->repair))
		err = tcp_connect(sk);
	else
		err = tcp_repair_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	/* We are not interested in TCP_LISTEN and open_requests
	 * (SYN-ACKs sent out by Linux are always < 576 bytes, so they
	 * should go through unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection is not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk) &&
	    type != ICMP_DEST_UNREACH &&
	    code != ICMP_FRAG_NEEDED)
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

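		/* RTO revert (draft-zimmermann-tcp-lcd): the unreachability
		 * report suggests the retransmissions were lost on the path
		 * rather than dropped by congestion, so undo one
		 * exponential-backoff step, recompute the RTO, and re-arm
		 * the retransmit timer with whatever time is still
		 * outstanding for the head skb.
		 */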
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can occur, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

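/* Fill in the TCP checksum.  With CHECKSUM_PARTIAL the hardware finishes
 * the job: only the pseudo-header sum is stored in th->check, and
 * csum_start/csum_offset tell the device where to write the final value.
 * Otherwise the full checksum is computed in software here.
 */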
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		 for reset.
 * Answer: if a packet caused RST, it is not for a socket
 *	   existing in our system, if it is matched to a socket,
 *	   it is just duplicate segment or bug in other side's TCP.
 *	   So that we build reply only basing on parameters
 *	   arrived with segment.
 * Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We are not losing security here:
		 * the incoming packet is checked with the md5 hash of the
		 * found key; no RST is generated if the md5 hash doesn't
		 * match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. using iif for oif to
	 * make sure we can deliver it
	 */
	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

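/* Answer a segment that lands on a TIME-WAIT socket: re-ACK with the
 * final sequence numbers remembered in the timewait bucket, echoing the
 * stored timestamp so the peer's PAWS checks keep working.
 */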
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 * Send a SYN-ACK after having received a SYN.
 * This still operates on a request_sock only, not on a big
 * socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp,
			      u16 queue_mapping,
			      bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
}

/*
 * IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos, *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

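/* RFC 2385 signs a segment as MD5(pseudo-header, TCP header with a
 * zeroed checksum field, payload, key).  tcp_v4_md5_hash_hdr() covers
 * the header-only case used for locally built replies, while
 * tcp_v4_md5_hash_skb() also folds in the payload of an skb.
 */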
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);

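	/* TCP Cookie Transactions (the experimental RFC 6013 extension):
	 * when the peer supplied a cookie option, fold the connection
	 * addresses and the initiator cookie into the cookie bakery;
	 * classic SYN cookies are then abandoned for this request.
	 */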
	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = false;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb);

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext,
			       skb_get_queue_mapping(skb),
			       want_cookie) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

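/* Note: the listening socket is locked by the caller, and the child
 * produced by tcp_create_openreq_child() comes back locked as well,
 * which is why the error path below unlocks it before dropping the
 * last reference.
 */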
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

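/* Validate the TCP checksum of an incoming segment.  With
 * CHECKSUM_COMPLETE the device already summed the data, so only the
 * pseudo-header needs folding in; short packets (<= 76 bytes) are cheap
 * enough to verify immediately, while longer ones keep a partial sum in
 * skb->csum and are checked later, when the data is touched anyway.
 */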
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, 0) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

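/* Early demux: look the established socket up while the packet is still
 * at the IP layer, so its cached rx dst can be attached to the skb and
 * the routing lookup skipped on the hot path.
 */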
void tcp_v4_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(net, &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

/*
 *	From tcp_input.c
 */

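/* Main entry point for IPv4 TCP segments, called from the IP layer.
 * Validates the header and checksum, looks up the owning socket, then
 * delivers the segment: via the prequeue when a process is waiting in
 * recvmsg, directly via tcp_v4_do_rcv() otherwise, or onto the socket
 * backlog when the socket is currently locked by a user context.
 */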
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checksum checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

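/* Remember the input route of this skb on the socket, so that early
 * demux can attach it to future segments of the same connection.
 */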
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	dst_hold(dst);
	sk->sk_rx_dst = dst;
	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

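/* Final per-socket teardown, run when the socket is destroyed: stop the
 * timers, purge every queue that may still hold skbs, drop the bind
 * bucket reference and release any MD5, TCP cookie and Fast Open state.
 */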
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean the prequeue; it should already be empty. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If a sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	/* If the socket is aborted during the connect operation */
	tcp_free_fastopen_req(tp);

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

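/* Helpers for walking the "nulls"-terminated time-wait chains; both
 * return NULL once the end-of-list marker is reached.
 */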
static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

/*
 * Get the next listener socket following cur. If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

/*
 * Get the first established socket starting from the bucket given in
 * st->bucket. If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

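/* Resume iteration at the bucket and in-bucket offset saved in the
 * iterator state, so that consecutive reads of the seq file do not
 * have to rescan the hash tables from the beginning on every read().
 */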
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

2277
1da177e4
LT
2278static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2279{
5799de0b 2280 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2281 void *rc;
2282
2283 if (*pos && *pos == st->last_pos) {
2284 rc = tcp_seek_last_pos(seq);
2285 if (rc)
2286 goto out;
2287 }
2288
1da177e4
LT
2289 st->state = TCP_SEQ_STATE_LISTENING;
2290 st->num = 0;
a8b690f9
TH
2291 st->bucket = 0;
2292 st->offset = 0;
2293 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2294
2295out:
2296 st->last_pos = *pos;
2297 return rc;
1da177e4
LT
2298}
2299
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

2331
2332static void tcp_seq_stop(struct seq_file *seq, void *v)
2333{
5799de0b 2334 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2335
2336 switch (st->state) {
2337 case TCP_SEQ_STATE_OPENREQ:
2338 if (v) {
463c84b9
ACM
2339 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2340 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2341 }
2342 case TCP_SEQ_STATE_LISTENING:
2343 if (v != SEQ_START_TOKEN)
5caea4ea 2344 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2345 break;
2346 case TCP_SEQ_STATE_TIME_WAIT:
2347 case TCP_SEQ_STATE_ESTABLISHED:
2348 if (v)
9db66bdc 2349 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2350 break;
2351 }
2352}
2353
int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	s->last_pos = 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start	= tcp_seq_start;
	afinfo->seq_ops.next	= tcp_seq_next;
	afinfo->seq_ops.stop	= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);

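/* Format one /proc/net/tcp line for a request socket still in SYN_RECV.
 * The columns match the header emitted by tcp4_seq_show(): sl,
 * local_address, rem_address, st, tx_queue:rx_queue, tr:tm->when,
 * retrnsmt, uid, timeout, inode, followed by a refcount and a kernel
 * pointer.
 */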
static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	long delta = tw->tw_ttd - jiffies;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

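/* GRO (generic receive offload) hooks.  tcp4_gro_receive() verifies the
 * checksum when the device supplied one and then lets the common TCP
 * GRO engine try to coalesce the segment; tcp4_gro_complete() fixes up
 * the header of the resulting super-packet and marks it as TCPv4 GSO so
 * that it can later be resegmented.
 */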
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

int tcp4_gro_complete(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}

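/* The protocol descriptor for IPv4 TCP sockets.  The socket layer
 * dispatches through this table for every operation on an AF_INET
 * SOCK_STREAM socket.
 */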
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.mtu_reduced		= tcp_v4_mtu_reduced,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

static int __net_init tcp_sk_init(struct net *net)
{
	return 0;
}

static void __net_exit tcp_sk_exit(struct net *net)
{
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}