/* net/ipv4/tcp_ipv4.c */

/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

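/* Pick the initial sequence number for a new connection from the addresses
 * and ports of the incoming SYN, via the kernel's keyed-hash generator, so
 * that ISNs are hard for an off-path attacker to predict.
 */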
static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

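/* Hand an ICMP redirect to the socket's cached route, if the cached dst
 * entry is still valid, so later packets follow the new next hop.
 */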
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case:
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && fastopen->sk == NULL)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows considering as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

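/* Fill in the TCP checksum of an outgoing segment: leave a partial checksum
 * for the NIC to finish when hardware offload is available, otherwise
 * compute the full checksum in software.
 */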
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

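/* Answer a segment that lands on a TIME-WAIT bucket with the ACK that state
 * requires, echoing the timestamp and MD5 information kept in the bucket.
 */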
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      u16 queue_mapping,
			      struct tcp_fastopen_cookie *foc)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

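/* Retransmit the SYN-ACK of a still-embryonic connection and account for
 * the retransmission in the SNMP counters.
 */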
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);

	if (!res) {
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
	}
	return res;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

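/* Drop every MD5 key attached to the socket.  The unconditional
 * rcu_dereference_protected(..., 1) is safe because this runs only while
 * the socket is being torn down and no other user can reach the list.
 */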
static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

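/* Handle the TCP_MD5SIG setsockopt(): a zero key length deletes the entry
 * for the given peer address, anything else adds or replaces it.
 *
 * A minimal userspace sketch of driving this path (illustration only; "fd"
 * is assumed to be a TCP socket, "peer" the peer's struct sockaddr_in and
 * "secret" a C string; error handling omitted):
 *
 *	struct tcp_md5sig md5 = { };
 *
 *	memcpy(&md5.tcpm_addr, &peer, sizeof(peer));
 *	md5.tcpm_keylen = strlen(secret);
 *	memcpy(md5.tcpm_key, secret, md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */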
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->ir_loc_addr;
		daddr = inet_rsk(req)->ir_rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

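/* Operations the listening socket applies to its embryonic connections:
 * (re)sending SYN-ACKs and ACKs, sending resets, and tearing requests down.
 */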
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false, fastopen;
	struct flowi4 fl4;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int err;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and the peer is
	 * evidently a real one.
	 */
	if ((sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->ir_loc_addr = daddr;
	ireq->ir_rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(skb);
	ireq->ir_mark = inet_request_mark(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb, sock_net(sk));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		goto drop_and_free;

	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;
	tcp_openreq_init_rwin(req, sk, dst);
	fastopen = !want_cookie &&
		   tcp_try_fastopen(sk, skb, req, &foc, dst);
	err = tcp_v4_send_synack(sk, dst, req,
				 skb_get_queue_mapping(skb), &foc);
	if (!fastopen) {
		if (err || want_cookie)
			goto drop_and_free;

		tcp_rsk(req)->snt_synack = tcp_time_stamp;
		tcp_rsk(req)->listener = NULL;
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}

	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->ir_rmt_addr;
	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

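/* For a segment arriving on a listening socket, find the matching embryonic
 * request or an already-established child; with syncookies enabled, a bare
 * ACK that matches nothing may still validate against a cookie.
 */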
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this has no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, 0) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

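/* Early demux: look up the established socket for an incoming segment while
 * the packet is still early in the receive path, so the socket's cached
 * route can be attached to the skb and the lookup in tcp_v4_rcv() is
 * avoided.
 */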
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)				  --ANK
 *
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	skb_dst_force(skb);
	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);

/*
 *	From tcp_input.c
 */

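/* Main IPv4 TCP receive routine, in outline:
 *
 *  1. validate the TCP header length and checksum;
 *  2. look up the owning socket in the established/listening hashes;
 *  3. apply min TTL, XFRM policy and socket filter checks;
 *  4. with the socket bh-locked, hand the segment to tcp_v4_do_rcv(),
 *     the prequeue, or the socket backlog depending on who owns the
 *     socket; TIME_WAIT sockets take the do_time_wait path instead.
 */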
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	sk_mark_napi_id(sk, skb);
	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
csum_error:
		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2)) {
		inet_twsk_put(inet_twsk(sk));
		goto bad_packet;
	}
	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	dst_hold(dst);
	sk->sk_rx_dst = dst;
	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

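/* AF-specific hooks used by the protocol-independent connection code for
 * IPv4 sockets. (tcp_ipv6.c also switches v4-mapped sockets over to this
 * table in its syn_recv_sock path, which is why it is exported.)
 */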
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk != NULL);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get the next listener socket following cur. If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

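/* seq_file read()s commonly restart the iterator for every chunk;
 * tcp_seq_start() below uses st->last_pos and tcp_seek_last_pos() to
 * resume from the saved bucket/offset instead of rescanning the whole
 * hash table from the beginning each time.
 */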
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family		= afinfo->family;
	s->last_pos		= 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, kuid_t uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f), uid),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * because we don't lock the socket, we might find a
		 * transient negative value
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		sk->sk_state == TCP_LISTEN ?
		    (fastopenq ? fastopenq->max_qlen : 0) :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}

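/* An illustrative /proc/net/tcp row as emitted by get_tcp4_sock() above
 * (values are made up, not from a real system). A socket listening on
 * 127.0.0.1:22 would appear roughly as (wrapped here for width):
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000
 *      00000000     0        0 12345 1 ffff88003d3af3c0 100 0 0 10 0
 *
 * Addresses and ports are hex (address byte order depends on host
 * endianness); 0A is TCP_LISTEN.
 */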
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	s32 delta = tw->tw_ttd - inet_tw_time_stamp();

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (sk->sk_state == TCP_TIME_WAIT)
			get_timewait4_sock(v, seq, st->num);
		else
			get_tcp4_sock(v, seq, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
		break;
	}
out:
	seq_pad(seq, '\n');
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

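/* The AF_INET TCP protocol descriptor: registered from af_inet.c so that
 * socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) dispatches to the handlers
 * defined in this file.
 */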
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.mtu_reduced		= tcp_v4_mtu_reduced,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

static int __net_init tcp_sk_init(struct net *net)
{
	net->ipv4.sysctl_tcp_ecn = 2;
	return 0;
}

static void __net_exit tcp_sk_exit(struct net *net)
{
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}