Merge tag 'sound-3.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
[deliverable/linux.git] / net / netfilter / nf_conntrack_proto_tcp.c
CommitLineData
9fb9cbb1
YK
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9fb9cbb1
YK
7 */
8
9fb9cbb1 9#include <linux/types.h>
9fb9cbb1 10#include <linux/timer.h>
9fb9cbb1
YK
11#include <linux/module.h>
12#include <linux/in.h>
13#include <linux/tcp.h>
14#include <linux/spinlock.h>
15#include <linux/skbuff.h>
16#include <linux/ipv6.h>
17#include <net/ip6_checksum.h>
534f81a5 18#include <asm/unaligned.h>
9fb9cbb1
YK
19
20#include <net/tcp.h>
21
22#include <linux/netfilter.h>
23#include <linux/netfilter_ipv4.h>
24#include <linux/netfilter_ipv6.h>
25#include <net/netfilter/nf_conntrack.h>
605dcad6 26#include <net/netfilter/nf_conntrack_l4proto.h>
f6180121 27#include <net/netfilter/nf_conntrack_ecache.h>
f01ffbd6 28#include <net/netfilter/nf_log.h>
9d2493f8
CP
29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
9fb9cbb1 31
601e68e1
YH
32/* "Be conservative in what you do,
33 be liberal in what you accept from others."
9fb9cbb1 34 If it's non-zero, we mark only out of window RST segments as INVALID. */
3aef0fd9 35static int nf_ct_tcp_be_liberal __read_mostly = 0;
9fb9cbb1 36
a09113c2 37/* If it is set to zero, we disable picking up already established
9fb9cbb1 38 connections. */
3aef0fd9 39static int nf_ct_tcp_loose __read_mostly = 1;
9fb9cbb1 40
601e68e1
YH
41/* Max number of the retransmitted packets without receiving an (acceptable)
42 ACK from the destination. If this number is reached, a shorter timer
9fb9cbb1 43 will be started. */
3aef0fd9 44static int nf_ct_tcp_max_retrans __read_mostly = 3;
9fb9cbb1
YK
45
46 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
47 closely. They're more complex. --RR */
48
/* Printable name of each tracked TCP state, in state-number order. */
static const char *const tcp_conntrack_names[] = {
	"NONE",		/* 0 */
	"SYN_SENT",	/* 1 */
	"SYN_RECV",	/* 2 */
	"ESTABLISHED",	/* 3 */
	"FIN_WAIT",	/* 4 */
	"CLOSE_WAIT",	/* 5 */
	"LAST_ACK",	/* 6 */
	"TIME_WAIT",	/* 7 */
	"CLOSE",	/* 8 */
	"SYN_SENT2",	/* 9 */
};
601e68e1 61
/*
 * Postfix time-unit helpers: written after a number, e.g. "2 MINS"
 * expands to "2 * 60 * HZ".  Every unit is a multiple of HZ.
 */
#define SECS	* HZ
#define MINS	* 60 * HZ
#define HOURS	* 60 * 60 * HZ
#define DAYS	* 24 * 60 * 60 * HZ
66
33ee4464 67static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
2d646286
PM
68 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
69 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
70 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
71 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
72 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
73 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
74 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
75 [TCP_CONNTRACK_CLOSE] = 10 SECS,
874ab923 76 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
33ee4464
PNA
77/* RFC1122 says the R2 limit should be at least 100 seconds.
78 Linux uses 15 packets as limit, which corresponds
79 to ~13-30min depending on RTO. */
80 [TCP_CONNTRACK_RETRANS] = 5 MINS,
81 [TCP_CONNTRACK_UNACK] = 5 MINS,
2d646286 82};
601e68e1 83
/* Two-letter shorthands that keep the state transition table readable. */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sS2	TCP_CONNTRACK_SYN_SENT2
/* Table-only markers: "invalid" and "ignore" results. */
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE
96
/*
 * Classification of a segment by which of RST/SYN/FIN/ACK it carries.
 * The values are used as the row index of the state transition table.
 */
enum tcp_bit_set {
	TCP_SYN_SET,		/* SYN without ACK */
	TCP_SYNACK_SET,		/* SYN together with ACK */
	TCP_FIN_SET,		/* FIN */
	TCP_ACK_SET,		/* plain ACK */
	TCP_RST_SET,		/* RST */
	TCP_NONE_SET,		/* none of the above */
};
601e68e1 106
9fb9cbb1
YK
107/*
108 * The TCP state transition table needs a few words...
109 *
110 * We are the man in the middle. All the packets go through us
111 * but might get lost in transit to the destination.
601e68e1 112 * It is assumed that the destinations can't receive segments
9fb9cbb1
YK
113 * we haven't seen.
114 *
115 * The checked segment is in window, but our windows are *not*
116 * equivalent with the ones of the sender/receiver. We always
117 * try to guess the state of the current sender.
118 *
119 * The meaning of the states are:
120 *
121 * NONE: initial state
601e68e1 122 * SYN_SENT: SYN-only packet seen
874ab923 123 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
9fb9cbb1
YK
124 * SYN_RECV: SYN-ACK packet seen
125 * ESTABLISHED: ACK packet seen
126 * FIN_WAIT: FIN packet seen
601e68e1 127 * CLOSE_WAIT: ACK seen (after FIN)
9fb9cbb1
YK
128 * LAST_ACK: FIN seen (after FIN)
129 * TIME_WAIT: last ACK seen
b2155e7f 130 * CLOSE: closed connection (RST)
9fb9cbb1 131 *
9fb9cbb1 132 * Packets marked as IGNORED (sIG):
601e68e1
YH
133 * if they may be either invalid or valid
134 * and the receiver may send back a connection
9fb9cbb1
YK
135 * closing RST or a SYN/ACK.
136 *
137 * Packets marked as INVALID (sIV):
874ab923 138 * if we regard them as truly invalid packets
9fb9cbb1 139 */
a5e73c29 140static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
9fb9cbb1
YK
141 {
142/* ORIGINAL */
874ab923
JK
143/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
144/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
9fb9cbb1
YK
145/*
146 * sNO -> sSS Initialize a new connection
147 * sSS -> sSS Retransmitted SYN
874ab923
JK
148 * sS2 -> sS2 Late retransmitted SYN
149 * sSR -> sIG
9fb9cbb1 150 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
601e68e1 151 * are errors. Receiver will reply with RST
9fb9cbb1
YK
152 * and close the connection.
153 * Or we are not in sync and hold a dead connection.
154 * sFW -> sIG
155 * sCW -> sIG
156 * sLA -> sIG
157 * sTW -> sSS Reopened connection (RFC 1122).
158 * sCL -> sSS
159 */
874ab923 160/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
64f509ce 161/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
9fb9cbb1 162/*
874ab923
JK
163 * sNO -> sIV Too late and no reason to do anything
164 * sSS -> sIV Client can't send SYN and then SYN/ACK
165 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
64f509ce
JK
166 * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
167 * sES -> sIV Invalid SYN/ACK packets sent by the client
168 * sFW -> sIV
169 * sCW -> sIV
170 * sLA -> sIV
171 * sTW -> sIV
172 * sCL -> sIV
9fb9cbb1 173 */
874ab923 174/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
175/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
176/*
177 * sNO -> sIV Too late and no reason to do anything...
178 * sSS -> sIV Client migth not send FIN in this state:
179 * we enforce waiting for a SYN/ACK reply first.
874ab923 180 * sS2 -> sIV
9fb9cbb1
YK
181 * sSR -> sFW Close started.
182 * sES -> sFW
183 * sFW -> sLA FIN seen in both directions, waiting for
601e68e1 184 * the last ACK.
9fb9cbb1
YK
185 * Migth be a retransmitted FIN as well...
186 * sCW -> sLA
187 * sLA -> sLA Retransmitted FIN. Remain in the same state.
188 * sTW -> sTW
189 * sCL -> sCL
190 */
874ab923 191/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
192/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
193/*
194 * sNO -> sES Assumed.
195 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
874ab923 196 * sS2 -> sIV
9fb9cbb1
YK
197 * sSR -> sES Established state is reached.
198 * sES -> sES :-)
199 * sFW -> sCW Normal close request answered by ACK.
200 * sCW -> sCW
201 * sLA -> sTW Last ACK detected.
202 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
203 * sCL -> sCL
204 */
874ab923
JK
205/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
206/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
9fb9cbb1
YK
207/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
208 },
209 {
210/* REPLY */
874ab923
JK
211/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
212/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
9fb9cbb1
YK
213/*
214 * sNO -> sIV Never reached.
874ab923
JK
215 * sSS -> sS2 Simultaneous open
216 * sS2 -> sS2 Retransmitted simultaneous SYN
217 * sSR -> sIV Invalid SYN packets sent by the server
218 * sES -> sIV
9fb9cbb1
YK
219 * sFW -> sIV
220 * sCW -> sIV
221 * sLA -> sIV
222 * sTW -> sIV Reopened connection, but server may not do it.
223 * sCL -> sIV
224 */
874ab923 225/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
8a80c79a 226/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
9fb9cbb1
YK
227/*
228 * sSS -> sSR Standard open.
874ab923 229 * sS2 -> sSR Simultaneous open
8a80c79a 230 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
9fb9cbb1
YK
231 * sES -> sIG Late retransmitted SYN/ACK?
232 * sFW -> sIG Might be SYN/ACK answering ignored SYN
233 * sCW -> sIG
234 * sLA -> sIG
235 * sTW -> sIG
236 * sCL -> sIG
237 */
874ab923 238/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
239/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
240/*
241 * sSS -> sIV Server might not send FIN in this state.
874ab923 242 * sS2 -> sIV
9fb9cbb1
YK
243 * sSR -> sFW Close started.
244 * sES -> sFW
245 * sFW -> sLA FIN seen in both directions.
246 * sCW -> sLA
247 * sLA -> sLA Retransmitted FIN.
248 * sTW -> sTW
249 * sCL -> sCL
250 */
874ab923
JK
251/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
252/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
9fb9cbb1 253/*
73f30602 254 * sSS -> sIG Might be a half-open connection.
874ab923 255 * sS2 -> sIG
9fb9cbb1
YK
256 * sSR -> sSR Might answer late resent SYN.
257 * sES -> sES :-)
258 * sFW -> sCW Normal close request answered by ACK.
259 * sCW -> sCW
260 * sLA -> sTW Last ACK detected.
261 * sTW -> sTW Retransmitted last ACK.
262 * sCL -> sCL
263 */
874ab923
JK
264/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
265/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
9fb9cbb1 266/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
601e68e1 267 }
9fb9cbb1
YK
268};
269
d2ba1fde
G
270static inline struct nf_tcp_net *tcp_pernet(struct net *net)
271{
272 return &net->ct.nf_ct_proto.tcp;
273}
274
09f263cd
JE
275static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
276 struct nf_conntrack_tuple *tuple)
9fb9cbb1 277{
82f568fc
JE
278 const struct tcphdr *hp;
279 struct tcphdr _hdr;
9fb9cbb1
YK
280
281 /* Actually only need first 8 bytes. */
282 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
283 if (hp == NULL)
09f263cd 284 return false;
9fb9cbb1
YK
285
286 tuple->src.u.tcp.port = hp->source;
287 tuple->dst.u.tcp.port = hp->dest;
288
09f263cd 289 return true;
9fb9cbb1
YK
290}
291
09f263cd
JE
292static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
293 const struct nf_conntrack_tuple *orig)
9fb9cbb1
YK
294{
295 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
296 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
09f263cd 297 return true;
9fb9cbb1
YK
298}
299
300/* Print out the per-protocol part of the tuple. */
301static int tcp_print_tuple(struct seq_file *s,
302 const struct nf_conntrack_tuple *tuple)
303{
304 return seq_printf(s, "sport=%hu dport=%hu ",
305 ntohs(tuple->src.u.tcp.port),
306 ntohs(tuple->dst.u.tcp.port));
307}
308
309/* Print out the private part of the conntrack. */
440f0d58 310static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
9fb9cbb1
YK
311{
312 enum tcp_conntrack state;
313
440f0d58 314 spin_lock_bh(&ct->lock);
c88130bc 315 state = ct->proto.tcp.state;
440f0d58 316 spin_unlock_bh(&ct->lock);
9fb9cbb1
YK
317
318 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
319}
320
321static unsigned int get_conntrack_index(const struct tcphdr *tcph)
322{
323 if (tcph->rst) return TCP_RST_SET;
324 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
325 else if (tcph->fin) return TCP_FIN_SET;
326 else if (tcph->ack) return TCP_ACK_SET;
327 else return TCP_NONE_SET;
328}
329
330/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
331 in IP Filter' by Guido van Rooij.
601e68e1 332
631dd1a8
JM
333 http://www.sane.nl/events/sane2000/papers.html
334 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
601e68e1 335
9fb9cbb1
YK
336 The boundaries and the conditions are changed according to RFC793:
337 the packet must intersect the window (i.e. segments may be
338 after the right or before the left edge) and thus receivers may ACK
339 segments after the right edge of the window.
340
601e68e1 341 td_maxend = max(sack + max(win,1)) seen in reply packets
9fb9cbb1
YK
342 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
343 td_maxwin += seq + len - sender.td_maxend
344 if seq + len > sender.td_maxend
345 td_end = max(seq + len) seen in sent packets
601e68e1 346
9fb9cbb1
YK
347 I. Upper bound for valid data: seq <= sender.td_maxend
348 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
84ebe1cd
JK
349 III. Upper bound for valid (s)ack: sack <= receiver.td_end
350 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
9fb9cbb1 351
84ebe1cd
JK
352 where sack is the highest right edge of sack block found in the packet
353 or ack in the case of packet without SACK option.
9fb9cbb1 354
84ebe1cd 355 The upper bound limit for a valid (s)ack is not ignored -
601e68e1 356 we don't have to deal with fragments.
9fb9cbb1
YK
357*/
358
359static inline __u32 segment_seq_plus_len(__u32 seq,
360 size_t len,
361 unsigned int dataoff,
82f568fc 362 const struct tcphdr *tcph)
9fb9cbb1
YK
363{
364 /* XXX Should I use payload length field in IP/IPv6 header ?
365 * - YK */
366 return (seq + len - dataoff - tcph->doff*4
367 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
368}
601e68e1 369
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000

/* The larger of (sender)->td_maxwin and MAXACKWINCONST. */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST				\
		? (sender)->td_maxwin					\
		: MAXACKWINCONST)
601e68e1 375
9fb9cbb1
YK
376/*
377 * Simplified tcp_parse_options routine from tcp_input.c
378 */
379static void tcp_options(const struct sk_buff *skb,
380 unsigned int dataoff,
82f568fc 381 const struct tcphdr *tcph,
9fb9cbb1
YK
382 struct ip_ct_tcp_state *state)
383{
384 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 385 const unsigned char *ptr;
9fb9cbb1
YK
386 int length = (tcph->doff*4) - sizeof(struct tcphdr);
387
388 if (!length)
389 return;
390
391 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
392 length, buff);
393 BUG_ON(ptr == NULL);
394
601e68e1 395 state->td_scale =
9fb9cbb1
YK
396 state->flags = 0;
397
398 while (length > 0) {
399 int opcode=*ptr++;
400 int opsize;
401
402 switch (opcode) {
403 case TCPOPT_EOL:
404 return;
405 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
406 length--;
407 continue;
408 default:
409 opsize=*ptr++;
410 if (opsize < 2) /* "silly options" */
411 return;
412 if (opsize > length)
4a5cc84a 413 return; /* don't parse partial options */
9fb9cbb1 414
601e68e1 415 if (opcode == TCPOPT_SACK_PERM
9fb9cbb1
YK
416 && opsize == TCPOLEN_SACK_PERM)
417 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
418 else if (opcode == TCPOPT_WINDOW
419 && opsize == TCPOLEN_WINDOW) {
420 state->td_scale = *(u_int8_t *)ptr;
421
422 if (state->td_scale > 14) {
423 /* See RFC1323 */
424 state->td_scale = 14;
425 }
426 state->flags |=
427 IP_CT_TCP_FLAG_WINDOW_SCALE;
428 }
429 ptr += opsize - 2;
430 length -= opsize;
431 }
432 }
433}
434
435static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
82f568fc 436 const struct tcphdr *tcph, __u32 *sack)
9fb9cbb1 437{
601e68e1 438 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 439 const unsigned char *ptr;
9fb9cbb1
YK
440 int length = (tcph->doff*4) - sizeof(struct tcphdr);
441 __u32 tmp;
442
443 if (!length)
444 return;
445
446 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
447 length, buff);
448 BUG_ON(ptr == NULL);
449
450 /* Fast path for timestamp-only option */
bb9fc373 451 if (length == TCPOLEN_TSTAMP_ALIGNED
8f05ce91
YH
452 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
453 | (TCPOPT_NOP << 16)
454 | (TCPOPT_TIMESTAMP << 8)
455 | TCPOLEN_TIMESTAMP))
9fb9cbb1
YK
456 return;
457
458 while (length > 0) {
459 int opcode = *ptr++;
460 int opsize, i;
461
462 switch (opcode) {
463 case TCPOPT_EOL:
464 return;
465 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
466 length--;
467 continue;
468 default:
469 opsize = *ptr++;
470 if (opsize < 2) /* "silly options" */
471 return;
472 if (opsize > length)
4a5cc84a 473 return; /* don't parse partial options */
9fb9cbb1 474
601e68e1
YH
475 if (opcode == TCPOPT_SACK
476 && opsize >= (TCPOLEN_SACK_BASE
477 + TCPOLEN_SACK_PERBLOCK)
478 && !((opsize - TCPOLEN_SACK_BASE)
479 % TCPOLEN_SACK_PERBLOCK)) {
480 for (i = 0;
481 i < (opsize - TCPOLEN_SACK_BASE);
482 i += TCPOLEN_SACK_PERBLOCK) {
534f81a5 483 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
9fb9cbb1
YK
484
485 if (after(tmp, *sack))
486 *sack = tmp;
487 }
488 return;
489 }
490 ptr += opsize - 2;
491 length -= opsize;
492 }
493 }
494}
495
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Ask the registered nf_ct_nat_offset hook for the sequence number
 * offset of @seq in direction @dir; 0 when no hook is registered.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
			     enum ip_conntrack_dir dir,
			     u32 seq)
{
	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);

	if (get_offset == NULL)
		return 0;

	return get_offset(ct, dir, seq);
}
#define NAT_OFFSET(ct, dir, seq)	(nat_offset(ct, dir, seq))
#else
/* Without NAT support there is never an offset to apply. */
#define NAT_OFFSET(ct, dir, seq)	0
#endif
510
09f263cd
JE
511static bool tcp_in_window(const struct nf_conn *ct,
512 struct ip_ct_tcp *state,
513 enum ip_conntrack_dir dir,
514 unsigned int index,
515 const struct sk_buff *skb,
516 unsigned int dataoff,
517 const struct tcphdr *tcph,
76108cea 518 u_int8_t pf)
9fb9cbb1 519{
c2a2c7e0 520 struct net *net = nf_ct_net(ct);
d2ba1fde 521 struct nf_tcp_net *tn = tcp_pernet(net);
9fb9cbb1
YK
522 struct ip_ct_tcp_state *sender = &state->seen[dir];
523 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
82f568fc 524 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1 525 __u32 seq, ack, sack, end, win, swin;
f9dd09c7 526 s16 receiver_offset;
09f263cd 527 bool res;
9fb9cbb1
YK
528
529 /*
530 * Get the required data from the packet.
531 */
532 seq = ntohl(tcph->seq);
533 ack = sack = ntohl(tcph->ack_seq);
534 win = ntohs(tcph->window);
535 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
536
537 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
538 tcp_sack(skb, dataoff, tcph, &sack);
539
f9dd09c7 540 /* Take into account NAT sequence number mangling */
c7232c99 541 receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
f9dd09c7
JK
542 ack -= receiver_offset;
543 sack -= receiver_offset;
544
0d53778e
PM
545 pr_debug("tcp_in_window: START\n");
546 pr_debug("tcp_in_window: ");
3c9fba65 547 nf_ct_dump_tuple(tuple);
f9dd09c7
JK
548 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
549 seq, ack, receiver_offset, sack, receiver_offset, win, end);
0d53778e
PM
550 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
551 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
552 sender->td_end, sender->td_maxend, sender->td_maxwin,
553 sender->td_scale,
554 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
555 receiver->td_scale);
9fb9cbb1 556
874ab923 557 if (sender->td_maxwin == 0) {
9fb9cbb1
YK
558 /*
559 * Initialize sender data.
560 */
874ab923 561 if (tcph->syn) {
9fb9cbb1 562 /*
874ab923
JK
563 * SYN-ACK in reply to a SYN
564 * or SYN from reply direction in simultaneous open.
9fb9cbb1 565 */
601e68e1 566 sender->td_end =
9fb9cbb1
YK
567 sender->td_maxend = end;
568 sender->td_maxwin = (win == 0 ? 1 : win);
569
570 tcp_options(skb, dataoff, tcph, sender);
601e68e1 571 /*
9fb9cbb1
YK
572 * RFC 1323:
573 * Both sides must send the Window Scale option
574 * to enable window scaling in either direction.
575 */
576 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
577 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
601e68e1 578 sender->td_scale =
9fb9cbb1 579 receiver->td_scale = 0;
874ab923
JK
580 if (!tcph->ack)
581 /* Simultaneous open */
582 return true;
9fb9cbb1
YK
583 } else {
584 /*
585 * We are in the middle of a connection,
586 * its history is lost for us.
587 * Let's try to use the data from the packet.
601e68e1 588 */
9fb9cbb1 589 sender->td_end = end;
6ee0b693
CG
590 swin = win << sender->td_scale;
591 sender->td_maxwin = (swin == 0 ? 1 : swin);
9fb9cbb1 592 sender->td_maxend = end + sender->td_maxwin;
fac42a9a
PNA
593 /*
594 * We haven't seen traffic in the other direction yet
595 * but we have to tweak window tracking to pass III
596 * and IV until that happens.
597 */
598 if (receiver->td_maxwin == 0)
599 receiver->td_end = receiver->td_maxend = sack;
9fb9cbb1
YK
600 }
601 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
602 && dir == IP_CT_DIR_ORIGINAL)
603 || (state->state == TCP_CONNTRACK_SYN_RECV
604 && dir == IP_CT_DIR_REPLY))
605 && after(end, sender->td_end)) {
606 /*
607 * RFC 793: "if a TCP is reinitialized ... then it need
601e68e1 608 * not wait at all; it must only be sure to use sequence
9fb9cbb1
YK
609 * numbers larger than those recently used."
610 */
611 sender->td_end =
612 sender->td_maxend = end;
613 sender->td_maxwin = (win == 0 ? 1 : win);
614
615 tcp_options(skb, dataoff, tcph, sender);
616 }
617
618 if (!(tcph->ack)) {
619 /*
620 * If there is no ACK, just pretend it was set and OK.
621 */
622 ack = sack = receiver->td_end;
601e68e1
YH
623 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
624 (TCP_FLAG_ACK|TCP_FLAG_RST))
9fb9cbb1
YK
625 && (ack == 0)) {
626 /*
627 * Broken TCP stacks, that set ACK in RST packets as well
628 * with zero ack value.
629 */
630 ack = sack = receiver->td_end;
631 }
632
4a70bbfa 633 if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
9fb9cbb1 634 /*
4a70bbfa 635 * RST sent answering SYN.
9fb9cbb1
YK
636 */
637 seq = end = sender->td_end;
638
0d53778e 639 pr_debug("tcp_in_window: ");
3c9fba65 640 nf_ct_dump_tuple(tuple);
f9dd09c7
JK
641 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
642 seq, ack, receiver_offset, sack, receiver_offset, win, end);
0d53778e
PM
643 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
644 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
645 sender->td_end, sender->td_maxend, sender->td_maxwin,
646 sender->td_scale,
647 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
648 receiver->td_scale);
649
650 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
651 before(seq, sender->td_maxend + 1),
652 after(end, sender->td_end - receiver->td_maxwin - 1),
653 before(sack, receiver->td_end + 1),
84ebe1cd 654 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
9fb9cbb1 655
a09113c2
PM
656 if (before(seq, sender->td_maxend + 1) &&
657 after(end, sender->td_end - receiver->td_maxwin - 1) &&
658 before(sack, receiver->td_end + 1) &&
84ebe1cd 659 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
601e68e1 660 /*
9fb9cbb1
YK
661 * Take into account window scaling (RFC 1323).
662 */
663 if (!tcph->syn)
664 win <<= sender->td_scale;
665
666 /*
667 * Update sender data.
668 */
669 swin = win + (sack - ack);
670 if (sender->td_maxwin < swin)
671 sender->td_maxwin = swin;
ae375044 672 if (after(end, sender->td_end)) {
9fb9cbb1 673 sender->td_end = end;
ae375044
PM
674 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
675 }
bfcaa502
JK
676 if (tcph->ack) {
677 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
678 sender->td_maxack = ack;
679 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
680 } else if (after(ack, sender->td_maxack))
681 sender->td_maxack = ack;
682 }
683
9fb9cbb1
YK
684 /*
685 * Update receiver data.
686 */
fac42a9a 687 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
9fb9cbb1
YK
688 receiver->td_maxwin += end - sender->td_maxend;
689 if (after(sack + win, receiver->td_maxend - 1)) {
690 receiver->td_maxend = sack + win;
691 if (win == 0)
692 receiver->td_maxend++;
693 }
ae375044
PM
694 if (ack == receiver->td_end)
695 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
9fb9cbb1 696
601e68e1 697 /*
9fb9cbb1
YK
698 * Check retransmissions.
699 */
700 if (index == TCP_ACK_SET) {
701 if (state->last_dir == dir
702 && state->last_seq == seq
703 && state->last_ack == ack
c1fe3ca5
GH
704 && state->last_end == end
705 && state->last_win == win)
9fb9cbb1
YK
706 state->retrans++;
707 else {
708 state->last_dir = dir;
709 state->last_seq = seq;
710 state->last_ack = ack;
711 state->last_end = end;
c1fe3ca5 712 state->last_win = win;
9fb9cbb1
YK
713 state->retrans = 0;
714 }
715 }
09f263cd 716 res = true;
9fb9cbb1 717 } else {
09f263cd 718 res = false;
a09113c2 719 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
d2ba1fde 720 tn->tcp_be_liberal)
09f263cd 721 res = true;
c2a2c7e0 722 if (!res && LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
723 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
724 "nf_ct_tcp: %s ",
725 before(seq, sender->td_maxend + 1) ?
726 after(end, sender->td_end - receiver->td_maxwin - 1) ?
727 before(sack, receiver->td_end + 1) ?
f9dd09c7 728 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
9fb9cbb1
YK
729 : "ACK is under the lower bound (possible overly delayed ACK)"
730 : "ACK is over the upper bound (ACKed data not seen yet)"
731 : "SEQ is under the lower bound (already ACKed data retransmitted)"
732 : "SEQ is over the upper bound (over the window of the receiver)");
601e68e1
YH
733 }
734
09f263cd 735 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
0d53778e
PM
736 "receiver end=%u maxend=%u maxwin=%u\n",
737 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
738 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
9fb9cbb1
YK
739
740 return res;
741}
742
5c8ce7c9 743/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
a3433f35
CG
744static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
745 TCPHDR_URG) + 1] =
9fb9cbb1 746{
a3433f35
CG
747 [TCPHDR_SYN] = 1,
748 [TCPHDR_SYN|TCPHDR_URG] = 1,
749 [TCPHDR_SYN|TCPHDR_ACK] = 1,
750 [TCPHDR_RST] = 1,
751 [TCPHDR_RST|TCPHDR_ACK] = 1,
752 [TCPHDR_FIN|TCPHDR_ACK] = 1,
753 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
754 [TCPHDR_ACK] = 1,
755 [TCPHDR_ACK|TCPHDR_URG] = 1,
9fb9cbb1
YK
756};
757
758/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
8fea97ec 759static int tcp_error(struct net *net, struct nf_conn *tmpl,
74c51a14 760 struct sk_buff *skb,
9fb9cbb1
YK
761 unsigned int dataoff,
762 enum ip_conntrack_info *ctinfo,
76108cea 763 u_int8_t pf,
96f6bf82 764 unsigned int hooknum)
9fb9cbb1 765{
82f568fc
JE
766 const struct tcphdr *th;
767 struct tcphdr _tcph;
9fb9cbb1
YK
768 unsigned int tcplen = skb->len - dataoff;
769 u_int8_t tcpflags;
770
771 /* Smaller that minimal TCP header? */
772 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
773 if (th == NULL) {
c2a2c7e0 774 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
775 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
776 "nf_ct_tcp: short packet ");
777 return -NF_ACCEPT;
601e68e1
YH
778 }
779
9fb9cbb1
YK
780 /* Not whole TCP header or malformed packet */
781 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
c2a2c7e0 782 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
783 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
784 "nf_ct_tcp: truncated/malformed packet ");
785 return -NF_ACCEPT;
786 }
601e68e1 787
9fb9cbb1
YK
788 /* Checksum invalid? Ignore.
789 * We skip checking packets on the outgoing path
84fa7933 790 * because the checksum is assumed to be correct.
9fb9cbb1
YK
791 */
792 /* FIXME: Source route IP option packets --RR */
c04d0552 793 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
96f6bf82 794 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
c2a2c7e0 795 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
796 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
797 "nf_ct_tcp: bad TCP checksum ");
798 return -NF_ACCEPT;
799 }
800
801 /* Check TCP flags. */
a3433f35 802 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
9fb9cbb1 803 if (!tcp_valid_flags[tcpflags]) {
c2a2c7e0 804 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
805 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
806 "nf_ct_tcp: invalid TCP flag combination ");
807 return -NF_ACCEPT;
808 }
809
810 return NF_ACCEPT;
811}
812
2c8503f5
PNA
813static unsigned int *tcp_get_timeouts(struct net *net)
814{
be0593c6 815 return tcp_pernet(net)->timeouts;
2c8503f5
PNA
816}
817
9fb9cbb1 818/* Returns verdict for packet, or -1 for invalid. */
c88130bc 819static int tcp_packet(struct nf_conn *ct,
9fb9cbb1
YK
820 const struct sk_buff *skb,
821 unsigned int dataoff,
822 enum ip_conntrack_info ctinfo,
76108cea 823 u_int8_t pf,
2c8503f5
PNA
824 unsigned int hooknum,
825 unsigned int *timeouts)
9fb9cbb1 826{
c2a2c7e0 827 struct net *net = nf_ct_net(ct);
d2ba1fde 828 struct nf_tcp_net *tn = tcp_pernet(net);
0d53778e 829 struct nf_conntrack_tuple *tuple;
9fb9cbb1
YK
830 enum tcp_conntrack new_state, old_state;
831 enum ip_conntrack_dir dir;
82f568fc
JE
832 const struct tcphdr *th;
833 struct tcphdr _tcph;
9fb9cbb1
YK
834 unsigned long timeout;
835 unsigned int index;
836
837 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
838 BUG_ON(th == NULL);
839
440f0d58 840 spin_lock_bh(&ct->lock);
c88130bc 841 old_state = ct->proto.tcp.state;
9fb9cbb1
YK
842 dir = CTINFO2DIR(ctinfo);
843 index = get_conntrack_index(th);
844 new_state = tcp_conntracks[dir][index][old_state];
c88130bc 845 tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1
YK
846
847 switch (new_state) {
17311393
JK
848 case TCP_CONNTRACK_SYN_SENT:
849 if (old_state < TCP_CONNTRACK_TIME_WAIT)
850 break;
b2155e7f
JK
851 /* RFC 1122: "When a connection is closed actively,
852 * it MUST linger in TIME-WAIT state for a time 2xMSL
853 * (Maximum Segment Lifetime). However, it MAY accept
854 * a new SYN from the remote TCP to reopen the connection
855 * directly from TIME-WAIT state, if..."
856 * We ignore the conditions because we are in the
857 * TIME-WAIT state anyway.
858 *
859 * Handle aborted connections: we and the server
860 * think there is an existing connection but the client
861 * aborts it and starts a new one.
862 */
863 if (((ct->proto.tcp.seen[dir].flags
864 | ct->proto.tcp.seen[!dir].flags)
865 & IP_CT_TCP_FLAG_CLOSE_INIT)
c88130bc
PM
866 || (ct->proto.tcp.last_dir == dir
867 && ct->proto.tcp.last_index == TCP_RST_SET)) {
bc34b841
JK
868 /* Attempt to reopen a closed/aborted connection.
869 * Delete this connection and look up again. */
440f0d58 870 spin_unlock_bh(&ct->lock);
2aec609f 871
6b69fe0c
PM
872 /* Only repeat if we can actually remove the timer.
873 * Destruction may already be in progress in process
874 * context and we must give it a chance to terminate.
875 */
2aec609f 876 if (nf_ct_kill(ct))
6b69fe0c 877 return -NF_REPEAT;
ec8d5409 878 return NF_DROP;
17311393
JK
879 }
880 /* Fall through */
9fb9cbb1 881 case TCP_CONNTRACK_IGNORE:
73f30602 882 /* Ignored packets:
b2155e7f
JK
883 *
884 * Our connection entry may be out of sync, so ignore
885 * packets which may signal the real connection between
886 * the client and the server.
73f30602
JK
887 *
888 * a) SYN in ORIGINAL
889 * b) SYN/ACK in REPLY
601e68e1 890 * c) ACK in reply direction after initial SYN in original.
b2155e7f
JK
891 *
892 * If the ignored packet is invalid, the receiver will send
893 * a RST we'll catch below.
73f30602 894 */
9fb9cbb1 895 if (index == TCP_SYNACK_SET
c88130bc
PM
896 && ct->proto.tcp.last_index == TCP_SYN_SET
897 && ct->proto.tcp.last_dir != dir
898 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
b2155e7f 899 /* b) This SYN/ACK acknowledges a SYN that we earlier
9fb9cbb1
YK
900 * ignored as invalid. This means that the client and
901 * the server are both in sync, while the firewall is
c4832c7b
PNA
902 * not. We get in sync from the previously annotated
903 * values.
9fb9cbb1 904 */
c4832c7b
PNA
905 old_state = TCP_CONNTRACK_SYN_SENT;
906 new_state = TCP_CONNTRACK_SYN_RECV;
907 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
908 ct->proto.tcp.last_end;
909 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
910 ct->proto.tcp.last_end;
911 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
912 ct->proto.tcp.last_win == 0 ?
913 1 : ct->proto.tcp.last_win;
914 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
915 ct->proto.tcp.last_wscale;
916 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
917 ct->proto.tcp.last_flags;
918 memset(&ct->proto.tcp.seen[dir], 0,
919 sizeof(struct ip_ct_tcp_state));
920 break;
9fb9cbb1 921 }
c88130bc
PM
922 ct->proto.tcp.last_index = index;
923 ct->proto.tcp.last_dir = dir;
924 ct->proto.tcp.last_seq = ntohl(th->seq);
925 ct->proto.tcp.last_end =
9fb9cbb1 926 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
c4832c7b
PNA
927 ct->proto.tcp.last_win = ntohs(th->window);
928
929 /* a) This is a SYN in ORIGINAL. The client and the server
930 * may be in sync but we are not. In that case, we annotate
931 * the TCP options and let the packet go through. If it is a
932 * valid SYN packet, the server will reply with a SYN/ACK, and
933 * then we'll get in sync. Otherwise, the server ignores it. */
934 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
935 struct ip_ct_tcp_state seen = {};
936
937 ct->proto.tcp.last_flags =
938 ct->proto.tcp.last_wscale = 0;
939 tcp_options(skb, dataoff, th, &seen);
940 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
941 ct->proto.tcp.last_flags |=
942 IP_CT_TCP_FLAG_WINDOW_SCALE;
943 ct->proto.tcp.last_wscale = seen.td_scale;
944 }
945 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
946 ct->proto.tcp.last_flags |=
947 IP_CT_TCP_FLAG_SACK_PERM;
948 }
949 }
440f0d58 950 spin_unlock_bh(&ct->lock);
c2a2c7e0 951 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1 952 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
1a4ac987
PNA
953 "nf_ct_tcp: invalid packet ignored in "
954 "state %s ", tcp_conntrack_names[old_state]);
9fb9cbb1
YK
955 return NF_ACCEPT;
956 case TCP_CONNTRACK_MAX:
957 /* Invalid packet */
0d53778e
PM
958 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
959 dir, get_conntrack_index(th), old_state);
440f0d58 960 spin_unlock_bh(&ct->lock);
c2a2c7e0 961 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
962 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
963 "nf_ct_tcp: invalid state ");
964 return -NF_ACCEPT;
9fb9cbb1 965 case TCP_CONNTRACK_CLOSE:
bfcaa502
JK
966 if (index == TCP_RST_SET
967 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
968 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
969 /* Invalid RST */
334a47f6 970 spin_unlock_bh(&ct->lock);
bfcaa502
JK
971 if (LOG_INVALID(net, IPPROTO_TCP))
972 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
973 "nf_ct_tcp: invalid RST ");
974 return -NF_ACCEPT;
975 }
9fb9cbb1 976 if (index == TCP_RST_SET
c88130bc
PM
977 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
978 && ct->proto.tcp.last_index == TCP_SYN_SET)
979 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
980 && ct->proto.tcp.last_index == TCP_ACK_SET))
981 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
93b1fae4 982 /* RST sent to invalid SYN or ACK we had let through
73f30602
JK
983 * at a) and c) above:
984 *
985 * a) SYN was in window then
986 * c) we hold a half-open connection.
987 *
988 * Delete our connection entry.
9fb9cbb1 989 * We skip window checking, because packet might ACK
73f30602 990 * segments we ignored. */
9fb9cbb1
YK
991 goto in_window;
992 }
93b1fae4 993 /* Just fall through */
9fb9cbb1
YK
994 default:
995 /* Keep compilers happy. */
996 break;
997 }
998
c88130bc 999 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
9fb9cbb1 1000 skb, dataoff, th, pf)) {
440f0d58 1001 spin_unlock_bh(&ct->lock);
9fb9cbb1
YK
1002 return -NF_ACCEPT;
1003 }
1004 in_window:
1005 /* From now on we have got in-window packets */
c88130bc
PM
1006 ct->proto.tcp.last_index = index;
1007 ct->proto.tcp.last_dir = dir;
9fb9cbb1 1008
0d53778e 1009 pr_debug("tcp_conntracks: ");
3c9fba65 1010 nf_ct_dump_tuple(tuple);
0d53778e
PM
1011 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1012 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1013 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1014 old_state, new_state);
9fb9cbb1 1015
c88130bc 1016 ct->proto.tcp.state = new_state;
9fb9cbb1 1017 if (old_state != new_state
d0c1fd7a 1018 && new_state == TCP_CONNTRACK_FIN_WAIT)
c88130bc 1019 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
ae375044 1020
d2ba1fde 1021 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
2c8503f5
PNA
1022 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1023 timeout = timeouts[TCP_CONNTRACK_RETRANS];
ae375044
PM
1024 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1025 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
2c8503f5
PNA
1026 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1027 timeout = timeouts[TCP_CONNTRACK_UNACK];
ae375044 1028 else
2c8503f5 1029 timeout = timeouts[new_state];
440f0d58 1030 spin_unlock_bh(&ct->lock);
9fb9cbb1 1031
9fb9cbb1 1032 if (new_state != old_state)
a71996fc 1033 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
9fb9cbb1 1034
c88130bc 1035 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
9fb9cbb1
YK
1036 /* If only reply is a RST, we can consider ourselves not to
1037 have an established connection: this is a fairly common
1038 problem case, so we can delete the conntrack
1039 immediately. --RR */
1040 if (th->rst) {
718d4ad9 1041 nf_ct_kill_acct(ct, ctinfo, skb);
9fb9cbb1
YK
1042 return NF_ACCEPT;
1043 }
c88130bc 1044 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
9fb9cbb1
YK
1045 && (old_state == TCP_CONNTRACK_SYN_RECV
1046 || old_state == TCP_CONNTRACK_ESTABLISHED)
1047 && new_state == TCP_CONNTRACK_ESTABLISHED) {
601e68e1
YH
1048 /* Set ASSURED if we see a valid ack in ESTABLISHED
1049 after SYN_RECV or a valid answer for a picked up
9fb9cbb1 1050 connection. */
c88130bc 1051 set_bit(IPS_ASSURED_BIT, &ct->status);
858b3133 1052 nf_conntrack_event_cache(IPCT_ASSURED, ct);
9fb9cbb1 1053 }
c88130bc 1054 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
9fb9cbb1
YK
1055
1056 return NF_ACCEPT;
1057}
601e68e1 1058
9fb9cbb1 1059/* Called when a new connection for this protocol found. */
09f263cd 1060static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
2c8503f5 1061 unsigned int dataoff, unsigned int *timeouts)
9fb9cbb1
YK
1062{
1063 enum tcp_conntrack new_state;
82f568fc
JE
1064 const struct tcphdr *th;
1065 struct tcphdr _tcph;
d2ba1fde
G
1066 struct net *net = nf_ct_net(ct);
1067 struct nf_tcp_net *tn = tcp_pernet(net);
82f568fc
JE
1068 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1069 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
9fb9cbb1
YK
1070
1071 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1072 BUG_ON(th == NULL);
1073
1074 /* Don't need lock here: this conntrack not in circulation yet */
e5fc9e7a 1075 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
9fb9cbb1
YK
1076
1077 /* Invalid: delete conntrack */
1078 if (new_state >= TCP_CONNTRACK_MAX) {
0d53778e 1079 pr_debug("nf_ct_tcp: invalid new deleting.\n");
09f263cd 1080 return false;
9fb9cbb1
YK
1081 }
1082
1083 if (new_state == TCP_CONNTRACK_SYN_SENT) {
e5fc9e7a 1084 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
9fb9cbb1 1085 /* SYN packet */
c88130bc 1086 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1087 segment_seq_plus_len(ntohl(th->seq), skb->len,
1088 dataoff, th);
c88130bc
PM
1089 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1090 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1091 ct->proto.tcp.seen[0].td_maxwin = 1;
1092 ct->proto.tcp.seen[0].td_maxend =
1093 ct->proto.tcp.seen[0].td_end;
1094
1095 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
d2ba1fde 1096 } else if (tn->tcp_loose == 0) {
9fb9cbb1 1097 /* Don't try to pick up connections. */
09f263cd 1098 return false;
9fb9cbb1 1099 } else {
e5fc9e7a 1100 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
9fb9cbb1
YK
1101 /*
1102 * We are in the middle of a connection,
1103 * its history is lost for us.
1104 * Let's try to use the data from the packet.
1105 */
c88130bc 1106 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1107 segment_seq_plus_len(ntohl(th->seq), skb->len,
1108 dataoff, th);
c88130bc
PM
1109 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1110 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1111 ct->proto.tcp.seen[0].td_maxwin = 1;
1112 ct->proto.tcp.seen[0].td_maxend =
1113 ct->proto.tcp.seen[0].td_end +
1114 ct->proto.tcp.seen[0].td_maxwin;
9fb9cbb1 1115
a09113c2
PM
1116 /* We assume SACK and liberal window checking to handle
1117 * window scaling */
c88130bc
PM
1118 ct->proto.tcp.seen[0].flags =
1119 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1120 IP_CT_TCP_FLAG_BE_LIBERAL;
9fb9cbb1 1121 }
601e68e1 1122
9fb9cbb1 1123 /* tcp_packet will set them */
c88130bc 1124 ct->proto.tcp.last_index = TCP_NONE_SET;
601e68e1 1125
0d53778e
PM
1126 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1127 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1128 sender->td_end, sender->td_maxend, sender->td_maxwin,
1129 sender->td_scale,
1130 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1131 receiver->td_scale);
09f263cd 1132 return true;
9fb9cbb1 1133}
c1d10adb 1134
c0cd1156 1135#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
c1d10adb
PNA
1136
1137#include <linux/netfilter/nfnetlink.h>
1138#include <linux/netfilter/nfnetlink_conntrack.h>
1139
fdf70832 1140static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
440f0d58 1141 struct nf_conn *ct)
c1d10adb 1142{
df6fb868 1143 struct nlattr *nest_parms;
c8e2078c 1144 struct nf_ct_tcp_flags tmp = {};
601e68e1 1145
440f0d58 1146 spin_lock_bh(&ct->lock);
df6fb868
PM
1147 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1148 if (!nest_parms)
1149 goto nla_put_failure;
1150
4925a459
DM
1151 if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1152 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1153 ct->proto.tcp.seen[0].td_scale) ||
1154 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1155 ct->proto.tcp.seen[1].td_scale))
1156 goto nla_put_failure;
c8e2078c
PNA
1157
1158 tmp.flags = ct->proto.tcp.seen[0].flags;
4925a459
DM
1159 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1160 sizeof(struct nf_ct_tcp_flags), &tmp))
1161 goto nla_put_failure;
c8e2078c
PNA
1162
1163 tmp.flags = ct->proto.tcp.seen[1].flags;
4925a459
DM
1164 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1165 sizeof(struct nf_ct_tcp_flags), &tmp))
1166 goto nla_put_failure;
440f0d58 1167 spin_unlock_bh(&ct->lock);
c1d10adb 1168
df6fb868 1169 nla_nest_end(skb, nest_parms);
c1d10adb
PNA
1170
1171 return 0;
1172
df6fb868 1173nla_put_failure:
440f0d58 1174 spin_unlock_bh(&ct->lock);
c1d10adb
PNA
1175 return -1;
1176}
1177
f73e924c
PM
1178static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1179 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1180 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1181 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1182 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1183 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
c1d10adb
PNA
1184};
1185
fdf70832 1186static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
c1d10adb 1187{
2f0d2f10 1188 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
df6fb868 1189 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
f73e924c 1190 int err;
c1d10adb
PNA
1191
1192 /* updates could not contain anything about the private
1193 * protocol info, in that case skip the parsing */
2f0d2f10 1194 if (!pattr)
c1d10adb
PNA
1195 return 0;
1196
2f0d2f10 1197 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
f73e924c
PM
1198 if (err < 0)
1199 return err;
c1d10adb 1200
5f7da4d2
PM
1201 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1202 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
c1d10adb
PNA
1203 return -EINVAL;
1204
440f0d58 1205 spin_lock_bh(&ct->lock);
5f7da4d2
PM
1206 if (tb[CTA_PROTOINFO_TCP_STATE])
1207 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
c8e2078c 1208
df6fb868 1209 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
c8e2078c 1210 struct nf_ct_tcp_flags *attr =
df6fb868 1211 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
c8e2078c
PNA
1212 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1213 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1214 }
1215
df6fb868 1216 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
c8e2078c 1217 struct nf_ct_tcp_flags *attr =
df6fb868 1218 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
c8e2078c
PNA
1219 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1220 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1221 }
1222
df6fb868
PM
1223 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1224 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
c8e2078c
PNA
1225 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1226 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
77236b6e
PM
1227 ct->proto.tcp.seen[0].td_scale =
1228 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1229 ct->proto.tcp.seen[1].td_scale =
1230 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
c8e2078c 1231 }
440f0d58 1232 spin_unlock_bh(&ct->lock);
c1d10adb
PNA
1233
1234 return 0;
1235}
a400c30e
HE
1236
1237static int tcp_nlattr_size(void)
1238{
1239 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1240 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1241}
1242
1243static int tcp_nlattr_tuple_size(void)
1244{
1245 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1246}
c1d10adb 1247#endif
933a41e7 1248
50978462
PNA
1249#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1250
1251#include <linux/netfilter/nfnetlink.h>
1252#include <linux/netfilter/nfnetlink_cttimeout.h>
1253
8264deb8
G
1254static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1255 struct net *net, void *data)
50978462
PNA
1256{
1257 unsigned int *timeouts = data;
8264deb8 1258 struct nf_tcp_net *tn = tcp_pernet(net);
50978462
PNA
1259 int i;
1260
1261 /* set default TCP timeouts. */
1262 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
8264deb8 1263 timeouts[i] = tn->timeouts[i];
50978462
PNA
1264
1265 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1266 timeouts[TCP_CONNTRACK_SYN_SENT] =
1267 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1268 }
1269 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1270 timeouts[TCP_CONNTRACK_SYN_RECV] =
1271 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1272 }
1273 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1274 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1275 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1276 }
1277 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1278 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1279 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1280 }
1281 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1282 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1283 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1284 }
1285 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1286 timeouts[TCP_CONNTRACK_LAST_ACK] =
1287 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1288 }
1289 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1290 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1291 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1292 }
1293 if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1294 timeouts[TCP_CONNTRACK_CLOSE] =
1295 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1296 }
1297 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1298 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1299 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1300 }
1301 if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1302 timeouts[TCP_CONNTRACK_RETRANS] =
1303 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1304 }
1305 if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1306 timeouts[TCP_CONNTRACK_UNACK] =
1307 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1308 }
1309 return 0;
1310}
1311
1312static int
1313tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1314{
1315 const unsigned int *timeouts = data;
1316
4925a459
DM
1317 if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1318 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1319 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1320 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1321 nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1322 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1323 nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1324 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1325 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1326 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1327 nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1328 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1329 nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1330 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1331 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1332 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1333 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1334 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1335 nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1336 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1337 nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1338 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1339 goto nla_put_failure;
50978462
PNA
1340 return 0;
1341
1342nla_put_failure:
1343 return -ENOSPC;
1344}
1345
1346static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1347 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
1348 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
1349 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
1350 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
1351 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
1352 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
1353 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1354 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1355 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
6d1fafca
FW
1356 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
1357 [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
50978462
PNA
1358};
1359#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1360
933a41e7 1361#ifdef CONFIG_SYSCTL
933a41e7
PM
1362static struct ctl_table tcp_sysctl_table[] = {
1363 {
933a41e7 1364 .procname = "nf_conntrack_tcp_timeout_syn_sent",
933a41e7
PM
1365 .maxlen = sizeof(unsigned int),
1366 .mode = 0644,
6d9f239a 1367 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1368 },
1369 {
933a41e7 1370 .procname = "nf_conntrack_tcp_timeout_syn_recv",
933a41e7
PM
1371 .maxlen = sizeof(unsigned int),
1372 .mode = 0644,
6d9f239a 1373 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1374 },
1375 {
933a41e7 1376 .procname = "nf_conntrack_tcp_timeout_established",
933a41e7
PM
1377 .maxlen = sizeof(unsigned int),
1378 .mode = 0644,
6d9f239a 1379 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1380 },
1381 {
933a41e7 1382 .procname = "nf_conntrack_tcp_timeout_fin_wait",
933a41e7
PM
1383 .maxlen = sizeof(unsigned int),
1384 .mode = 0644,
6d9f239a 1385 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1386 },
1387 {
933a41e7 1388 .procname = "nf_conntrack_tcp_timeout_close_wait",
933a41e7
PM
1389 .maxlen = sizeof(unsigned int),
1390 .mode = 0644,
6d9f239a 1391 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1392 },
1393 {
933a41e7 1394 .procname = "nf_conntrack_tcp_timeout_last_ack",
933a41e7
PM
1395 .maxlen = sizeof(unsigned int),
1396 .mode = 0644,
6d9f239a 1397 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1398 },
1399 {
933a41e7 1400 .procname = "nf_conntrack_tcp_timeout_time_wait",
933a41e7
PM
1401 .maxlen = sizeof(unsigned int),
1402 .mode = 0644,
6d9f239a 1403 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1404 },
1405 {
933a41e7 1406 .procname = "nf_conntrack_tcp_timeout_close",
933a41e7
PM
1407 .maxlen = sizeof(unsigned int),
1408 .mode = 0644,
6d9f239a 1409 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1410 },
1411 {
933a41e7 1412 .procname = "nf_conntrack_tcp_timeout_max_retrans",
933a41e7
PM
1413 .maxlen = sizeof(unsigned int),
1414 .mode = 0644,
6d9f239a 1415 .proc_handler = proc_dointvec_jiffies,
933a41e7 1416 },
ae375044
PM
1417 {
1418 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
ae375044
PM
1419 .maxlen = sizeof(unsigned int),
1420 .mode = 0644,
6d9f239a 1421 .proc_handler = proc_dointvec_jiffies,
ae375044 1422 },
933a41e7 1423 {
933a41e7 1424 .procname = "nf_conntrack_tcp_loose",
933a41e7
PM
1425 .maxlen = sizeof(unsigned int),
1426 .mode = 0644,
6d9f239a 1427 .proc_handler = proc_dointvec,
933a41e7
PM
1428 },
1429 {
933a41e7 1430 .procname = "nf_conntrack_tcp_be_liberal",
933a41e7
PM
1431 .maxlen = sizeof(unsigned int),
1432 .mode = 0644,
6d9f239a 1433 .proc_handler = proc_dointvec,
933a41e7
PM
1434 },
1435 {
933a41e7 1436 .procname = "nf_conntrack_tcp_max_retrans",
933a41e7
PM
1437 .maxlen = sizeof(unsigned int),
1438 .mode = 0644,
6d9f239a 1439 .proc_handler = proc_dointvec,
933a41e7 1440 },
f8572d8f 1441 { }
933a41e7 1442};
a999e683
PM
1443
1444#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1445static struct ctl_table tcp_compat_sysctl_table[] = {
1446 {
a999e683 1447 .procname = "ip_conntrack_tcp_timeout_syn_sent",
a999e683
PM
1448 .maxlen = sizeof(unsigned int),
1449 .mode = 0644,
6d9f239a 1450 .proc_handler = proc_dointvec_jiffies,
a999e683 1451 },
874ab923
JK
1452 {
1453 .procname = "ip_conntrack_tcp_timeout_syn_sent2",
874ab923
JK
1454 .maxlen = sizeof(unsigned int),
1455 .mode = 0644,
1456 .proc_handler = proc_dointvec_jiffies,
1457 },
a999e683 1458 {
a999e683 1459 .procname = "ip_conntrack_tcp_timeout_syn_recv",
a999e683
PM
1460 .maxlen = sizeof(unsigned int),
1461 .mode = 0644,
6d9f239a 1462 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1463 },
1464 {
a999e683 1465 .procname = "ip_conntrack_tcp_timeout_established",
a999e683
PM
1466 .maxlen = sizeof(unsigned int),
1467 .mode = 0644,
6d9f239a 1468 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1469 },
1470 {
a999e683 1471 .procname = "ip_conntrack_tcp_timeout_fin_wait",
a999e683
PM
1472 .maxlen = sizeof(unsigned int),
1473 .mode = 0644,
6d9f239a 1474 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1475 },
1476 {
a999e683 1477 .procname = "ip_conntrack_tcp_timeout_close_wait",
a999e683
PM
1478 .maxlen = sizeof(unsigned int),
1479 .mode = 0644,
6d9f239a 1480 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1481 },
1482 {
a999e683 1483 .procname = "ip_conntrack_tcp_timeout_last_ack",
a999e683
PM
1484 .maxlen = sizeof(unsigned int),
1485 .mode = 0644,
6d9f239a 1486 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1487 },
1488 {
a999e683 1489 .procname = "ip_conntrack_tcp_timeout_time_wait",
a999e683
PM
1490 .maxlen = sizeof(unsigned int),
1491 .mode = 0644,
6d9f239a 1492 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1493 },
1494 {
a999e683 1495 .procname = "ip_conntrack_tcp_timeout_close",
a999e683
PM
1496 .maxlen = sizeof(unsigned int),
1497 .mode = 0644,
6d9f239a 1498 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1499 },
1500 {
a999e683 1501 .procname = "ip_conntrack_tcp_timeout_max_retrans",
a999e683
PM
1502 .maxlen = sizeof(unsigned int),
1503 .mode = 0644,
6d9f239a 1504 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1505 },
1506 {
a999e683 1507 .procname = "ip_conntrack_tcp_loose",
a999e683
PM
1508 .maxlen = sizeof(unsigned int),
1509 .mode = 0644,
6d9f239a 1510 .proc_handler = proc_dointvec,
a999e683
PM
1511 },
1512 {
a999e683 1513 .procname = "ip_conntrack_tcp_be_liberal",
a999e683
PM
1514 .maxlen = sizeof(unsigned int),
1515 .mode = 0644,
6d9f239a 1516 .proc_handler = proc_dointvec,
a999e683
PM
1517 },
1518 {
a999e683 1519 .procname = "ip_conntrack_tcp_max_retrans",
a999e683
PM
1520 .maxlen = sizeof(unsigned int),
1521 .mode = 0644,
6d9f239a 1522 .proc_handler = proc_dointvec,
a999e683 1523 },
f8572d8f 1524 { }
a999e683
PM
1525};
1526#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
933a41e7
PM
1527#endif /* CONFIG_SYSCTL */
1528
efa758fe
G
/*
 * Give @pn its own copy of tcp_sysctl_table with every entry's .data
 * pointing at the matching field of @tn.
 *
 * Does nothing if @pn already has a table, or if CONFIG_SYSCTL is off.
 * The indices used below follow the entry order of tcp_sysctl_table.
 *
 * Returns 0 on success, -ENOMEM if the copy could not be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;
	unsigned int *timeouts = tn->timeouts;

	/* already set up */
	if (pn->ctl_table)
		return 0;

	pn->ctl_table = kmemdup(tcp_sysctl_table,
				sizeof(tcp_sysctl_table),
				GFP_KERNEL);
	table = pn->ctl_table;
	if (table == NULL)
		return -ENOMEM;

	/* timeouts, handled in jiffies */
	table[0].data = &timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data = &timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data = &timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data = &timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data = &timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data = &timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data = &timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data = &timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data = &timeouts[TCP_CONNTRACK_UNACK];

	/* plain integer knobs */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1558
efa758fe
G
/*
 * Give @pn its own copy of tcp_compat_sysctl_table with every entry's
 * .data pointing at the matching field of @tn.
 *
 * Compiled to a no-op unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are set.  The indices used below
 * follow the entry order of tcp_compat_sysctl_table.
 *
 * Returns 0 on success, -ENOMEM if the copy could not be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	struct ctl_table *table;
	unsigned int *timeouts = tn->timeouts;

	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
				       sizeof(tcp_compat_sysctl_table),
				       GFP_KERNEL);
	table = pn->ctl_compat_table;
	if (table == NULL)
		return -ENOMEM;

	/* timeouts, handled in jiffies */
	table[0].data = &timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &timeouts[TCP_CONNTRACK_SYN_SENT2];
	table[2].data = &timeouts[TCP_CONNTRACK_SYN_RECV];
	table[3].data = &timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[4].data = &timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[5].data = &timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[6].data = &timeouts[TCP_CONNTRACK_LAST_ACK];
	table[7].data = &timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[8].data = &timeouts[TCP_CONNTRACK_CLOSE];
	table[9].data = &timeouts[TCP_CONNTRACK_RETRANS];

	/* plain integer knobs */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
#endif
	return 0;
}
1587
efa758fe 1588static int tcp_init_net(struct net *net, u_int16_t proto)
d2ba1fde 1589{
efa758fe 1590 int ret;
d2ba1fde 1591 struct nf_tcp_net *tn = tcp_pernet(net);
efa758fe
G
1592 struct nf_proto_net *pn = &tn->pn;
1593
1594 if (!pn->users) {
1595 int i;
d2ba1fde 1596
d2ba1fde
G
1597 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1598 tn->timeouts[i] = tcp_timeouts[i];
1599
1600 tn->tcp_loose = nf_ct_tcp_loose;
1601 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1602 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1603 }
1604
efa758fe
G
1605 if (proto == AF_INET) {
1606 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1607 if (ret < 0)
1608 return ret;
d2ba1fde 1609
efa758fe
G
1610 ret = tcp_kmemdup_sysctl_table(pn, tn);
1611 if (ret < 0)
1612 nf_ct_kfree_compat_sysctl_table(pn);
1613 } else
1614 ret = tcp_kmemdup_sysctl_table(pn, tn);
d2ba1fde 1615
d2ba1fde
G
1616 return ret;
1617}
1618
08911475
PNA
1619static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1620{
1621 return &net->ct.nf_ct_proto.tcp.pn;
1622}
1623
61075af5 1624struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
9fb9cbb1
YK
1625{
1626 .l3proto = PF_INET,
605dcad6 1627 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1628 .name = "tcp",
1629 .pkt_to_tuple = tcp_pkt_to_tuple,
1630 .invert_tuple = tcp_invert_tuple,
1631 .print_tuple = tcp_print_tuple,
1632 .print_conntrack = tcp_print_conntrack,
1633 .packet = tcp_packet,
2c8503f5 1634 .get_timeouts = tcp_get_timeouts,
9fb9cbb1 1635 .new = tcp_new,
96f6bf82 1636 .error = tcp_error,
c0cd1156 1637#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
fdf70832 1638 .to_nlattr = tcp_to_nlattr,
a400c30e 1639 .nlattr_size = tcp_nlattr_size,
fdf70832
PM
1640 .from_nlattr = nlattr_to_tcp,
1641 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1642 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
a400c30e 1643 .nlattr_tuple_size = tcp_nlattr_tuple_size,
f73e924c 1644 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1645#endif
50978462
PNA
1646#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1647 .ctnl_timeout = {
1648 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1649 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1650 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1651 .obj_size = sizeof(unsigned int) *
1652 TCP_CONNTRACK_TIMEOUT_MAX,
1653 .nla_policy = tcp_timeout_nla_policy,
1654 },
1655#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
efa758fe 1656 .init_net = tcp_init_net,
08911475 1657 .get_net_proto = tcp_get_net_proto,
9fb9cbb1 1658};
13b18339 1659EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
9fb9cbb1 1660
61075af5 1661struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
9fb9cbb1
YK
1662{
1663 .l3proto = PF_INET6,
605dcad6 1664 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1665 .name = "tcp",
1666 .pkt_to_tuple = tcp_pkt_to_tuple,
1667 .invert_tuple = tcp_invert_tuple,
1668 .print_tuple = tcp_print_tuple,
1669 .print_conntrack = tcp_print_conntrack,
1670 .packet = tcp_packet,
2c8503f5 1671 .get_timeouts = tcp_get_timeouts,
9fb9cbb1 1672 .new = tcp_new,
96f6bf82 1673 .error = tcp_error,
c0cd1156 1674#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
fdf70832 1675 .to_nlattr = tcp_to_nlattr,
a400c30e 1676 .nlattr_size = tcp_nlattr_size,
fdf70832
PM
1677 .from_nlattr = nlattr_to_tcp,
1678 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1679 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
a400c30e 1680 .nlattr_tuple_size = tcp_nlattr_tuple_size,
f73e924c 1681 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1682#endif
50978462
PNA
1683#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1684 .ctnl_timeout = {
1685 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1686 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1687 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1688 .obj_size = sizeof(unsigned int) *
1689 TCP_CONNTRACK_TIMEOUT_MAX,
1690 .nla_policy = tcp_timeout_nla_policy,
1691 },
1692#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
efa758fe 1693 .init_net = tcp_init_net,
08911475 1694 .get_net_proto = tcp_get_net_proto,
9fb9cbb1 1695};
13b18339 1696EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
This page took 0.727792 seconds and 5 git commands to generate.