Merge branch 'linus' into timers/core
[deliverable/linux.git] / net / netfilter / nf_conntrack_proto_tcp.c
CommitLineData
9fb9cbb1
YK
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
f229f6ce
PM
3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
9fb9cbb1
YK
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9fb9cbb1
YK
9 */
10
9fb9cbb1 11#include <linux/types.h>
9fb9cbb1 12#include <linux/timer.h>
9fb9cbb1
YK
13#include <linux/module.h>
14#include <linux/in.h>
15#include <linux/tcp.h>
16#include <linux/spinlock.h>
17#include <linux/skbuff.h>
18#include <linux/ipv6.h>
19#include <net/ip6_checksum.h>
534f81a5 20#include <asm/unaligned.h>
9fb9cbb1
YK
21
22#include <net/tcp.h>
23
24#include <linux/netfilter.h>
25#include <linux/netfilter_ipv4.h>
26#include <linux/netfilter_ipv6.h>
27#include <net/netfilter/nf_conntrack.h>
605dcad6 28#include <net/netfilter/nf_conntrack_l4proto.h>
f6180121 29#include <net/netfilter/nf_conntrack_ecache.h>
f01ffbd6 30#include <net/netfilter/nf_log.h>
9d2493f8
CP
31#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
32#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
9fb9cbb1 33
601e68e1
YH
34/* "Be conservative in what you do,
35 be liberal in what you accept from others."
9fb9cbb1 36 If it's non-zero, we mark only out of window RST segments as INVALID. */
3aef0fd9 37static int nf_ct_tcp_be_liberal __read_mostly = 0;
9fb9cbb1 38
a09113c2 39/* If it is set to zero, we disable picking up already established
9fb9cbb1 40 connections. */
3aef0fd9 41static int nf_ct_tcp_loose __read_mostly = 1;
9fb9cbb1 42
601e68e1
YH
43/* Max number of the retransmitted packets without receiving an (acceptable)
44 ACK from the destination. If this number is reached, a shorter timer
9fb9cbb1 45 will be started. */
3aef0fd9 46static int nf_ct_tcp_max_retrans __read_mostly = 3;
9fb9cbb1
YK
47
48 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
49 closely. They're more complex. --RR */
50
/*
 * Printable names of the TCP conntrack states, in state-number order.
 * tcp_print_conntrack() indexes this table with the tracked state.
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",		/* 0 */
	"SYN_SENT",	/* 1 */
	"SYN_RECV",	/* 2 */
	"ESTABLISHED",	/* 3 */
	"FIN_WAIT",	/* 4 */
	"CLOSE_WAIT",	/* 5 */
	"LAST_ACK",	/* 6 */
	"TIME_WAIT",	/* 7 */
	"CLOSE",	/* 8 */
	"SYN_SENT2",	/* 9 */
};
601e68e1 63
9fb9cbb1
YK
/*
 * Postfix time-unit macros: written after a number ("2 MINS", "5 DAYS")
 * they expand to a multiplication chain that ends in HZ, so the result is
 * expressed in HZ units.
 */
#define SECS	* HZ
#define MINS	* 60 SECS
#define HOURS	* 60 MINS
#define DAYS	* 24 HOURS
68
33ee4464 69static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
2d646286
PM
70 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
71 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
72 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
73 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
74 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
75 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
76 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
77 [TCP_CONNTRACK_CLOSE] = 10 SECS,
874ab923 78 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
33ee4464
PNA
79/* RFC1122 says the R2 limit should be at least 100 seconds.
80 Linux uses 15 packets as limit, which corresponds
81 to ~13-30min depending on RTO. */
82 [TCP_CONNTRACK_RETRANS] = 5 MINS,
83 [TCP_CONNTRACK_UNACK] = 5 MINS,
2d646286 84};
601e68e1 85
9fb9cbb1
YK
/* Short aliases for the conntrack states, used by the transition table. */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sS2	TCP_CONNTRACK_SYN_SENT2
/* Pseudo-states: "invalid" and "ignore" table entries. */
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE
98
/*
 * What TCP flags are set from RST/SYN/FIN/ACK.
 * The values are used as the row index of the state transition table.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
601e68e1 108
9fb9cbb1
YK
109/*
110 * The TCP state transition table needs a few words...
111 *
112 * We are the man in the middle. All the packets go through us
113 * but might get lost in transit to the destination.
601e68e1 114 * It is assumed that the destinations can't receive segments
9fb9cbb1
YK
115 * we haven't seen.
116 *
117 * The checked segment is in window, but our windows are *not*
118 * equivalent with the ones of the sender/receiver. We always
119 * try to guess the state of the current sender.
120 *
121 * The meaning of the states are:
122 *
123 * NONE: initial state
601e68e1 124 * SYN_SENT: SYN-only packet seen
874ab923 125 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
9fb9cbb1
YK
126 * SYN_RECV: SYN-ACK packet seen
127 * ESTABLISHED: ACK packet seen
128 * FIN_WAIT: FIN packet seen
601e68e1 129 * CLOSE_WAIT: ACK seen (after FIN)
9fb9cbb1
YK
130 * LAST_ACK: FIN seen (after FIN)
131 * TIME_WAIT: last ACK seen
b2155e7f 132 * CLOSE: closed connection (RST)
9fb9cbb1 133 *
9fb9cbb1 134 * Packets marked as IGNORED (sIG):
601e68e1
YH
135 * if they may be either invalid or valid
136 * and the receiver may send back a connection
9fb9cbb1
YK
137 * closing RST or a SYN/ACK.
138 *
139 * Packets marked as INVALID (sIV):
874ab923 140 * if we regard them as truly invalid packets
9fb9cbb1 141 */
a5e73c29 142static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
9fb9cbb1
YK
143 {
144/* ORIGINAL */
874ab923
JK
145/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
146/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
9fb9cbb1
YK
147/*
148 * sNO -> sSS Initialize a new connection
149 * sSS -> sSS Retransmitted SYN
874ab923
JK
150 * sS2 -> sS2 Late retransmitted SYN
151 * sSR -> sIG
9fb9cbb1 152 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
601e68e1 153 * are errors. Receiver will reply with RST
9fb9cbb1
YK
154 * and close the connection.
155 * Or we are not in sync and hold a dead connection.
156 * sFW -> sIG
157 * sCW -> sIG
158 * sLA -> sIG
159 * sTW -> sSS Reopened connection (RFC 1122).
160 * sCL -> sSS
161 */
874ab923 162/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
64f509ce 163/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
9fb9cbb1 164/*
874ab923
JK
165 * sNO -> sIV Too late and no reason to do anything
166 * sSS -> sIV Client can't send SYN and then SYN/ACK
167 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
64f509ce
JK
168 * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
169 * sES -> sIV Invalid SYN/ACK packets sent by the client
170 * sFW -> sIV
171 * sCW -> sIV
172 * sLA -> sIV
173 * sTW -> sIV
174 * sCL -> sIV
9fb9cbb1 175 */
874ab923 176/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
177/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
178/*
179 * sNO -> sIV Too late and no reason to do anything...
180 * sSS -> sIV Client migth not send FIN in this state:
181 * we enforce waiting for a SYN/ACK reply first.
874ab923 182 * sS2 -> sIV
9fb9cbb1
YK
183 * sSR -> sFW Close started.
184 * sES -> sFW
185 * sFW -> sLA FIN seen in both directions, waiting for
601e68e1 186 * the last ACK.
9fb9cbb1
YK
187 * Migth be a retransmitted FIN as well...
188 * sCW -> sLA
189 * sLA -> sLA Retransmitted FIN. Remain in the same state.
190 * sTW -> sTW
191 * sCL -> sCL
192 */
874ab923 193/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
194/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
195/*
196 * sNO -> sES Assumed.
197 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
874ab923 198 * sS2 -> sIV
9fb9cbb1
YK
199 * sSR -> sES Established state is reached.
200 * sES -> sES :-)
201 * sFW -> sCW Normal close request answered by ACK.
202 * sCW -> sCW
203 * sLA -> sTW Last ACK detected.
204 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
205 * sCL -> sCL
206 */
874ab923
JK
207/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
208/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
9fb9cbb1
YK
209/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
210 },
211 {
212/* REPLY */
874ab923
JK
213/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
214/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
9fb9cbb1
YK
215/*
216 * sNO -> sIV Never reached.
874ab923
JK
217 * sSS -> sS2 Simultaneous open
218 * sS2 -> sS2 Retransmitted simultaneous SYN
219 * sSR -> sIV Invalid SYN packets sent by the server
220 * sES -> sIV
9fb9cbb1
YK
221 * sFW -> sIV
222 * sCW -> sIV
223 * sLA -> sIV
224 * sTW -> sIV Reopened connection, but server may not do it.
225 * sCL -> sIV
226 */
874ab923 227/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
8a80c79a 228/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
9fb9cbb1
YK
229/*
230 * sSS -> sSR Standard open.
874ab923 231 * sS2 -> sSR Simultaneous open
8a80c79a 232 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
9fb9cbb1
YK
233 * sES -> sIG Late retransmitted SYN/ACK?
234 * sFW -> sIG Might be SYN/ACK answering ignored SYN
235 * sCW -> sIG
236 * sLA -> sIG
237 * sTW -> sIG
238 * sCL -> sIG
239 */
874ab923 240/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
241/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
242/*
243 * sSS -> sIV Server might not send FIN in this state.
874ab923 244 * sS2 -> sIV
9fb9cbb1
YK
245 * sSR -> sFW Close started.
246 * sES -> sFW
247 * sFW -> sLA FIN seen in both directions.
248 * sCW -> sLA
249 * sLA -> sLA Retransmitted FIN.
250 * sTW -> sTW
251 * sCL -> sCL
252 */
874ab923
JK
253/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
254/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
9fb9cbb1 255/*
73f30602 256 * sSS -> sIG Might be a half-open connection.
874ab923 257 * sS2 -> sIG
9fb9cbb1
YK
258 * sSR -> sSR Might answer late resent SYN.
259 * sES -> sES :-)
260 * sFW -> sCW Normal close request answered by ACK.
261 * sCW -> sCW
262 * sLA -> sTW Last ACK detected.
263 * sTW -> sTW Retransmitted last ACK.
264 * sCL -> sCL
265 */
874ab923
JK
266/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
267/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
9fb9cbb1 268/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
601e68e1 269 }
9fb9cbb1
YK
270};
271
d2ba1fde
G
272static inline struct nf_tcp_net *tcp_pernet(struct net *net)
273{
274 return &net->ct.nf_ct_proto.tcp;
275}
276
09f263cd
JE
277static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
278 struct nf_conntrack_tuple *tuple)
9fb9cbb1 279{
82f568fc
JE
280 const struct tcphdr *hp;
281 struct tcphdr _hdr;
9fb9cbb1
YK
282
283 /* Actually only need first 8 bytes. */
284 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
285 if (hp == NULL)
09f263cd 286 return false;
9fb9cbb1
YK
287
288 tuple->src.u.tcp.port = hp->source;
289 tuple->dst.u.tcp.port = hp->dest;
290
09f263cd 291 return true;
9fb9cbb1
YK
292}
293
09f263cd
JE
294static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
295 const struct nf_conntrack_tuple *orig)
9fb9cbb1
YK
296{
297 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
298 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
09f263cd 299 return true;
9fb9cbb1
YK
300}
301
302/* Print out the per-protocol part of the tuple. */
303static int tcp_print_tuple(struct seq_file *s,
304 const struct nf_conntrack_tuple *tuple)
305{
306 return seq_printf(s, "sport=%hu dport=%hu ",
307 ntohs(tuple->src.u.tcp.port),
308 ntohs(tuple->dst.u.tcp.port));
309}
310
311/* Print out the private part of the conntrack. */
440f0d58 312static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
9fb9cbb1
YK
313{
314 enum tcp_conntrack state;
315
440f0d58 316 spin_lock_bh(&ct->lock);
c88130bc 317 state = ct->proto.tcp.state;
440f0d58 318 spin_unlock_bh(&ct->lock);
9fb9cbb1
YK
319
320 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
321}
322
323static unsigned int get_conntrack_index(const struct tcphdr *tcph)
324{
325 if (tcph->rst) return TCP_RST_SET;
326 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
327 else if (tcph->fin) return TCP_FIN_SET;
328 else if (tcph->ack) return TCP_ACK_SET;
329 else return TCP_NONE_SET;
330}
331
332/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
333 in IP Filter' by Guido van Rooij.
601e68e1 334
631dd1a8
JM
335 http://www.sane.nl/events/sane2000/papers.html
336 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
601e68e1 337
9fb9cbb1
YK
338 The boundaries and the conditions are changed according to RFC793:
339 the packet must intersect the window (i.e. segments may be
340 after the right or before the left edge) and thus receivers may ACK
341 segments after the right edge of the window.
342
601e68e1 343 td_maxend = max(sack + max(win,1)) seen in reply packets
9fb9cbb1
YK
344 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
345 td_maxwin += seq + len - sender.td_maxend
346 if seq + len > sender.td_maxend
347 td_end = max(seq + len) seen in sent packets
601e68e1 348
9fb9cbb1
YK
349 I. Upper bound for valid data: seq <= sender.td_maxend
350 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
84ebe1cd
JK
351 III. Upper bound for valid (s)ack: sack <= receiver.td_end
352 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
9fb9cbb1 353
84ebe1cd
JK
354 where sack is the highest right edge of sack block found in the packet
355 or ack in the case of packet without SACK option.
9fb9cbb1 356
 The upper bound limit for a valid (s)ack is not ignored -
 we don't have to deal with fragments.
9fb9cbb1
YK
359*/
360
361static inline __u32 segment_seq_plus_len(__u32 seq,
362 size_t len,
363 unsigned int dataoff,
82f568fc 364 const struct tcphdr *tcph)
9fb9cbb1
YK
365{
366 /* XXX Should I use payload length field in IP/IPv6 header ?
367 * - YK */
368 return (seq + len - dataoff - tcph->doff*4
369 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
370}
601e68e1 371
9fb9cbb1
YK
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/* The larger of the sender's td_maxwin and MAXACKWINCONST. */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST	\
					       : (sender)->td_maxwin)
601e68e1 377
9fb9cbb1
YK
378/*
379 * Simplified tcp_parse_options routine from tcp_input.c
380 */
381static void tcp_options(const struct sk_buff *skb,
382 unsigned int dataoff,
82f568fc 383 const struct tcphdr *tcph,
9fb9cbb1
YK
384 struct ip_ct_tcp_state *state)
385{
386 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 387 const unsigned char *ptr;
9fb9cbb1
YK
388 int length = (tcph->doff*4) - sizeof(struct tcphdr);
389
390 if (!length)
391 return;
392
393 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
394 length, buff);
395 BUG_ON(ptr == NULL);
396
601e68e1 397 state->td_scale =
9fb9cbb1
YK
398 state->flags = 0;
399
400 while (length > 0) {
401 int opcode=*ptr++;
402 int opsize;
403
404 switch (opcode) {
405 case TCPOPT_EOL:
406 return;
407 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
408 length--;
409 continue;
410 default:
411 opsize=*ptr++;
412 if (opsize < 2) /* "silly options" */
413 return;
414 if (opsize > length)
4a5cc84a 415 return; /* don't parse partial options */
9fb9cbb1 416
601e68e1 417 if (opcode == TCPOPT_SACK_PERM
9fb9cbb1
YK
418 && opsize == TCPOLEN_SACK_PERM)
419 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
420 else if (opcode == TCPOPT_WINDOW
421 && opsize == TCPOLEN_WINDOW) {
422 state->td_scale = *(u_int8_t *)ptr;
423
424 if (state->td_scale > 14) {
425 /* See RFC1323 */
426 state->td_scale = 14;
427 }
428 state->flags |=
429 IP_CT_TCP_FLAG_WINDOW_SCALE;
430 }
431 ptr += opsize - 2;
432 length -= opsize;
433 }
434 }
435}
436
437static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
82f568fc 438 const struct tcphdr *tcph, __u32 *sack)
9fb9cbb1 439{
601e68e1 440 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 441 const unsigned char *ptr;
9fb9cbb1
YK
442 int length = (tcph->doff*4) - sizeof(struct tcphdr);
443 __u32 tmp;
444
445 if (!length)
446 return;
447
448 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
449 length, buff);
450 BUG_ON(ptr == NULL);
451
452 /* Fast path for timestamp-only option */
bb9fc373 453 if (length == TCPOLEN_TSTAMP_ALIGNED
8f05ce91
YH
454 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
455 | (TCPOPT_NOP << 16)
456 | (TCPOPT_TIMESTAMP << 8)
457 | TCPOLEN_TIMESTAMP))
9fb9cbb1
YK
458 return;
459
460 while (length > 0) {
461 int opcode = *ptr++;
462 int opsize, i;
463
464 switch (opcode) {
465 case TCPOPT_EOL:
466 return;
467 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
468 length--;
469 continue;
470 default:
471 opsize = *ptr++;
472 if (opsize < 2) /* "silly options" */
473 return;
474 if (opsize > length)
4a5cc84a 475 return; /* don't parse partial options */
9fb9cbb1 476
601e68e1
YH
477 if (opcode == TCPOPT_SACK
478 && opsize >= (TCPOLEN_SACK_BASE
479 + TCPOLEN_SACK_PERBLOCK)
480 && !((opsize - TCPOLEN_SACK_BASE)
481 % TCPOLEN_SACK_PERBLOCK)) {
482 for (i = 0;
483 i < (opsize - TCPOLEN_SACK_BASE);
484 i += TCPOLEN_SACK_PERBLOCK) {
534f81a5 485 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
9fb9cbb1
YK
486
487 if (after(tmp, *sack))
488 *sack = tmp;
489 }
490 return;
491 }
492 ptr += opsize - 2;
493 length -= opsize;
494 }
495 }
496}
497
f9dd09c7
JK
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Ask the nf_ct_nat_offset hook, if one is set, for the sequence number
 * offset that applies to @seq in direction @dir of @ct. Returns 0 when
 * no hook is set.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
			     enum ip_conntrack_dir dir,
			     u32 seq)
{
	typeof(nf_ct_nat_offset) hook = rcu_dereference(nf_ct_nat_offset);

	if (hook == NULL)
		return 0;

	return hook(ct, dir, seq);
}
#define NAT_OFFSET(ct, dir, seq) \
	(nat_offset(ct, dir, seq))
#else
/* No NAT support configured: the offset is always zero. */
#define NAT_OFFSET(ct, dir, seq)	0
#endif
512
09f263cd
JE
513static bool tcp_in_window(const struct nf_conn *ct,
514 struct ip_ct_tcp *state,
515 enum ip_conntrack_dir dir,
516 unsigned int index,
517 const struct sk_buff *skb,
518 unsigned int dataoff,
519 const struct tcphdr *tcph,
76108cea 520 u_int8_t pf)
9fb9cbb1 521{
c2a2c7e0 522 struct net *net = nf_ct_net(ct);
d2ba1fde 523 struct nf_tcp_net *tn = tcp_pernet(net);
9fb9cbb1
YK
524 struct ip_ct_tcp_state *sender = &state->seen[dir];
525 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
82f568fc 526 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1 527 __u32 seq, ack, sack, end, win, swin;
f9dd09c7 528 s16 receiver_offset;
356d7d88 529 bool res, in_recv_win;
9fb9cbb1
YK
530
531 /*
532 * Get the required data from the packet.
533 */
534 seq = ntohl(tcph->seq);
535 ack = sack = ntohl(tcph->ack_seq);
536 win = ntohs(tcph->window);
537 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
538
539 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
540 tcp_sack(skb, dataoff, tcph, &sack);
541
f9dd09c7 542 /* Take into account NAT sequence number mangling */
c7232c99 543 receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
f9dd09c7
JK
544 ack -= receiver_offset;
545 sack -= receiver_offset;
546
0d53778e
PM
547 pr_debug("tcp_in_window: START\n");
548 pr_debug("tcp_in_window: ");
3c9fba65 549 nf_ct_dump_tuple(tuple);
f9dd09c7
JK
550 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
551 seq, ack, receiver_offset, sack, receiver_offset, win, end);
0d53778e
PM
552 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
553 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
554 sender->td_end, sender->td_maxend, sender->td_maxwin,
555 sender->td_scale,
556 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
557 receiver->td_scale);
9fb9cbb1 558
874ab923 559 if (sender->td_maxwin == 0) {
9fb9cbb1
YK
560 /*
561 * Initialize sender data.
562 */
874ab923 563 if (tcph->syn) {
9fb9cbb1 564 /*
874ab923
JK
565 * SYN-ACK in reply to a SYN
566 * or SYN from reply direction in simultaneous open.
9fb9cbb1 567 */
601e68e1 568 sender->td_end =
9fb9cbb1
YK
569 sender->td_maxend = end;
570 sender->td_maxwin = (win == 0 ? 1 : win);
571
572 tcp_options(skb, dataoff, tcph, sender);
601e68e1 573 /*
9fb9cbb1
YK
574 * RFC 1323:
575 * Both sides must send the Window Scale option
576 * to enable window scaling in either direction.
577 */
578 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
579 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
601e68e1 580 sender->td_scale =
9fb9cbb1 581 receiver->td_scale = 0;
874ab923
JK
582 if (!tcph->ack)
583 /* Simultaneous open */
584 return true;
9fb9cbb1
YK
585 } else {
586 /*
587 * We are in the middle of a connection,
588 * its history is lost for us.
589 * Let's try to use the data from the packet.
601e68e1 590 */
9fb9cbb1 591 sender->td_end = end;
6ee0b693
CG
592 swin = win << sender->td_scale;
593 sender->td_maxwin = (swin == 0 ? 1 : swin);
9fb9cbb1 594 sender->td_maxend = end + sender->td_maxwin;
fac42a9a
PNA
595 /*
596 * We haven't seen traffic in the other direction yet
597 * but we have to tweak window tracking to pass III
598 * and IV until that happens.
599 */
600 if (receiver->td_maxwin == 0)
601 receiver->td_end = receiver->td_maxend = sack;
9fb9cbb1
YK
602 }
603 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
604 && dir == IP_CT_DIR_ORIGINAL)
605 || (state->state == TCP_CONNTRACK_SYN_RECV
606 && dir == IP_CT_DIR_REPLY))
607 && after(end, sender->td_end)) {
608 /*
609 * RFC 793: "if a TCP is reinitialized ... then it need
601e68e1 610 * not wait at all; it must only be sure to use sequence
9fb9cbb1
YK
611 * numbers larger than those recently used."
612 */
613 sender->td_end =
614 sender->td_maxend = end;
615 sender->td_maxwin = (win == 0 ? 1 : win);
616
617 tcp_options(skb, dataoff, tcph, sender);
618 }
619
620 if (!(tcph->ack)) {
621 /*
622 * If there is no ACK, just pretend it was set and OK.
623 */
624 ack = sack = receiver->td_end;
601e68e1
YH
625 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
626 (TCP_FLAG_ACK|TCP_FLAG_RST))
9fb9cbb1
YK
627 && (ack == 0)) {
628 /*
629 * Broken TCP stacks, that set ACK in RST packets as well
630 * with zero ack value.
631 */
632 ack = sack = receiver->td_end;
633 }
634
4a70bbfa 635 if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
9fb9cbb1 636 /*
4a70bbfa 637 * RST sent answering SYN.
9fb9cbb1
YK
638 */
639 seq = end = sender->td_end;
640
0d53778e 641 pr_debug("tcp_in_window: ");
3c9fba65 642 nf_ct_dump_tuple(tuple);
f9dd09c7
JK
643 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
644 seq, ack, receiver_offset, sack, receiver_offset, win, end);
0d53778e
PM
645 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
646 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
647 sender->td_end, sender->td_maxend, sender->td_maxwin,
648 sender->td_scale,
649 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
650 receiver->td_scale);
651
356d7d88
YC
652 /* Is the ending sequence in the receive window (if available)? */
653 in_recv_win = !receiver->td_maxwin ||
654 after(end, sender->td_end - receiver->td_maxwin - 1);
655
0d53778e
PM
656 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
657 before(seq, sender->td_maxend + 1),
356d7d88 658 (in_recv_win ? 1 : 0),
0d53778e 659 before(sack, receiver->td_end + 1),
84ebe1cd 660 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
9fb9cbb1 661
a09113c2 662 if (before(seq, sender->td_maxend + 1) &&
356d7d88 663 in_recv_win &&
a09113c2 664 before(sack, receiver->td_end + 1) &&
84ebe1cd 665 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
601e68e1 666 /*
9fb9cbb1
YK
667 * Take into account window scaling (RFC 1323).
668 */
669 if (!tcph->syn)
670 win <<= sender->td_scale;
671
672 /*
673 * Update sender data.
674 */
675 swin = win + (sack - ack);
676 if (sender->td_maxwin < swin)
677 sender->td_maxwin = swin;
ae375044 678 if (after(end, sender->td_end)) {
9fb9cbb1 679 sender->td_end = end;
ae375044
PM
680 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
681 }
bfcaa502
JK
682 if (tcph->ack) {
683 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
684 sender->td_maxack = ack;
685 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
686 } else if (after(ack, sender->td_maxack))
687 sender->td_maxack = ack;
688 }
689
9fb9cbb1
YK
690 /*
691 * Update receiver data.
692 */
fac42a9a 693 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
9fb9cbb1
YK
694 receiver->td_maxwin += end - sender->td_maxend;
695 if (after(sack + win, receiver->td_maxend - 1)) {
696 receiver->td_maxend = sack + win;
697 if (win == 0)
698 receiver->td_maxend++;
699 }
ae375044
PM
700 if (ack == receiver->td_end)
701 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
9fb9cbb1 702
601e68e1 703 /*
9fb9cbb1
YK
704 * Check retransmissions.
705 */
706 if (index == TCP_ACK_SET) {
707 if (state->last_dir == dir
708 && state->last_seq == seq
709 && state->last_ack == ack
c1fe3ca5
GH
710 && state->last_end == end
711 && state->last_win == win)
9fb9cbb1
YK
712 state->retrans++;
713 else {
714 state->last_dir = dir;
715 state->last_seq = seq;
716 state->last_ack = ack;
717 state->last_end = end;
c1fe3ca5 718 state->last_win = win;
9fb9cbb1
YK
719 state->retrans = 0;
720 }
721 }
09f263cd 722 res = true;
9fb9cbb1 723 } else {
09f263cd 724 res = false;
a09113c2 725 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
d2ba1fde 726 tn->tcp_be_liberal)
09f263cd 727 res = true;
c2a2c7e0 728 if (!res && LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 729 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
9fb9cbb1
YK
730 "nf_ct_tcp: %s ",
731 before(seq, sender->td_maxend + 1) ?
356d7d88 732 in_recv_win ?
9fb9cbb1 733 before(sack, receiver->td_end + 1) ?
f9dd09c7 734 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
9fb9cbb1
YK
735 : "ACK is under the lower bound (possible overly delayed ACK)"
736 : "ACK is over the upper bound (ACKed data not seen yet)"
737 : "SEQ is under the lower bound (already ACKed data retransmitted)"
738 : "SEQ is over the upper bound (over the window of the receiver)");
601e68e1
YH
739 }
740
09f263cd 741 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
0d53778e
PM
742 "receiver end=%u maxend=%u maxwin=%u\n",
743 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
744 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
9fb9cbb1
YK
745
746 return res;
747}
748
5c8ce7c9 749/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
a3433f35
CG
750static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
751 TCPHDR_URG) + 1] =
9fb9cbb1 752{
a3433f35
CG
753 [TCPHDR_SYN] = 1,
754 [TCPHDR_SYN|TCPHDR_URG] = 1,
755 [TCPHDR_SYN|TCPHDR_ACK] = 1,
756 [TCPHDR_RST] = 1,
757 [TCPHDR_RST|TCPHDR_ACK] = 1,
758 [TCPHDR_FIN|TCPHDR_ACK] = 1,
759 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
760 [TCPHDR_ACK] = 1,
761 [TCPHDR_ACK|TCPHDR_URG] = 1,
9fb9cbb1
YK
762};
763
764/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
8fea97ec 765static int tcp_error(struct net *net, struct nf_conn *tmpl,
74c51a14 766 struct sk_buff *skb,
9fb9cbb1
YK
767 unsigned int dataoff,
768 enum ip_conntrack_info *ctinfo,
76108cea 769 u_int8_t pf,
96f6bf82 770 unsigned int hooknum)
9fb9cbb1 771{
82f568fc
JE
772 const struct tcphdr *th;
773 struct tcphdr _tcph;
9fb9cbb1
YK
774 unsigned int tcplen = skb->len - dataoff;
775 u_int8_t tcpflags;
776
777 /* Smaller that minimal TCP header? */
778 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
779 if (th == NULL) {
c2a2c7e0 780 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 781 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
9fb9cbb1
YK
782 "nf_ct_tcp: short packet ");
783 return -NF_ACCEPT;
601e68e1
YH
784 }
785
9fb9cbb1
YK
786 /* Not whole TCP header or malformed packet */
787 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
c2a2c7e0 788 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 789 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
9fb9cbb1
YK
790 "nf_ct_tcp: truncated/malformed packet ");
791 return -NF_ACCEPT;
792 }
601e68e1 793
9fb9cbb1
YK
794 /* Checksum invalid? Ignore.
795 * We skip checking packets on the outgoing path
84fa7933 796 * because the checksum is assumed to be correct.
9fb9cbb1
YK
797 */
798 /* FIXME: Source route IP option packets --RR */
c04d0552 799 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
96f6bf82 800 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
c2a2c7e0 801 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 802 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
9fb9cbb1
YK
803 "nf_ct_tcp: bad TCP checksum ");
804 return -NF_ACCEPT;
805 }
806
807 /* Check TCP flags. */
a3433f35 808 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
9fb9cbb1 809 if (!tcp_valid_flags[tcpflags]) {
c2a2c7e0 810 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 811 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
9fb9cbb1
YK
812 "nf_ct_tcp: invalid TCP flag combination ");
813 return -NF_ACCEPT;
814 }
815
816 return NF_ACCEPT;
817}
818
2c8503f5
PNA
819static unsigned int *tcp_get_timeouts(struct net *net)
820{
be0593c6 821 return tcp_pernet(net)->timeouts;
2c8503f5
PNA
822}
823
9fb9cbb1 824/* Returns verdict for packet, or -1 for invalid. */
c88130bc 825static int tcp_packet(struct nf_conn *ct,
9fb9cbb1
YK
826 const struct sk_buff *skb,
827 unsigned int dataoff,
828 enum ip_conntrack_info ctinfo,
76108cea 829 u_int8_t pf,
2c8503f5
PNA
830 unsigned int hooknum,
831 unsigned int *timeouts)
9fb9cbb1 832{
c2a2c7e0 833 struct net *net = nf_ct_net(ct);
d2ba1fde 834 struct nf_tcp_net *tn = tcp_pernet(net);
0d53778e 835 struct nf_conntrack_tuple *tuple;
9fb9cbb1
YK
836 enum tcp_conntrack new_state, old_state;
837 enum ip_conntrack_dir dir;
82f568fc
JE
838 const struct tcphdr *th;
839 struct tcphdr _tcph;
9fb9cbb1
YK
840 unsigned long timeout;
841 unsigned int index;
842
843 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
844 BUG_ON(th == NULL);
845
440f0d58 846 spin_lock_bh(&ct->lock);
c88130bc 847 old_state = ct->proto.tcp.state;
9fb9cbb1
YK
848 dir = CTINFO2DIR(ctinfo);
849 index = get_conntrack_index(th);
850 new_state = tcp_conntracks[dir][index][old_state];
c88130bc 851 tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1
YK
852
853 switch (new_state) {
17311393
JK
854 case TCP_CONNTRACK_SYN_SENT:
855 if (old_state < TCP_CONNTRACK_TIME_WAIT)
856 break;
b2155e7f
JK
857 /* RFC 1122: "When a connection is closed actively,
858 * it MUST linger in TIME-WAIT state for a time 2xMSL
859 * (Maximum Segment Lifetime). However, it MAY accept
860 * a new SYN from the remote TCP to reopen the connection
861 * directly from TIME-WAIT state, if..."
862 * We ignore the conditions because we are in the
863 * TIME-WAIT state anyway.
864 *
865 * Handle aborted connections: we and the server
866 * think there is an existing connection but the client
867 * aborts it and starts a new one.
868 */
869 if (((ct->proto.tcp.seen[dir].flags
870 | ct->proto.tcp.seen[!dir].flags)
871 & IP_CT_TCP_FLAG_CLOSE_INIT)
c88130bc
PM
872 || (ct->proto.tcp.last_dir == dir
873 && ct->proto.tcp.last_index == TCP_RST_SET)) {
bc34b841
JK
874 /* Attempt to reopen a closed/aborted connection.
875 * Delete this connection and look up again. */
440f0d58 876 spin_unlock_bh(&ct->lock);
2aec609f 877
6b69fe0c
PM
878 /* Only repeat if we can actually remove the timer.
879 * Destruction may already be in progress in process
880 * context and we must give it a chance to terminate.
881 */
2aec609f 882 if (nf_ct_kill(ct))
6b69fe0c 883 return -NF_REPEAT;
ec8d5409 884 return NF_DROP;
17311393
JK
885 }
886 /* Fall through */
9fb9cbb1 887 case TCP_CONNTRACK_IGNORE:
73f30602 888 /* Ignored packets:
b2155e7f
JK
889 *
890 * Our connection entry may be out of sync, so ignore
891 * packets which may signal the real connection between
892 * the client and the server.
73f30602
JK
893 *
894 * a) SYN in ORIGINAL
895 * b) SYN/ACK in REPLY
601e68e1 896 * c) ACK in reply direction after initial SYN in original.
b2155e7f
JK
897 *
898 * If the ignored packet is invalid, the receiver will send
899 * a RST we'll catch below.
73f30602 900 */
9fb9cbb1 901 if (index == TCP_SYNACK_SET
c88130bc
PM
902 && ct->proto.tcp.last_index == TCP_SYN_SET
903 && ct->proto.tcp.last_dir != dir
904 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
b2155e7f 905 /* b) This SYN/ACK acknowledges a SYN that we earlier
9fb9cbb1
YK
906 * ignored as invalid. This means that the client and
907 * the server are both in sync, while the firewall is
c4832c7b
PNA
908 * not. We get in sync from the previously annotated
909 * values.
9fb9cbb1 910 */
c4832c7b
PNA
911 old_state = TCP_CONNTRACK_SYN_SENT;
912 new_state = TCP_CONNTRACK_SYN_RECV;
913 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
914 ct->proto.tcp.last_end;
915 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
916 ct->proto.tcp.last_end;
917 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
918 ct->proto.tcp.last_win == 0 ?
919 1 : ct->proto.tcp.last_win;
920 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
921 ct->proto.tcp.last_wscale;
922 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
923 ct->proto.tcp.last_flags;
924 memset(&ct->proto.tcp.seen[dir], 0,
925 sizeof(struct ip_ct_tcp_state));
926 break;
9fb9cbb1 927 }
c88130bc
PM
928 ct->proto.tcp.last_index = index;
929 ct->proto.tcp.last_dir = dir;
930 ct->proto.tcp.last_seq = ntohl(th->seq);
931 ct->proto.tcp.last_end =
9fb9cbb1 932 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
c4832c7b
PNA
933 ct->proto.tcp.last_win = ntohs(th->window);
934
935 /* a) This is a SYN in ORIGINAL. The client and the server
936 * may be in sync but we are not. In that case, we annotate
937 * the TCP options and let the packet go through. If it is a
938 * valid SYN packet, the server will reply with a SYN/ACK, and
939 * then we'll get in sync. Otherwise, the server ignores it. */
940 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
941 struct ip_ct_tcp_state seen = {};
942
943 ct->proto.tcp.last_flags =
944 ct->proto.tcp.last_wscale = 0;
945 tcp_options(skb, dataoff, th, &seen);
946 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
947 ct->proto.tcp.last_flags |=
948 IP_CT_TCP_FLAG_WINDOW_SCALE;
949 ct->proto.tcp.last_wscale = seen.td_scale;
950 }
951 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
952 ct->proto.tcp.last_flags |=
953 IP_CT_TCP_FLAG_SACK_PERM;
954 }
955 }
440f0d58 956 spin_unlock_bh(&ct->lock);
c2a2c7e0 957 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 958 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
1a4ac987
PNA
959 "nf_ct_tcp: invalid packet ignored in "
960 "state %s ", tcp_conntrack_names[old_state]);
9fb9cbb1
YK
961 return NF_ACCEPT;
962 case TCP_CONNTRACK_MAX:
963 /* Invalid packet */
0d53778e
PM
964 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
965 dir, get_conntrack_index(th), old_state);
440f0d58 966 spin_unlock_bh(&ct->lock);
c2a2c7e0 967 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6 968 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
9fb9cbb1
YK
969 "nf_ct_tcp: invalid state ");
970 return -NF_ACCEPT;
9fb9cbb1 971 case TCP_CONNTRACK_CLOSE:
bfcaa502
JK
972 if (index == TCP_RST_SET
973 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
974 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
975 /* Invalid RST */
334a47f6 976 spin_unlock_bh(&ct->lock);
bfcaa502 977 if (LOG_INVALID(net, IPPROTO_TCP))
30e0c6a6
G
978 nf_log_packet(net, pf, 0, skb, NULL, NULL,
979 NULL, "nf_ct_tcp: invalid RST ");
bfcaa502
JK
980 return -NF_ACCEPT;
981 }
9fb9cbb1 982 if (index == TCP_RST_SET
c88130bc
PM
983 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
984 && ct->proto.tcp.last_index == TCP_SYN_SET)
985 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
986 && ct->proto.tcp.last_index == TCP_ACK_SET))
987 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
93b1fae4 988 /* RST sent to invalid SYN or ACK we had let through
73f30602
JK
989 * at a) and c) above:
990 *
991 * a) SYN was in window then
992 * c) we hold a half-open connection.
993 *
994 * Delete our connection entry.
9fb9cbb1 995 * We skip window checking, because packet might ACK
73f30602 996 * segments we ignored. */
9fb9cbb1
YK
997 goto in_window;
998 }
93b1fae4 999 /* Just fall through */
9fb9cbb1
YK
1000 default:
1001 /* Keep compilers happy. */
1002 break;
1003 }
1004
c88130bc 1005 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
9fb9cbb1 1006 skb, dataoff, th, pf)) {
440f0d58 1007 spin_unlock_bh(&ct->lock);
9fb9cbb1
YK
1008 return -NF_ACCEPT;
1009 }
1010 in_window:
1011 /* From now on we have got in-window packets */
c88130bc
PM
1012 ct->proto.tcp.last_index = index;
1013 ct->proto.tcp.last_dir = dir;
9fb9cbb1 1014
0d53778e 1015 pr_debug("tcp_conntracks: ");
3c9fba65 1016 nf_ct_dump_tuple(tuple);
0d53778e
PM
1017 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1018 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1019 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1020 old_state, new_state);
9fb9cbb1 1021
c88130bc 1022 ct->proto.tcp.state = new_state;
9fb9cbb1 1023 if (old_state != new_state
d0c1fd7a 1024 && new_state == TCP_CONNTRACK_FIN_WAIT)
c88130bc 1025 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
ae375044 1026
d2ba1fde 1027 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
2c8503f5
PNA
1028 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1029 timeout = timeouts[TCP_CONNTRACK_RETRANS];
ae375044
PM
1030 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1031 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
2c8503f5
PNA
1032 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1033 timeout = timeouts[TCP_CONNTRACK_UNACK];
ae375044 1034 else
2c8503f5 1035 timeout = timeouts[new_state];
440f0d58 1036 spin_unlock_bh(&ct->lock);
9fb9cbb1 1037
9fb9cbb1 1038 if (new_state != old_state)
a71996fc 1039 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
9fb9cbb1 1040
c88130bc 1041 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
9fb9cbb1
YK
1042 /* If only reply is a RST, we can consider ourselves not to
1043 have an established connection: this is a fairly common
1044 problem case, so we can delete the conntrack
1045 immediately. --RR */
1046 if (th->rst) {
718d4ad9 1047 nf_ct_kill_acct(ct, ctinfo, skb);
9fb9cbb1
YK
1048 return NF_ACCEPT;
1049 }
6547a221
FW
1050 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1051 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1052 */
1053 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1054 timeout > timeouts[TCP_CONNTRACK_UNACK])
1055 timeout = timeouts[TCP_CONNTRACK_UNACK];
c88130bc 1056 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
9fb9cbb1
YK
1057 && (old_state == TCP_CONNTRACK_SYN_RECV
1058 || old_state == TCP_CONNTRACK_ESTABLISHED)
1059 && new_state == TCP_CONNTRACK_ESTABLISHED) {
601e68e1
YH
1060 /* Set ASSURED if we see see valid ack in ESTABLISHED
1061 after SYN_RECV or a valid answer for a picked up
9fb9cbb1 1062 connection. */
c88130bc 1063 set_bit(IPS_ASSURED_BIT, &ct->status);
858b3133 1064 nf_conntrack_event_cache(IPCT_ASSURED, ct);
9fb9cbb1 1065 }
c88130bc 1066 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
9fb9cbb1
YK
1067
1068 return NF_ACCEPT;
1069}
601e68e1 1070
9fb9cbb1 1071/* Called when a new connection for this protocol found. */
09f263cd 1072static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
2c8503f5 1073 unsigned int dataoff, unsigned int *timeouts)
9fb9cbb1
YK
1074{
1075 enum tcp_conntrack new_state;
82f568fc
JE
1076 const struct tcphdr *th;
1077 struct tcphdr _tcph;
d2ba1fde
G
1078 struct net *net = nf_ct_net(ct);
1079 struct nf_tcp_net *tn = tcp_pernet(net);
82f568fc
JE
1080 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1081 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
9fb9cbb1
YK
1082
1083 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1084 BUG_ON(th == NULL);
1085
1086 /* Don't need lock here: this conntrack not in circulation yet */
e5fc9e7a 1087 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
9fb9cbb1
YK
1088
1089 /* Invalid: delete conntrack */
1090 if (new_state >= TCP_CONNTRACK_MAX) {
0d53778e 1091 pr_debug("nf_ct_tcp: invalid new deleting.\n");
09f263cd 1092 return false;
9fb9cbb1
YK
1093 }
1094
1095 if (new_state == TCP_CONNTRACK_SYN_SENT) {
e5fc9e7a 1096 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
9fb9cbb1 1097 /* SYN packet */
c88130bc 1098 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1099 segment_seq_plus_len(ntohl(th->seq), skb->len,
1100 dataoff, th);
c88130bc
PM
1101 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1102 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1103 ct->proto.tcp.seen[0].td_maxwin = 1;
1104 ct->proto.tcp.seen[0].td_maxend =
1105 ct->proto.tcp.seen[0].td_end;
1106
1107 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
d2ba1fde 1108 } else if (tn->tcp_loose == 0) {
9fb9cbb1 1109 /* Don't try to pick up connections. */
09f263cd 1110 return false;
9fb9cbb1 1111 } else {
e5fc9e7a 1112 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
9fb9cbb1
YK
1113 /*
1114 * We are in the middle of a connection,
1115 * its history is lost for us.
1116 * Let's try to use the data from the packet.
1117 */
c88130bc 1118 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1119 segment_seq_plus_len(ntohl(th->seq), skb->len,
1120 dataoff, th);
c88130bc
PM
1121 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1122 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1123 ct->proto.tcp.seen[0].td_maxwin = 1;
1124 ct->proto.tcp.seen[0].td_maxend =
1125 ct->proto.tcp.seen[0].td_end +
1126 ct->proto.tcp.seen[0].td_maxwin;
9fb9cbb1 1127
a09113c2
PM
1128 /* We assume SACK and liberal window checking to handle
1129 * window scaling */
c88130bc
PM
1130 ct->proto.tcp.seen[0].flags =
1131 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1132 IP_CT_TCP_FLAG_BE_LIBERAL;
9fb9cbb1 1133 }
601e68e1 1134
9fb9cbb1 1135 /* tcp_packet will set them */
c88130bc 1136 ct->proto.tcp.last_index = TCP_NONE_SET;
601e68e1 1137
0d53778e
PM
1138 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1139 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1140 sender->td_end, sender->td_maxend, sender->td_maxwin,
1141 sender->td_scale,
1142 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1143 receiver->td_scale);
09f263cd 1144 return true;
9fb9cbb1 1145}
c1d10adb 1146
c0cd1156 1147#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
c1d10adb
PNA
1148
1149#include <linux/netfilter/nfnetlink.h>
1150#include <linux/netfilter/nfnetlink_conntrack.h>
1151
fdf70832 1152static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
440f0d58 1153 struct nf_conn *ct)
c1d10adb 1154{
df6fb868 1155 struct nlattr *nest_parms;
c8e2078c 1156 struct nf_ct_tcp_flags tmp = {};
601e68e1 1157
440f0d58 1158 spin_lock_bh(&ct->lock);
df6fb868
PM
1159 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1160 if (!nest_parms)
1161 goto nla_put_failure;
1162
4925a459
DM
1163 if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1164 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1165 ct->proto.tcp.seen[0].td_scale) ||
1166 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1167 ct->proto.tcp.seen[1].td_scale))
1168 goto nla_put_failure;
c8e2078c
PNA
1169
1170 tmp.flags = ct->proto.tcp.seen[0].flags;
4925a459
DM
1171 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1172 sizeof(struct nf_ct_tcp_flags), &tmp))
1173 goto nla_put_failure;
c8e2078c
PNA
1174
1175 tmp.flags = ct->proto.tcp.seen[1].flags;
4925a459
DM
1176 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1177 sizeof(struct nf_ct_tcp_flags), &tmp))
1178 goto nla_put_failure;
440f0d58 1179 spin_unlock_bh(&ct->lock);
c1d10adb 1180
df6fb868 1181 nla_nest_end(skb, nest_parms);
c1d10adb
PNA
1182
1183 return 0;
1184
df6fb868 1185nla_put_failure:
440f0d58 1186 spin_unlock_bh(&ct->lock);
c1d10adb
PNA
1187 return -1;
1188}
1189
f73e924c
PM
1190static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1191 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1192 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1193 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1194 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1195 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
c1d10adb
PNA
1196};
1197
fdf70832 1198static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
c1d10adb 1199{
2f0d2f10 1200 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
df6fb868 1201 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
f73e924c 1202 int err;
c1d10adb
PNA
1203
1204 /* updates could not contain anything about the private
1205 * protocol info, in that case skip the parsing */
2f0d2f10 1206 if (!pattr)
c1d10adb
PNA
1207 return 0;
1208
2f0d2f10 1209 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
f73e924c
PM
1210 if (err < 0)
1211 return err;
c1d10adb 1212
5f7da4d2
PM
1213 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1214 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
c1d10adb
PNA
1215 return -EINVAL;
1216
440f0d58 1217 spin_lock_bh(&ct->lock);
5f7da4d2
PM
1218 if (tb[CTA_PROTOINFO_TCP_STATE])
1219 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
c8e2078c 1220
df6fb868 1221 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
c8e2078c 1222 struct nf_ct_tcp_flags *attr =
df6fb868 1223 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
c8e2078c
PNA
1224 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1225 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1226 }
1227
df6fb868 1228 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
c8e2078c 1229 struct nf_ct_tcp_flags *attr =
df6fb868 1230 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
c8e2078c
PNA
1231 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1232 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1233 }
1234
df6fb868
PM
1235 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1236 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
c8e2078c
PNA
1237 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1238 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
77236b6e
PM
1239 ct->proto.tcp.seen[0].td_scale =
1240 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1241 ct->proto.tcp.seen[1].td_scale =
1242 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
c8e2078c 1243 }
440f0d58 1244 spin_unlock_bh(&ct->lock);
c1d10adb
PNA
1245
1246 return 0;
1247}
a400c30e
HE
1248
1249static int tcp_nlattr_size(void)
1250{
1251 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1252 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1253}
1254
1255static int tcp_nlattr_tuple_size(void)
1256{
1257 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1258}
c1d10adb 1259#endif
933a41e7 1260
50978462
PNA
1261#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1262
1263#include <linux/netfilter/nfnetlink.h>
1264#include <linux/netfilter/nfnetlink_cttimeout.h>
1265
8264deb8
G
1266static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1267 struct net *net, void *data)
50978462
PNA
1268{
1269 unsigned int *timeouts = data;
8264deb8 1270 struct nf_tcp_net *tn = tcp_pernet(net);
50978462
PNA
1271 int i;
1272
1273 /* set default TCP timeouts. */
1274 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
8264deb8 1275 timeouts[i] = tn->timeouts[i];
50978462
PNA
1276
1277 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1278 timeouts[TCP_CONNTRACK_SYN_SENT] =
1279 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1280 }
1281 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1282 timeouts[TCP_CONNTRACK_SYN_RECV] =
1283 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1284 }
1285 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1286 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1287 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1288 }
1289 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1290 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1291 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1292 }
1293 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1294 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1295 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1296 }
1297 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1298 timeouts[TCP_CONNTRACK_LAST_ACK] =
1299 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1300 }
1301 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1302 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1303 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1304 }
1305 if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1306 timeouts[TCP_CONNTRACK_CLOSE] =
1307 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1308 }
1309 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1310 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1311 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1312 }
1313 if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1314 timeouts[TCP_CONNTRACK_RETRANS] =
1315 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1316 }
1317 if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1318 timeouts[TCP_CONNTRACK_UNACK] =
1319 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1320 }
1321 return 0;
1322}
1323
1324static int
1325tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1326{
1327 const unsigned int *timeouts = data;
1328
4925a459
DM
1329 if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1330 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1331 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1332 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1333 nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1334 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1335 nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1336 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1337 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1338 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1339 nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1340 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1341 nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1342 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1343 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1344 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1345 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1346 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1347 nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1348 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1349 nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1350 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1351 goto nla_put_failure;
50978462
PNA
1352 return 0;
1353
1354nla_put_failure:
1355 return -ENOSPC;
1356}
1357
1358static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1359 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
1360 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
1361 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
1362 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
1363 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
1364 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
1365 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1366 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1367 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
6d1fafca
FW
1368 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
1369 [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
50978462
PNA
1370};
1371#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1372
933a41e7 1373#ifdef CONFIG_SYSCTL
933a41e7
PM
1374static struct ctl_table tcp_sysctl_table[] = {
1375 {
933a41e7 1376 .procname = "nf_conntrack_tcp_timeout_syn_sent",
933a41e7
PM
1377 .maxlen = sizeof(unsigned int),
1378 .mode = 0644,
6d9f239a 1379 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1380 },
1381 {
933a41e7 1382 .procname = "nf_conntrack_tcp_timeout_syn_recv",
933a41e7
PM
1383 .maxlen = sizeof(unsigned int),
1384 .mode = 0644,
6d9f239a 1385 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1386 },
1387 {
933a41e7 1388 .procname = "nf_conntrack_tcp_timeout_established",
933a41e7
PM
1389 .maxlen = sizeof(unsigned int),
1390 .mode = 0644,
6d9f239a 1391 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1392 },
1393 {
933a41e7 1394 .procname = "nf_conntrack_tcp_timeout_fin_wait",
933a41e7
PM
1395 .maxlen = sizeof(unsigned int),
1396 .mode = 0644,
6d9f239a 1397 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1398 },
1399 {
933a41e7 1400 .procname = "nf_conntrack_tcp_timeout_close_wait",
933a41e7
PM
1401 .maxlen = sizeof(unsigned int),
1402 .mode = 0644,
6d9f239a 1403 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1404 },
1405 {
933a41e7 1406 .procname = "nf_conntrack_tcp_timeout_last_ack",
933a41e7
PM
1407 .maxlen = sizeof(unsigned int),
1408 .mode = 0644,
6d9f239a 1409 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1410 },
1411 {
933a41e7 1412 .procname = "nf_conntrack_tcp_timeout_time_wait",
933a41e7
PM
1413 .maxlen = sizeof(unsigned int),
1414 .mode = 0644,
6d9f239a 1415 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1416 },
1417 {
933a41e7 1418 .procname = "nf_conntrack_tcp_timeout_close",
933a41e7
PM
1419 .maxlen = sizeof(unsigned int),
1420 .mode = 0644,
6d9f239a 1421 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1422 },
1423 {
933a41e7 1424 .procname = "nf_conntrack_tcp_timeout_max_retrans",
933a41e7
PM
1425 .maxlen = sizeof(unsigned int),
1426 .mode = 0644,
6d9f239a 1427 .proc_handler = proc_dointvec_jiffies,
933a41e7 1428 },
ae375044
PM
1429 {
1430 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
ae375044
PM
1431 .maxlen = sizeof(unsigned int),
1432 .mode = 0644,
6d9f239a 1433 .proc_handler = proc_dointvec_jiffies,
ae375044 1434 },
933a41e7 1435 {
933a41e7 1436 .procname = "nf_conntrack_tcp_loose",
933a41e7
PM
1437 .maxlen = sizeof(unsigned int),
1438 .mode = 0644,
6d9f239a 1439 .proc_handler = proc_dointvec,
933a41e7
PM
1440 },
1441 {
933a41e7 1442 .procname = "nf_conntrack_tcp_be_liberal",
933a41e7
PM
1443 .maxlen = sizeof(unsigned int),
1444 .mode = 0644,
6d9f239a 1445 .proc_handler = proc_dointvec,
933a41e7
PM
1446 },
1447 {
933a41e7 1448 .procname = "nf_conntrack_tcp_max_retrans",
933a41e7
PM
1449 .maxlen = sizeof(unsigned int),
1450 .mode = 0644,
6d9f239a 1451 .proc_handler = proc_dointvec,
933a41e7 1452 },
f8572d8f 1453 { }
933a41e7 1454};
a999e683
PM
1455
1456#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1457static struct ctl_table tcp_compat_sysctl_table[] = {
1458 {
a999e683 1459 .procname = "ip_conntrack_tcp_timeout_syn_sent",
a999e683
PM
1460 .maxlen = sizeof(unsigned int),
1461 .mode = 0644,
6d9f239a 1462 .proc_handler = proc_dointvec_jiffies,
a999e683 1463 },
874ab923
JK
1464 {
1465 .procname = "ip_conntrack_tcp_timeout_syn_sent2",
874ab923
JK
1466 .maxlen = sizeof(unsigned int),
1467 .mode = 0644,
1468 .proc_handler = proc_dointvec_jiffies,
1469 },
a999e683 1470 {
a999e683 1471 .procname = "ip_conntrack_tcp_timeout_syn_recv",
a999e683
PM
1472 .maxlen = sizeof(unsigned int),
1473 .mode = 0644,
6d9f239a 1474 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1475 },
1476 {
a999e683 1477 .procname = "ip_conntrack_tcp_timeout_established",
a999e683
PM
1478 .maxlen = sizeof(unsigned int),
1479 .mode = 0644,
6d9f239a 1480 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1481 },
1482 {
a999e683 1483 .procname = "ip_conntrack_tcp_timeout_fin_wait",
a999e683
PM
1484 .maxlen = sizeof(unsigned int),
1485 .mode = 0644,
6d9f239a 1486 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1487 },
1488 {
a999e683 1489 .procname = "ip_conntrack_tcp_timeout_close_wait",
a999e683
PM
1490 .maxlen = sizeof(unsigned int),
1491 .mode = 0644,
6d9f239a 1492 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1493 },
1494 {
a999e683 1495 .procname = "ip_conntrack_tcp_timeout_last_ack",
a999e683
PM
1496 .maxlen = sizeof(unsigned int),
1497 .mode = 0644,
6d9f239a 1498 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1499 },
1500 {
a999e683 1501 .procname = "ip_conntrack_tcp_timeout_time_wait",
a999e683
PM
1502 .maxlen = sizeof(unsigned int),
1503 .mode = 0644,
6d9f239a 1504 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1505 },
1506 {
a999e683 1507 .procname = "ip_conntrack_tcp_timeout_close",
a999e683
PM
1508 .maxlen = sizeof(unsigned int),
1509 .mode = 0644,
6d9f239a 1510 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1511 },
1512 {
a999e683 1513 .procname = "ip_conntrack_tcp_timeout_max_retrans",
a999e683
PM
1514 .maxlen = sizeof(unsigned int),
1515 .mode = 0644,
6d9f239a 1516 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1517 },
1518 {
a999e683 1519 .procname = "ip_conntrack_tcp_loose",
a999e683
PM
1520 .maxlen = sizeof(unsigned int),
1521 .mode = 0644,
6d9f239a 1522 .proc_handler = proc_dointvec,
a999e683
PM
1523 },
1524 {
a999e683 1525 .procname = "ip_conntrack_tcp_be_liberal",
a999e683
PM
1526 .maxlen = sizeof(unsigned int),
1527 .mode = 0644,
6d9f239a 1528 .proc_handler = proc_dointvec,
a999e683
PM
1529 },
1530 {
a999e683 1531 .procname = "ip_conntrack_tcp_max_retrans",
a999e683
PM
1532 .maxlen = sizeof(unsigned int),
1533 .mode = 0644,
6d9f239a 1534 .proc_handler = proc_dointvec,
a999e683 1535 },
f8572d8f 1536 { }
a999e683
PM
1537};
1538#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
933a41e7
PM
1539#endif /* CONFIG_SYSCTL */
1540
efa758fe
G
/*
 * Give @pn its own copy of tcp_sysctl_table and point every entry at the
 * matching field of @tn.  Does nothing if @pn already has a table or if
 * CONFIG_SYSCTL is off.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	/* Timeout slot behind each of the leading table entries, in the
	 * order in which tcp_sysctl_table lists them. */
	static const unsigned int timeout_slot[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
		TCP_CONNTRACK_UNACK,
	};
	const unsigned int nr_timeouts =
		sizeof(timeout_slot) / sizeof(timeout_slot[0]);
	struct ctl_table *table;
	unsigned int n;

	if (pn->ctl_table)
		return 0;

	pn->ctl_table = kmemdup(tcp_sysctl_table,
				sizeof(tcp_sysctl_table),
				GFP_KERNEL);
	if (!pn->ctl_table)
		return -ENOMEM;

	table = pn->ctl_table;
	for (n = 0; n < nr_timeouts; n++)
		table[n].data = &tn->timeouts[timeout_slot[n]];

	/* The three plain integer knobs follow the timeouts. */
	table[nr_timeouts + 0].data = &tn->tcp_loose;
	table[nr_timeouts + 1].data = &tn->tcp_be_liberal;
	table[nr_timeouts + 2].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1570
efa758fe
G
/*
 * Give @pn its own copy of tcp_compat_sysctl_table and point every entry
 * at the matching field of @tn.  Compiles to a plain "return 0" unless both
 * CONFIG_SYSCTL and CONFIG_NF_CONNTRACK_PROC_COMPAT are set.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	/* Timeout slot behind each of the leading table entries, in the
	 * order in which tcp_compat_sysctl_table lists them. */
	static const unsigned int timeout_slot[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_SENT2,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
	};
	const unsigned int nr_timeouts =
		sizeof(timeout_slot) / sizeof(timeout_slot[0]);
	struct ctl_table *table;
	unsigned int n;

	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
				       sizeof(tcp_compat_sysctl_table),
				       GFP_KERNEL);
	if (!pn->ctl_compat_table)
		return -ENOMEM;

	table = pn->ctl_compat_table;
	for (n = 0; n < nr_timeouts; n++)
		table[n].data = &tn->timeouts[timeout_slot[n]];

	/* The three plain integer knobs follow the timeouts. */
	table[nr_timeouts + 0].data = &tn->tcp_loose;
	table[nr_timeouts + 1].data = &tn->tcp_be_liberal;
	table[nr_timeouts + 2].data = &tn->tcp_max_retrans;
#endif
#endif
	return 0;
}
1599
efa758fe 1600static int tcp_init_net(struct net *net, u_int16_t proto)
d2ba1fde 1601{
efa758fe 1602 int ret;
d2ba1fde 1603 struct nf_tcp_net *tn = tcp_pernet(net);
efa758fe
G
1604 struct nf_proto_net *pn = &tn->pn;
1605
1606 if (!pn->users) {
1607 int i;
d2ba1fde 1608
d2ba1fde
G
1609 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1610 tn->timeouts[i] = tcp_timeouts[i];
1611
1612 tn->tcp_loose = nf_ct_tcp_loose;
1613 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1614 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1615 }
1616
efa758fe
G
1617 if (proto == AF_INET) {
1618 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1619 if (ret < 0)
1620 return ret;
d2ba1fde 1621
efa758fe
G
1622 ret = tcp_kmemdup_sysctl_table(pn, tn);
1623 if (ret < 0)
1624 nf_ct_kfree_compat_sysctl_table(pn);
1625 } else
1626 ret = tcp_kmemdup_sysctl_table(pn, tn);
d2ba1fde 1627
d2ba1fde
G
1628 return ret;
1629}
1630
08911475
PNA
1631static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1632{
1633 return &net->ct.nf_ct_proto.tcp.pn;
1634}
1635
61075af5 1636struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
9fb9cbb1
YK
1637{
1638 .l3proto = PF_INET,
605dcad6 1639 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1640 .name = "tcp",
1641 .pkt_to_tuple = tcp_pkt_to_tuple,
1642 .invert_tuple = tcp_invert_tuple,
1643 .print_tuple = tcp_print_tuple,
1644 .print_conntrack = tcp_print_conntrack,
1645 .packet = tcp_packet,
2c8503f5 1646 .get_timeouts = tcp_get_timeouts,
9fb9cbb1 1647 .new = tcp_new,
96f6bf82 1648 .error = tcp_error,
c0cd1156 1649#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
fdf70832 1650 .to_nlattr = tcp_to_nlattr,
a400c30e 1651 .nlattr_size = tcp_nlattr_size,
fdf70832
PM
1652 .from_nlattr = nlattr_to_tcp,
1653 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1654 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
a400c30e 1655 .nlattr_tuple_size = tcp_nlattr_tuple_size,
f73e924c 1656 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1657#endif
50978462
PNA
1658#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1659 .ctnl_timeout = {
1660 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1661 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1662 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1663 .obj_size = sizeof(unsigned int) *
1664 TCP_CONNTRACK_TIMEOUT_MAX,
1665 .nla_policy = tcp_timeout_nla_policy,
1666 },
1667#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
efa758fe 1668 .init_net = tcp_init_net,
08911475 1669 .get_net_proto = tcp_get_net_proto,
9fb9cbb1 1670};
13b18339 1671EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
9fb9cbb1 1672
61075af5 1673struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
9fb9cbb1
YK
1674{
1675 .l3proto = PF_INET6,
605dcad6 1676 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1677 .name = "tcp",
1678 .pkt_to_tuple = tcp_pkt_to_tuple,
1679 .invert_tuple = tcp_invert_tuple,
1680 .print_tuple = tcp_print_tuple,
1681 .print_conntrack = tcp_print_conntrack,
1682 .packet = tcp_packet,
2c8503f5 1683 .get_timeouts = tcp_get_timeouts,
9fb9cbb1 1684 .new = tcp_new,
96f6bf82 1685 .error = tcp_error,
c0cd1156 1686#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
fdf70832 1687 .to_nlattr = tcp_to_nlattr,
a400c30e 1688 .nlattr_size = tcp_nlattr_size,
fdf70832
PM
1689 .from_nlattr = nlattr_to_tcp,
1690 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1691 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
a400c30e 1692 .nlattr_tuple_size = tcp_nlattr_tuple_size,
f73e924c 1693 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1694#endif
50978462
PNA
1695#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1696 .ctnl_timeout = {
1697 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1698 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1699 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1700 .obj_size = sizeof(unsigned int) *
1701 TCP_CONNTRACK_TIMEOUT_MAX,
1702 .nla_policy = tcp_timeout_nla_policy,
1703 },
1704#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
efa758fe 1705 .init_net = tcp_init_net,
08911475 1706 .get_net_proto = tcp_get_net_proto,
9fb9cbb1 1707};
13b18339 1708EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
This page took 0.731019 seconds and 5 git commands to generate.