Commit | Line | Data |
---|---|---|
f4bc17cd JA |
1 | /* |
2 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | |
3 | * | |
4 | * Portions Copyright (C) 2001-2002 | |
5 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | |
6 | * | |
7 | * Portions Copyright (C) 2003-2010 | |
8 | * Julian Anastasov | |
9 | * | |
10 | * | |
11 | * This code is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
e664eabd | 22 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
f4bc17cd JA |
23 | * |
24 | * | |
25 | * Authors: | |
26 | * Ben North <ben@redfrontdoor.org> | |
27 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels | |
28 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match | |
29 | * | |
30 | * | |
31 | * Current status: | |
32 | * | |
33 | * - provide conntrack confirmation for new and related connections, so | |
34 | * that we can see their proper conntrack state in all hooks | |
35 | * - support for all forwarding methods, not only NAT | |
36 | * - FTP support (NAT), ability to support other NAT apps with expectations | |
37 | * - to correctly create expectations for related NAT connections the proper | |
38 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires | |
39 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables | |
40 | * NAT rules are needed) | |
41 | * - alter reply for NAT when forwarding packet in original direction: | |
42 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or | |
43 | * when RELATED conntrack is created from real server (Active FTP DATA) | |
44 | * - if iptables_nat is not loaded the Passive FTP will not work (the | |
45 | * PASV response can not be NAT-ed) but Active FTP should work | |
46 | * | |
47 | */ | |
48 | ||
49 | #define KMSG_COMPONENT "IPVS" | |
50 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
51 | ||
52 | #include <linux/module.h> | |
53 | #include <linux/types.h> | |
54 | #include <linux/kernel.h> | |
55 | #include <linux/errno.h> | |
56 | #include <linux/compiler.h> | |
57 | #include <linux/vmalloc.h> | |
58 | #include <linux/skbuff.h> | |
59 | #include <net/ip.h> | |
60 | #include <linux/netfilter.h> | |
61 | #include <linux/netfilter_ipv4.h> | |
62 | #include <net/ip_vs.h> | |
63 | #include <net/netfilter/nf_conntrack_core.h> | |
64 | #include <net/netfilter/nf_conntrack_expect.h> | |
b25adce1 | 65 | #include <net/netfilter/nf_conntrack_seqadj.h> |
f4bc17cd JA |
66 | #include <net/netfilter/nf_conntrack_helper.h> |
67 | #include <net/netfilter/nf_conntrack_zones.h> | |
68 | ||
69 | ||
/*
 * Format/argument pairs for the debug messages in this file.
 * Each FMT_* string has to be used together with the ARG_* macro of the
 * same name, which expands to the comma-separated argument list that the
 * format consumes.
 */

/* Tuple: src.u3.ip:src.u.all -> dst.u3.ip:dst.u.all / dst.protonum */
#define FMT_TUPLE	"%pI4:%u->%pI4:%u/%u"
#define ARG_TUPLE(t)					\
	&(t)->src.u3.ip, ntohs((t)->src.u.all),		\
	&(t)->dst.u3.ip, ntohs((t)->dst.u.all),		\
	(t)->dst.protonum

/* Connection: caddr:cport -> vaddr:vport -> daddr:dport / protocol:state */
#define FMT_CONN	"%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
#define ARG_CONN(conn)					\
	&((conn)->caddr.ip), ntohs((conn)->cport),	\
	&((conn)->vaddr.ip), ntohs((conn)->vport),	\
	&((conn)->daddr.ip), ntohs((conn)->dport),	\
	(conn)->protocol, (conn)->state
80 | ||
81 | void | |
82 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) | |
83 | { | |
84 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 85 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
f4bc17cd JA |
86 | struct nf_conntrack_tuple new_tuple; |
87 | ||
88 | if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || | |
89 | nf_ct_is_dying(ct)) | |
90 | return; | |
91 | ||
92 | /* Never alter conntrack for non-NAT conns */ | |
93 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | |
94 | return; | |
95 | ||
96 | /* Alter reply only in original direction */ | |
97 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | |
98 | return; | |
99 | ||
b25adce1 JDB |
100 | /* Applications may adjust TCP seqs */ |
101 | if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && | |
102 | !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) | |
103 | return; | |
104 | ||
f4bc17cd JA |
105 | /* |
106 | * The connection is not yet in the hashtable, so we update it. | |
107 | * CIP->VIP will remain the same, so leave the tuple in | |
108 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the | |
109 | * real-server we will see RIP->DIP. | |
110 | */ | |
111 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
112 | /* | |
113 | * This will also take care of UDP and other protocols. | |
114 | */ | |
115 | if (outin) { | |
116 | new_tuple.src.u3 = cp->daddr; | |
117 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | |
118 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | |
119 | new_tuple.src.u.tcp.port = cp->dport; | |
120 | } else { | |
121 | new_tuple.dst.u3 = cp->vaddr; | |
122 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | |
123 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | |
124 | new_tuple.dst.u.tcp.port = cp->vport; | |
125 | } | |
126 | IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " | |
127 | "ctinfo=%d, old reply=" FMT_TUPLE | |
128 | ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", | |
129 | __func__, ct, ct->status, ctinfo, | |
130 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), | |
131 | ARG_TUPLE(&new_tuple), ARG_CONN(cp)); | |
132 | nf_conntrack_alter_reply(ct, &new_tuple); | |
133 | } | |
134 | ||
/*
 * Confirm the conntrack entry attached to @skb.
 * Thin wrapper: the result of nf_conntrack_confirm() is handed back to the
 * caller unchanged.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb)
{
	int ret = nf_conntrack_confirm(skb);

	return ret;
}
139 | ||
140 | /* | |
141 | * Called from init_conntrack() as expectfn handler. | |
142 | */ | |
143 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |
144 | struct nf_conntrack_expect *exp) | |
145 | { | |
146 | struct nf_conntrack_tuple *orig, new_reply; | |
147 | struct ip_vs_conn *cp; | |
f11017ec | 148 | struct ip_vs_conn_param p; |
6e67e586 | 149 | struct net *net = nf_ct_net(ct); |
f4bc17cd JA |
150 | |
151 | if (exp->tuple.src.l3num != PF_INET) | |
152 | return; | |
153 | ||
154 | /* | |
155 | * We assume that no NF locks are held before this callback. | |
156 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | |
157 | * expectations even if they use wildcard values, now we provide the | |
158 | * actual values from the newly created original conntrack direction. | |
159 | * The conntrack is confirmed when packet reaches IPVS hooks. | |
160 | */ | |
161 | ||
162 | /* RS->CLIENT */ | |
163 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
19913dec | 164 | ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum, |
f11017ec SH |
165 | &orig->src.u3, orig->src.u.tcp.port, |
166 | &orig->dst.u3, orig->dst.u.tcp.port, &p); | |
167 | cp = ip_vs_conn_out_get(&p); | |
f4bc17cd JA |
168 | if (cp) { |
169 | /* Change reply CLIENT->RS to CLIENT->VS */ | |
170 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
171 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | |
172 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | |
173 | __func__, ct, ct->status, | |
174 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
175 | ARG_CONN(cp)); | |
176 | new_reply.dst.u3 = cp->vaddr; | |
177 | new_reply.dst.u.tcp.port = cp->vport; | |
178 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | |
179 | ", inout cp=" FMT_CONN "\n", | |
180 | __func__, ct, | |
181 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
182 | ARG_CONN(cp)); | |
183 | goto alter; | |
184 | } | |
185 | ||
186 | /* CLIENT->VS */ | |
f11017ec | 187 | cp = ip_vs_conn_in_get(&p); |
f4bc17cd JA |
188 | if (cp) { |
189 | /* Change reply VS->CLIENT to RS->CLIENT */ | |
190 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
191 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | |
192 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | |
193 | __func__, ct, ct->status, | |
194 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
195 | ARG_CONN(cp)); | |
196 | new_reply.src.u3 = cp->daddr; | |
197 | new_reply.src.u.tcp.port = cp->dport; | |
198 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " | |
199 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | |
200 | __func__, ct, | |
201 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
202 | ARG_CONN(cp)); | |
203 | goto alter; | |
204 | } | |
205 | ||
206 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE | |
207 | " - unknown expect\n", | |
208 | __func__, ct, ct->status, ARG_TUPLE(orig)); | |
209 | return; | |
210 | ||
211 | alter: | |
212 | /* Never alter conntrack for non-NAT conns */ | |
213 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | |
214 | nf_conntrack_alter_reply(ct, &new_reply); | |
215 | ip_vs_conn_put(cp); | |
216 | return; | |
217 | } | |
218 | ||
219 | /* | |
220 | * Create NF conntrack expectation with wildcard (optional) source port. | |
221 | * Then the default callback function will alter the reply and will confirm | |
222 | * the conntrack entry when the first packet comes. | |
223 | * Use port 0 to expect connection from any port. | |
224 | */ | |
225 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | |
226 | struct ip_vs_conn *cp, u_int8_t proto, | |
227 | const __be16 port, int from_rs) | |
228 | { | |
229 | struct nf_conntrack_expect *exp; | |
230 | ||
231 | if (ct == NULL || nf_ct_is_untracked(ct)) | |
232 | return; | |
233 | ||
234 | exp = nf_ct_expect_alloc(ct); | |
235 | if (!exp) | |
236 | return; | |
237 | ||
238 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), | |
239 | from_rs ? &cp->daddr : &cp->caddr, | |
240 | from_rs ? &cp->caddr : &cp->vaddr, | |
241 | proto, port ? &port : NULL, | |
242 | from_rs ? &cp->cport : &cp->vport); | |
243 | ||
244 | exp->expectfn = ip_vs_nfct_expect_callback; | |
245 | ||
246 | IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", | |
247 | __func__, ct, ARG_TUPLE(&exp->tuple)); | |
248 | nf_ct_expect_related(exp); | |
249 | nf_ct_expect_put(exp); | |
250 | } | |
251 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); | |
252 | ||
253 | /* | |
254 | * Our connection was terminated, try to drop the conntrack immediately | |
255 | */ | |
256 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | |
257 | { | |
258 | struct nf_conntrack_tuple_hash *h; | |
259 | struct nf_conn *ct; | |
260 | struct nf_conntrack_tuple tuple; | |
261 | ||
262 | if (!cp->cport) | |
263 | return; | |
264 | ||
265 | tuple = (struct nf_conntrack_tuple) { | |
266 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; | |
267 | tuple.src.u3 = cp->caddr; | |
268 | tuple.src.u.all = cp->cport; | |
269 | tuple.src.l3num = cp->af; | |
270 | tuple.dst.u3 = cp->vaddr; | |
271 | tuple.dst.u.all = cp->vport; | |
272 | ||
273 | IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE | |
274 | " for conn " FMT_CONN "\n", | |
275 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | |
276 | ||
58dbc6f2 | 277 | h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); |
f4bc17cd JA |
278 | if (h) { |
279 | ct = nf_ct_tuplehash_to_ctrack(h); | |
280 | /* Show what happens instead of calling nf_ct_kill() */ | |
281 | if (del_timer(&ct->timeout)) { | |
282 | IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" | |
283 | FMT_TUPLE "\n", | |
284 | __func__, ct, ARG_TUPLE(&tuple)); | |
285 | if (ct->timeout.function) | |
286 | ct->timeout.function(ct->timeout.data); | |
287 | } else { | |
288 | IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" | |
289 | FMT_TUPLE "\n", | |
290 | __func__, ct, ARG_TUPLE(&tuple)); | |
291 | } | |
292 | nf_ct_put(ct); | |
293 | } else { | |
294 | IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", | |
295 | __func__, ARG_TUPLE(&tuple)); | |
296 | } | |
297 | } | |
298 |