netfilter: nf_nat: Also handle non-ESTABLISHED routing changes in MASQUERADE
[deliverable/linux.git] / net / ipv4 / netfilter / iptable_nat.c
CommitLineData
5b1158e9
JK
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
c7232c99 3 * (C) 2011 Patrick McHardy <kaber@trash.net>
5b1158e9
JK
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
c7232c99
PM
9
10#include <linux/module.h>
5b1158e9
JK
11#include <linux/netfilter.h>
12#include <linux/netfilter_ipv4.h>
c7232c99
PM
13#include <linux/netfilter_ipv4/ip_tables.h>
14#include <linux/ip.h>
5b1158e9 15#include <net/ip.h>
5b1158e9 16
5b1158e9 17#include <net/netfilter/nf_nat.h>
5b1158e9 18#include <net/netfilter/nf_nat_core.h>
c7232c99
PM
19#include <net/netfilter/nf_nat_l3proto.h>
20
21static const struct xt_table nf_nat_ipv4_table = {
22 .name = "nat",
23 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
24 (1 << NF_INET_POST_ROUTING) |
25 (1 << NF_INET_LOCAL_OUT) |
26 (1 << NF_INET_LOCAL_IN),
27 .me = THIS_MODULE,
28 .af = NFPROTO_IPV4,
29};
5b1158e9 30
c7232c99 31static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
5b1158e9 32{
c7232c99
PM
33 /* Force range to this IP; let proto decide mapping for
34 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
35 */
36 struct nf_nat_range range;
37
38 range.flags = 0;
39 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
40 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
41 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
42 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
43
44 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
45}
5b1158e9 46
c7232c99
PM
47static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
48 const struct net_device *in,
49 const struct net_device *out,
50 struct nf_conn *ct)
51{
52 struct net *net = nf_ct_net(ct);
53 unsigned int ret;
5b1158e9 54
c7232c99
PM
55 ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
56 if (ret == NF_ACCEPT) {
57 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
58 ret = alloc_null_binding(ct, hooknum);
5b1158e9 59 }
c7232c99 60 return ret;
5b1158e9 61}
5b1158e9
JK
62
63static unsigned int
c7232c99
PM
64nf_nat_ipv4_fn(unsigned int hooknum,
65 struct sk_buff *skb,
66 const struct net_device *in,
67 const struct net_device *out,
68 int (*okfn)(struct sk_buff *))
5b1158e9
JK
69{
70 struct nf_conn *ct;
71 enum ip_conntrack_info ctinfo;
72 struct nf_conn_nat *nat;
5b1158e9
JK
73 /* maniptype == SRC for postrouting. */
74 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
75
76 /* We never see fragments: conntrack defrags on pre-routing
c7232c99
PM
77 * and local-out, and nf_nat_out protects post-routing.
78 */
56f8a75c 79 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
5b1158e9 80
3db05fea 81 ct = nf_ct_get(skb, &ctinfo);
5b1158e9 82 /* Can't track? It's not due to stress, or conntrack would
c7232c99
PM
83 * have dropped it. Hence it's the user's responsibilty to
84 * packet filter it out, or implement conntrack/NAT for that
85 * protocol. 8) --RR
86 */
42cf800c 87 if (!ct)
5b1158e9 88 return NF_ACCEPT;
5b1158e9
JK
89
90 /* Don't try to NAT if this packet is not conntracked */
5bfddbd4 91 if (nf_ct_is_untracked(ct))
5b1158e9
JK
92 return NF_ACCEPT;
93
94 nat = nfct_nat(ct);
2d59e5ca 95 if (!nat) {
8c87238b
PM
96 /* NAT module was loaded late. */
97 if (nf_ct_is_confirmed(ct))
98 return NF_ACCEPT;
2d59e5ca
YK
99 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
100 if (nat == NULL) {
0d53778e 101 pr_debug("failed to add NAT extension\n");
2d59e5ca
YK
102 return NF_ACCEPT;
103 }
104 }
5b1158e9
JK
105
106 switch (ctinfo) {
107 case IP_CT_RELATED:
fb048833 108 case IP_CT_RELATED_REPLY:
3db05fea 109 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
c7232c99
PM
110 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
111 hooknum))
5b1158e9
JK
112 return NF_DROP;
113 else
114 return NF_ACCEPT;
115 }
116 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
117 case IP_CT_NEW:
5b1158e9 118 /* Seen it before? This can happen for loopback, retrans,
c7232c99
PM
119 * or local packets.
120 */
5b1158e9
JK
121 if (!nf_nat_initialized(ct, maniptype)) {
122 unsigned int ret;
123
c68cd6cc 124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
22068311 125 if (ret != NF_ACCEPT)
5b1158e9 126 return ret;
c65ef8dc 127 } else {
0d53778e 128 pr_debug("Already setup manip %s for ct %p\n",
cbc9f2f4 129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
0d53778e 130 ct);
c65ef8dc
AC
131 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
132 goto oif_changed;
133 }
5b1158e9
JK
134 break;
135
136 default:
137 /* ESTABLISHED */
138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
fb048833 139 ctinfo == IP_CT_ESTABLISHED_REPLY);
c65ef8dc
AC
140 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
141 goto oif_changed;
5b1158e9
JK
142 }
143
3db05fea 144 return nf_nat_packet(ct, ctinfo, hooknum, skb);
c65ef8dc
AC
145
146oif_changed:
147 nf_ct_kill_acct(ct, ctinfo, skb);
148 return NF_DROP;
5b1158e9
JK
149}
150
151static unsigned int
c7232c99
PM
152nf_nat_ipv4_in(unsigned int hooknum,
153 struct sk_buff *skb,
154 const struct net_device *in,
155 const struct net_device *out,
156 int (*okfn)(struct sk_buff *))
5b1158e9
JK
157{
158 unsigned int ret;
3db05fea 159 __be32 daddr = ip_hdr(skb)->daddr;
5b1158e9 160
c7232c99 161 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
5b1158e9 162 if (ret != NF_DROP && ret != NF_STOLEN &&
adf30907
ED
163 daddr != ip_hdr(skb)->daddr)
164 skb_dst_drop(skb);
165
5b1158e9
JK
166 return ret;
167}
168
169static unsigned int
c7232c99
PM
170nf_nat_ipv4_out(unsigned int hooknum,
171 struct sk_buff *skb,
172 const struct net_device *in,
173 const struct net_device *out,
174 int (*okfn)(struct sk_buff *))
5b1158e9
JK
175{
176#ifdef CONFIG_XFRM
72b72949 177 const struct nf_conn *ct;
5b1158e9
JK
178 enum ip_conntrack_info ctinfo;
179#endif
180 unsigned int ret;
181
182 /* root is playing with raw sockets. */
3db05fea
HX
183 if (skb->len < sizeof(struct iphdr) ||
184 ip_hdrlen(skb) < sizeof(struct iphdr))
5b1158e9
JK
185 return NF_ACCEPT;
186
c7232c99 187 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
5b1158e9
JK
188#ifdef CONFIG_XFRM
189 if (ret != NF_DROP && ret != NF_STOLEN &&
c7232c99 190 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
3db05fea 191 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
5b1158e9
JK
192 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
193
3666ed1c
JP
194 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
195 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
38fe36a2
UW
196 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
197 ct->tuplehash[dir].tuple.src.u.all !=
c7232c99
PM
198 ct->tuplehash[!dir].tuple.dst.u.all))
199 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
200 ret = NF_DROP;
5b1158e9
JK
201 }
202#endif
203 return ret;
204}
205
206static unsigned int
c7232c99
PM
207nf_nat_ipv4_local_fn(unsigned int hooknum,
208 struct sk_buff *skb,
209 const struct net_device *in,
210 const struct net_device *out,
211 int (*okfn)(struct sk_buff *))
5b1158e9 212{
72b72949 213 const struct nf_conn *ct;
5b1158e9
JK
214 enum ip_conntrack_info ctinfo;
215 unsigned int ret;
216
217 /* root is playing with raw sockets. */
3db05fea
HX
218 if (skb->len < sizeof(struct iphdr) ||
219 ip_hdrlen(skb) < sizeof(struct iphdr))
5b1158e9
JK
220 return NF_ACCEPT;
221
c7232c99 222 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
5b1158e9 223 if (ret != NF_DROP && ret != NF_STOLEN &&
3db05fea 224 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
5b1158e9
JK
225 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
226
227 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
848c29fd 228 ct->tuplehash[!dir].tuple.src.u3.ip) {
3db05fea 229 if (ip_route_me_harder(skb, RTN_UNSPEC))
5b1158e9 230 ret = NF_DROP;
848c29fd
PM
231 }
232#ifdef CONFIG_XFRM
c7232c99 233 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
38fe36a2 234 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
c7232c99 235 ct->tuplehash[dir].tuple.dst.u.all !=
848c29fd 236 ct->tuplehash[!dir].tuple.src.u.all)
c7232c99 237 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
848c29fd
PM
238 ret = NF_DROP;
239#endif
5b1158e9
JK
240 }
241 return ret;
242}
243
c7232c99 244static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
5b1158e9
JK
245 /* Before packet filtering, change destination */
246 {
c7232c99 247 .hook = nf_nat_ipv4_in,
5b1158e9 248 .owner = THIS_MODULE,
24c232d8 249 .pf = NFPROTO_IPV4,
6e23ae2a 250 .hooknum = NF_INET_PRE_ROUTING,
5b1158e9
JK
251 .priority = NF_IP_PRI_NAT_DST,
252 },
253 /* After packet filtering, change source */
254 {
c7232c99 255 .hook = nf_nat_ipv4_out,
5b1158e9 256 .owner = THIS_MODULE,
24c232d8 257 .pf = NFPROTO_IPV4,
6e23ae2a 258 .hooknum = NF_INET_POST_ROUTING,
5b1158e9
JK
259 .priority = NF_IP_PRI_NAT_SRC,
260 },
5b1158e9
JK
261 /* Before packet filtering, change destination */
262 {
c7232c99 263 .hook = nf_nat_ipv4_local_fn,
5b1158e9 264 .owner = THIS_MODULE,
24c232d8 265 .pf = NFPROTO_IPV4,
6e23ae2a 266 .hooknum = NF_INET_LOCAL_OUT,
5b1158e9
JK
267 .priority = NF_IP_PRI_NAT_DST,
268 },
269 /* After packet filtering, change source */
270 {
c7232c99 271 .hook = nf_nat_ipv4_fn,
5b1158e9 272 .owner = THIS_MODULE,
24c232d8 273 .pf = NFPROTO_IPV4,
6e23ae2a 274 .hooknum = NF_INET_LOCAL_IN,
5b1158e9
JK
275 .priority = NF_IP_PRI_NAT_SRC,
276 },
5b1158e9
JK
277};
278
c7232c99 279static int __net_init iptable_nat_net_init(struct net *net)
5b1158e9 280{
c7232c99
PM
281 struct ipt_replace *repl;
282
283 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
284 if (repl == NULL)
285 return -ENOMEM;
286 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
287 kfree(repl);
6229b75d 288 return PTR_RET(net->ipv4.nat_table);
c7232c99 289}
5b1158e9 290
c7232c99
PM
291static void __net_exit iptable_nat_net_exit(struct net *net)
292{
293 ipt_unregister_table(net, net->ipv4.nat_table);
294}
5b1158e9 295
c7232c99
PM
296static struct pernet_operations iptable_nat_net_ops = {
297 .init = iptable_nat_net_init,
298 .exit = iptable_nat_net_exit,
299};
5b1158e9 300
c7232c99
PM
301static int __init iptable_nat_init(void)
302{
303 int err;
304
305 err = register_pernet_subsys(&iptable_nat_net_ops);
306 if (err < 0)
307 goto err1;
308
309 err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
310 if (err < 0)
311 goto err2;
312 return 0;
313
314err2:
315 unregister_pernet_subsys(&iptable_nat_net_ops);
316err1:
317 return err;
5b1158e9
JK
318}
319
c7232c99 320static void __exit iptable_nat_exit(void)
5b1158e9 321{
c7232c99
PM
322 nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
323 unregister_pernet_subsys(&iptable_nat_net_ops);
5b1158e9
JK
324}
325
c7232c99
PM
326module_init(iptable_nat_init);
327module_exit(iptable_nat_exit);
5b1158e9
JK
328
329MODULE_LICENSE("GPL");
This page took 0.499164 seconds and 5 git commands to generate.