/* ip_nat_helper.c - generic support functions for NAT helpers
 *
 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
 * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *	14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
 *		- add support for SACK adjustment
 *	14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
 *		- merge SACK support into newnat API
 *	16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
 *		- make ip_nat_resize_packet more generic (TCP and UDP)
 *		- add ip_nat_mangle_udp_packet
 */
18 #include <linux/module.h>
19 #include <linux/kmod.h>
20 #include <linux/types.h>
21 #include <linux/timer.h>
22 #include <linux/skbuff.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <net/checksum.h>
30 #include <linux/netfilter_ipv4/ip_conntrack.h>
31 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
32 #include <linux/netfilter_ipv4/ip_nat.h>
33 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
34 #include <linux/netfilter_ipv4/ip_nat_core.h>
35 #include <linux/netfilter_ipv4/ip_nat_helper.h>
/*
 * Debug tracing helpers.  Change "#if 0" to "#if 1" to route DEBUGP() to
 * printk() and to make DUMP_OFFSET() print the three fields of a
 * struct ip_nat_seq; otherwise both expand to nothing.
 *
 * DUMP_OFFSET() takes a pointer and is meant to be used as a statement,
 * i.e. followed by a semicolon at the call site.
 */
#if 0
#define DEBUGP(format, args...) printk(format, ## args)
#define DUMP_OFFSET(x)							\
	printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
	       (x)->offset_before, (x)->offset_after, (x)->correction_pos)
#else
#define DEBUGP(format, args...)
#define DUMP_OFFSET(x)
#endif

/* Taken by adjust_tcp_sequence() around its update of a sequence record. */
static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
47 /* Setup TCP sequence correction given this change at this sequence */
49 adjust_tcp_sequence(u32 seq
,
51 struct ip_conntrack
*ct
,
52 enum ip_conntrack_info ctinfo
)
55 struct ip_nat_seq
*this_way
, *other_way
;
57 DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
58 (*skb
)->len
, new_size
);
60 dir
= CTINFO2DIR(ctinfo
);
62 this_way
= &ct
->nat
.info
.seq
[dir
];
63 other_way
= &ct
->nat
.info
.seq
[!dir
];
65 DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
66 DUMP_OFFSET(this_way
);
68 spin_lock_bh(&ip_nat_seqofs_lock
);
70 /* SYN adjust. If it's uninitialized, or this is after last
71 * correction, record it: we don't handle more than one
72 * adjustment in the window, but do deal with common case of a
74 if (this_way
->offset_before
== this_way
->offset_after
75 || before(this_way
->correction_pos
, seq
)) {
76 this_way
->correction_pos
= seq
;
77 this_way
->offset_before
= this_way
->offset_after
;
78 this_way
->offset_after
+= sizediff
;
80 spin_unlock_bh(&ip_nat_seqofs_lock
);
82 DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
83 DUMP_OFFSET(this_way
);
86 /* Frobs data inside this packet, which is linear. */
87 static void mangle_contents(struct sk_buff
*skb
,
89 unsigned int match_offset
,
90 unsigned int match_len
,
91 const char *rep_buffer
,
96 BUG_ON(skb_is_nonlinear(skb
));
97 data
= (unsigned char *)skb
->nh
.iph
+ dataoff
;
99 /* move post-replacement */
100 memmove(data
+ match_offset
+ rep_len
,
101 data
+ match_offset
+ match_len
,
102 skb
->tail
- (data
+ match_offset
+ match_len
));
104 /* insert data from buffer */
105 memcpy(data
+ match_offset
, rep_buffer
, rep_len
);
107 /* update skb info */
108 if (rep_len
> match_len
) {
109 DEBUGP("ip_nat_mangle_packet: Extending packet by "
110 "%u from %u bytes\n", rep_len
- match_len
,
112 skb_put(skb
, rep_len
- match_len
);
114 DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
115 "%u from %u bytes\n", match_len
- rep_len
,
117 __skb_trim(skb
, skb
->len
+ rep_len
- match_len
);
120 /* fix IP hdr checksum information */
121 skb
->nh
.iph
->tot_len
= htons(skb
->len
);
122 ip_send_check(skb
->nh
.iph
);
125 /* Unusual, but possible case. */
126 static int enlarge_skb(struct sk_buff
**pskb
, unsigned int extra
)
128 struct sk_buff
*nskb
;
130 if ((*pskb
)->len
+ extra
> 65535)
133 nskb
= skb_copy_expand(*pskb
, skb_headroom(*pskb
), extra
, GFP_ATOMIC
);
137 /* Transfer socket to new skb. */
139 skb_set_owner_w(nskb
, (*pskb
)->sk
);
145 /* Generic function for mangling variable-length address changes inside
146 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
149 * Takes care about all the nasty sequence number changes, checksumming,
150 * skb enlargement, ...
154 ip_nat_mangle_tcp_packet(struct sk_buff
**pskb
,
155 struct ip_conntrack
*ct
,
156 enum ip_conntrack_info ctinfo
,
157 unsigned int match_offset
,
158 unsigned int match_len
,
159 const char *rep_buffer
,
160 unsigned int rep_len
)
166 if (!skb_make_writable(pskb
, (*pskb
)->len
))
169 if (rep_len
> match_len
170 && rep_len
- match_len
> skb_tailroom(*pskb
)
171 && !enlarge_skb(pskb
, rep_len
- match_len
))
174 SKB_LINEAR_ASSERT(*pskb
);
176 iph
= (*pskb
)->nh
.iph
;
177 tcph
= (void *)iph
+ iph
->ihl
*4;
179 oldlen
= (*pskb
)->len
- iph
->ihl
*4;
180 mangle_contents(*pskb
, iph
->ihl
*4 + tcph
->doff
*4,
181 match_offset
, match_len
, rep_buffer
, rep_len
);
183 datalen
= (*pskb
)->len
- iph
->ihl
*4;
184 if ((*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
) {
186 tcph
->check
= tcp_v4_check(tcph
, datalen
,
187 iph
->saddr
, iph
->daddr
,
188 csum_partial((char *)tcph
,
191 tcph
->check
= nf_proto_csum_update(*pskb
,
192 htons(oldlen
) ^ 0xFFFF,
196 if (rep_len
!= match_len
) {
197 set_bit(IPS_SEQ_ADJUST_BIT
, &ct
->status
);
198 adjust_tcp_sequence(ntohl(tcph
->seq
),
199 (int)rep_len
- (int)match_len
,
201 /* Tell TCP window tracking about seq change */
202 ip_conntrack_tcp_update(*pskb
, ct
, CTINFO2DIR(ctinfo
));
206 EXPORT_SYMBOL(ip_nat_mangle_tcp_packet
);
208 /* Generic function for mangling variable-length address changes inside
209 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
210 * command in the Amanda protocol)
212 * Takes care about all the nasty sequence number changes, checksumming,
213 * skb enlargement, ...
215 * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
216 * should be fairly easy to do.
219 ip_nat_mangle_udp_packet(struct sk_buff
**pskb
,
220 struct ip_conntrack
*ct
,
221 enum ip_conntrack_info ctinfo
,
222 unsigned int match_offset
,
223 unsigned int match_len
,
224 const char *rep_buffer
,
225 unsigned int rep_len
)
231 /* UDP helpers might accidentally mangle the wrong packet */
232 iph
= (*pskb
)->nh
.iph
;
233 if ((*pskb
)->len
< iph
->ihl
*4 + sizeof(*udph
) +
234 match_offset
+ match_len
)
237 if (!skb_make_writable(pskb
, (*pskb
)->len
))
240 if (rep_len
> match_len
241 && rep_len
- match_len
> skb_tailroom(*pskb
)
242 && !enlarge_skb(pskb
, rep_len
- match_len
))
245 iph
= (*pskb
)->nh
.iph
;
246 udph
= (void *)iph
+ iph
->ihl
*4;
248 oldlen
= (*pskb
)->len
- iph
->ihl
*4;
249 mangle_contents(*pskb
, iph
->ihl
*4 + sizeof(*udph
),
250 match_offset
, match_len
, rep_buffer
, rep_len
);
252 /* update the length of the UDP packet */
253 datalen
= (*pskb
)->len
- iph
->ihl
*4;
254 udph
->len
= htons(datalen
);
256 /* fix udp checksum if udp checksum was previously calculated */
257 if (!udph
->check
&& (*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
)
260 if ((*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
) {
262 udph
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
263 datalen
, IPPROTO_UDP
,
264 csum_partial((char *)udph
,
269 udph
->check
= nf_proto_csum_update(*pskb
,
270 htons(oldlen
) ^ 0xFFFF,
275 EXPORT_SYMBOL(ip_nat_mangle_udp_packet
);
277 /* Adjust one found SACK option including checksum correction */
279 sack_adjust(struct sk_buff
*skb
,
281 unsigned int sackoff
,
282 unsigned int sackend
,
283 struct ip_nat_seq
*natseq
)
285 while (sackoff
< sackend
) {
286 struct tcp_sack_block_wire
*sack
;
287 u_int32_t new_start_seq
, new_end_seq
;
289 sack
= (void *)skb
->data
+ sackoff
;
290 if (after(ntohl(sack
->start_seq
) - natseq
->offset_before
,
291 natseq
->correction_pos
))
292 new_start_seq
= ntohl(sack
->start_seq
)
293 - natseq
->offset_after
;
295 new_start_seq
= ntohl(sack
->start_seq
)
296 - natseq
->offset_before
;
297 new_start_seq
= htonl(new_start_seq
);
299 if (after(ntohl(sack
->end_seq
) - natseq
->offset_before
,
300 natseq
->correction_pos
))
301 new_end_seq
= ntohl(sack
->end_seq
)
302 - natseq
->offset_after
;
304 new_end_seq
= ntohl(sack
->end_seq
)
305 - natseq
->offset_before
;
306 new_end_seq
= htonl(new_end_seq
);
308 DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
309 ntohl(sack
->start_seq
), new_start_seq
,
310 ntohl(sack
->end_seq
), new_end_seq
);
312 tcph
->check
= nf_proto_csum_update(skb
,
316 tcph
->check
= nf_proto_csum_update(skb
,
320 sack
->start_seq
= new_start_seq
;
321 sack
->end_seq
= new_end_seq
;
322 sackoff
+= sizeof(*sack
);
326 /* TCP SACK sequence number adjustment */
327 static inline unsigned int
328 ip_nat_sack_adjust(struct sk_buff
**pskb
,
330 struct ip_conntrack
*ct
,
331 enum ip_conntrack_info ctinfo
)
333 unsigned int dir
, optoff
, optend
;
335 optoff
= (*pskb
)->nh
.iph
->ihl
*4 + sizeof(struct tcphdr
);
336 optend
= (*pskb
)->nh
.iph
->ihl
*4 + tcph
->doff
*4;
338 if (!skb_make_writable(pskb
, optend
))
341 dir
= CTINFO2DIR(ctinfo
);
343 while (optoff
< optend
) {
344 /* Usually: option, length. */
345 unsigned char *op
= (*pskb
)->data
+ optoff
;
354 /* no partial options */
355 if (optoff
+ 1 == optend
356 || optoff
+ op
[1] > optend
359 if (op
[0] == TCPOPT_SACK
360 && op
[1] >= 2+TCPOLEN_SACK_PERBLOCK
361 && ((op
[1] - 2) % TCPOLEN_SACK_PERBLOCK
) == 0)
362 sack_adjust(*pskb
, tcph
, optoff
+2,
364 &ct
->nat
.info
.seq
[!dir
]);
371 /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
373 ip_nat_seq_adjust(struct sk_buff
**pskb
,
374 struct ip_conntrack
*ct
,
375 enum ip_conntrack_info ctinfo
)
378 int dir
, newseq
, newack
;
379 struct ip_nat_seq
*this_way
, *other_way
;
381 dir
= CTINFO2DIR(ctinfo
);
383 this_way
= &ct
->nat
.info
.seq
[dir
];
384 other_way
= &ct
->nat
.info
.seq
[!dir
];
386 if (!skb_make_writable(pskb
, (*pskb
)->nh
.iph
->ihl
*4+sizeof(*tcph
)))
389 tcph
= (void *)(*pskb
)->data
+ (*pskb
)->nh
.iph
->ihl
*4;
390 if (after(ntohl(tcph
->seq
), this_way
->correction_pos
))
391 newseq
= ntohl(tcph
->seq
) + this_way
->offset_after
;
393 newseq
= ntohl(tcph
->seq
) + this_way
->offset_before
;
394 newseq
= htonl(newseq
);
396 if (after(ntohl(tcph
->ack_seq
) - other_way
->offset_before
,
397 other_way
->correction_pos
))
398 newack
= ntohl(tcph
->ack_seq
) - other_way
->offset_after
;
400 newack
= ntohl(tcph
->ack_seq
) - other_way
->offset_before
;
401 newack
= htonl(newack
);
403 tcph
->check
= nf_proto_csum_update(*pskb
, ~tcph
->seq
, newseq
,
405 tcph
->check
= nf_proto_csum_update(*pskb
, ~tcph
->ack_seq
, newack
,
408 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
409 ntohl(tcph
->seq
), ntohl(newseq
), ntohl(tcph
->ack_seq
),
413 tcph
->ack_seq
= newack
;
415 if (!ip_nat_sack_adjust(pskb
, tcph
, ct
, ctinfo
))
418 ip_conntrack_tcp_update(*pskb
, ct
, dir
);
422 EXPORT_SYMBOL(ip_nat_seq_adjust
);
424 /* Setup NAT on this expected conntrack so it follows master. */
425 /* If we fail to get a free NAT slot, we'll get dropped on confirm */
426 void ip_nat_follow_master(struct ip_conntrack
*ct
,
427 struct ip_conntrack_expect
*exp
)
429 struct ip_nat_range range
;
431 /* This must be a fresh one. */
432 BUG_ON(ct
->status
& IPS_NAT_DONE_MASK
);
434 /* Change src to where master sends to */
435 range
.flags
= IP_NAT_RANGE_MAP_IPS
;
436 range
.min_ip
= range
.max_ip
437 = ct
->master
->tuplehash
[!exp
->dir
].tuple
.dst
.ip
;
438 /* hook doesn't matter, but it has to do source manip */
439 ip_nat_setup_info(ct
, &range
, NF_IP_POST_ROUTING
);
441 /* For DST manip, map port here to where it's expected. */
442 range
.flags
= (IP_NAT_RANGE_MAP_IPS
| IP_NAT_RANGE_PROTO_SPECIFIED
);
443 range
.min
= range
.max
= exp
->saved_proto
;
444 range
.min_ip
= range
.max_ip
445 = ct
->master
->tuplehash
[!exp
->dir
].tuple
.src
.ip
;
446 /* hook doesn't matter, but it has to do destination manip */
447 ip_nat_setup_info(ct
, &range
, NF_IP_PRE_ROUTING
);
449 EXPORT_SYMBOL(ip_nat_follow_master
);