/* ip_nat_helper.c - generic support functions for NAT helpers
 *
 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
 * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *	14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
 *		- add support for SACK adjustment
 *	14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
 *		- merge SACK support into newnat API
 *	16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
 *		- make ip_nat_resize_packet more generic (TCP and UDP)
 *		- add ip_nat_mangle_udp_packet
 */
18 #include <linux/module.h>
19 #include <linux/kmod.h>
20 #include <linux/types.h>
21 #include <linux/timer.h>
22 #include <linux/skbuff.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <net/checksum.h>
30 #include <linux/netfilter_ipv4/ip_conntrack.h>
31 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
32 #include <linux/netfilter_ipv4/ip_nat.h>
33 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
34 #include <linux/netfilter_ipv4/ip_nat_core.h>
35 #include <linux/netfilter_ipv4/ip_nat_helper.h>
/*
 * Debug helpers.
 *
 * Change "#if 0" to "#if 1" to route DEBUGP() to printk() and to make
 * DUMP_OFFSET() print the three fields of the record it is given.  In the
 * default (disabled) configuration both expand to nothing, so their
 * arguments are never evaluated.
 *
 * NOTE(review): the conditional wrapper and the printk flavour of DEBUGP
 * were not present in the damaged listing; without them DUMP_OFFSET was
 * defined twice with different bodies.
 */
#if 0
#define DEBUGP printk
#define DUMP_OFFSET(x)							\
	do {								\
		printk("offset_before=%d, offset_after=%d, "		\
		       "correction_pos=%u\n",				\
		       (x)->offset_before, (x)->offset_after,		\
		       (x)->correction_pos);				\
	} while (0)
#else
#define DEBUGP(format, args...)
#define DUMP_OFFSET(x) do { } while (0)
#endif
/*
 * Taken (bottom-half safe) by adjust_tcp_sequence() while it rewrites a
 * connection's sequence-offset record.
 */
static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
47 /* Setup TCP sequence correction given this change at this sequence */
49 adjust_tcp_sequence(u32 seq
,
51 struct ip_conntrack
*ct
,
52 enum ip_conntrack_info ctinfo
)
55 struct ip_nat_seq
*this_way
, *other_way
;
57 DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
58 (*skb
)->len
, new_size
);
60 dir
= CTINFO2DIR(ctinfo
);
62 this_way
= &ct
->nat
.info
.seq
[dir
];
63 other_way
= &ct
->nat
.info
.seq
[!dir
];
65 DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
66 DUMP_OFFSET(this_way
);
68 spin_lock_bh(&ip_nat_seqofs_lock
);
70 /* SYN adjust. If it's uninitialized, or this is after last
71 * correction, record it: we don't handle more than one
72 * adjustment in the window, but do deal with common case of a
74 if (this_way
->offset_before
== this_way
->offset_after
75 || before(this_way
->correction_pos
, seq
)) {
76 this_way
->correction_pos
= seq
;
77 this_way
->offset_before
= this_way
->offset_after
;
78 this_way
->offset_after
+= sizediff
;
80 spin_unlock_bh(&ip_nat_seqofs_lock
);
82 DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
83 DUMP_OFFSET(this_way
);
86 /* Frobs data inside this packet, which is linear. */
87 static void mangle_contents(struct sk_buff
*skb
,
89 unsigned int match_offset
,
90 unsigned int match_len
,
91 const char *rep_buffer
,
96 BUG_ON(skb_is_nonlinear(skb
));
97 data
= (unsigned char *)skb
->nh
.iph
+ dataoff
;
99 /* move post-replacement */
100 memmove(data
+ match_offset
+ rep_len
,
101 data
+ match_offset
+ match_len
,
102 skb
->tail
- (data
+ match_offset
+ match_len
));
104 /* insert data from buffer */
105 memcpy(data
+ match_offset
, rep_buffer
, rep_len
);
107 /* update skb info */
108 if (rep_len
> match_len
) {
109 DEBUGP("ip_nat_mangle_packet: Extending packet by "
110 "%u from %u bytes\n", rep_len
- match_len
,
112 skb_put(skb
, rep_len
- match_len
);
114 DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
115 "%u from %u bytes\n", match_len
- rep_len
,
117 __skb_trim(skb
, skb
->len
+ rep_len
- match_len
);
120 /* fix IP hdr checksum information */
121 skb
->nh
.iph
->tot_len
= htons(skb
->len
);
122 ip_send_check(skb
->nh
.iph
);
125 /* Unusual, but possible case. */
126 static int enlarge_skb(struct sk_buff
**pskb
, unsigned int extra
)
128 struct sk_buff
*nskb
;
130 if ((*pskb
)->len
+ extra
> 65535)
133 nskb
= skb_copy_expand(*pskb
, skb_headroom(*pskb
), extra
, GFP_ATOMIC
);
137 /* Transfer socket to new skb. */
139 skb_set_owner_w(nskb
, (*pskb
)->sk
);
145 /* Generic function for mangling variable-length address changes inside
146 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
149 * Takes care about all the nasty sequence number changes, checksumming,
150 * skb enlargement, ...
154 ip_nat_mangle_tcp_packet(struct sk_buff
**pskb
,
155 struct ip_conntrack
*ct
,
156 enum ip_conntrack_info ctinfo
,
157 unsigned int match_offset
,
158 unsigned int match_len
,
159 const char *rep_buffer
,
160 unsigned int rep_len
)
166 if (!skb_make_writable(pskb
, (*pskb
)->len
))
169 if (rep_len
> match_len
170 && rep_len
- match_len
> skb_tailroom(*pskb
)
171 && !enlarge_skb(pskb
, rep_len
- match_len
))
174 SKB_LINEAR_ASSERT(*pskb
);
176 iph
= (*pskb
)->nh
.iph
;
177 tcph
= (void *)iph
+ iph
->ihl
*4;
179 oldlen
= (*pskb
)->len
- iph
->ihl
*4;
180 mangle_contents(*pskb
, iph
->ihl
*4 + tcph
->doff
*4,
181 match_offset
, match_len
, rep_buffer
, rep_len
);
183 datalen
= (*pskb
)->len
- iph
->ihl
*4;
184 if ((*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
) {
186 tcph
->check
= tcp_v4_check(datalen
,
187 iph
->saddr
, iph
->daddr
,
188 csum_partial((char *)tcph
,
191 nf_proto_csum_replace2(&tcph
->check
, *pskb
,
192 htons(oldlen
), htons(datalen
), 1);
194 if (rep_len
!= match_len
) {
195 set_bit(IPS_SEQ_ADJUST_BIT
, &ct
->status
);
196 adjust_tcp_sequence(ntohl(tcph
->seq
),
197 (int)rep_len
- (int)match_len
,
199 /* Tell TCP window tracking about seq change */
200 ip_conntrack_tcp_update(*pskb
, ct
, CTINFO2DIR(ctinfo
));
204 EXPORT_SYMBOL(ip_nat_mangle_tcp_packet
);
206 /* Generic function for mangling variable-length address changes inside
207 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
208 * command in the Amanda protocol)
210 * Takes care about all the nasty sequence number changes, checksumming,
211 * skb enlargement, ...
213 * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
214 * should be fairly easy to do.
217 ip_nat_mangle_udp_packet(struct sk_buff
**pskb
,
218 struct ip_conntrack
*ct
,
219 enum ip_conntrack_info ctinfo
,
220 unsigned int match_offset
,
221 unsigned int match_len
,
222 const char *rep_buffer
,
223 unsigned int rep_len
)
229 /* UDP helpers might accidentally mangle the wrong packet */
230 iph
= (*pskb
)->nh
.iph
;
231 if ((*pskb
)->len
< iph
->ihl
*4 + sizeof(*udph
) +
232 match_offset
+ match_len
)
235 if (!skb_make_writable(pskb
, (*pskb
)->len
))
238 if (rep_len
> match_len
239 && rep_len
- match_len
> skb_tailroom(*pskb
)
240 && !enlarge_skb(pskb
, rep_len
- match_len
))
243 iph
= (*pskb
)->nh
.iph
;
244 udph
= (void *)iph
+ iph
->ihl
*4;
246 oldlen
= (*pskb
)->len
- iph
->ihl
*4;
247 mangle_contents(*pskb
, iph
->ihl
*4 + sizeof(*udph
),
248 match_offset
, match_len
, rep_buffer
, rep_len
);
250 /* update the length of the UDP packet */
251 datalen
= (*pskb
)->len
- iph
->ihl
*4;
252 udph
->len
= htons(datalen
);
254 /* fix udp checksum if udp checksum was previously calculated */
255 if (!udph
->check
&& (*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
)
258 if ((*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
) {
260 udph
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
261 datalen
, IPPROTO_UDP
,
262 csum_partial((char *)udph
,
265 udph
->check
= CSUM_MANGLED_0
;
267 nf_proto_csum_replace2(&udph
->check
, *pskb
,
268 htons(oldlen
), htons(datalen
), 1);
271 EXPORT_SYMBOL(ip_nat_mangle_udp_packet
);
273 /* Adjust one found SACK option including checksum correction */
275 sack_adjust(struct sk_buff
*skb
,
277 unsigned int sackoff
,
278 unsigned int sackend
,
279 struct ip_nat_seq
*natseq
)
281 while (sackoff
< sackend
) {
282 struct tcp_sack_block_wire
*sack
;
283 __be32 new_start_seq
, new_end_seq
;
285 sack
= (void *)skb
->data
+ sackoff
;
286 if (after(ntohl(sack
->start_seq
) - natseq
->offset_before
,
287 natseq
->correction_pos
))
288 new_start_seq
= htonl(ntohl(sack
->start_seq
)
289 - natseq
->offset_after
);
291 new_start_seq
= htonl(ntohl(sack
->start_seq
)
292 - natseq
->offset_before
);
294 if (after(ntohl(sack
->end_seq
) - natseq
->offset_before
,
295 natseq
->correction_pos
))
296 new_end_seq
= htonl(ntohl(sack
->end_seq
)
297 - natseq
->offset_after
);
299 new_end_seq
= htonl(ntohl(sack
->end_seq
)
300 - natseq
->offset_before
);
302 DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
303 ntohl(sack
->start_seq
), new_start_seq
,
304 ntohl(sack
->end_seq
), new_end_seq
);
306 nf_proto_csum_replace4(&tcph
->check
, skb
,
307 sack
->start_seq
, new_start_seq
, 0);
308 nf_proto_csum_replace4(&tcph
->check
, skb
,
309 sack
->end_seq
, new_end_seq
, 0);
310 sack
->start_seq
= new_start_seq
;
311 sack
->end_seq
= new_end_seq
;
312 sackoff
+= sizeof(*sack
);
316 /* TCP SACK sequence number adjustment */
317 static inline unsigned int
318 ip_nat_sack_adjust(struct sk_buff
**pskb
,
320 struct ip_conntrack
*ct
,
321 enum ip_conntrack_info ctinfo
)
323 unsigned int dir
, optoff
, optend
;
325 optoff
= ip_hdrlen(*pskb
) + sizeof(struct tcphdr
);
326 optend
= ip_hdrlen(*pskb
) + tcph
->doff
* 4;
328 if (!skb_make_writable(pskb
, optend
))
331 dir
= CTINFO2DIR(ctinfo
);
333 while (optoff
< optend
) {
334 /* Usually: option, length. */
335 unsigned char *op
= (*pskb
)->data
+ optoff
;
344 /* no partial options */
345 if (optoff
+ 1 == optend
346 || optoff
+ op
[1] > optend
349 if (op
[0] == TCPOPT_SACK
350 && op
[1] >= 2+TCPOLEN_SACK_PERBLOCK
351 && ((op
[1] - 2) % TCPOLEN_SACK_PERBLOCK
) == 0)
352 sack_adjust(*pskb
, tcph
, optoff
+2,
354 &ct
->nat
.info
.seq
[!dir
]);
361 /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
363 ip_nat_seq_adjust(struct sk_buff
**pskb
,
364 struct ip_conntrack
*ct
,
365 enum ip_conntrack_info ctinfo
)
369 __be32 newseq
, newack
;
370 struct ip_nat_seq
*this_way
, *other_way
;
372 dir
= CTINFO2DIR(ctinfo
);
374 this_way
= &ct
->nat
.info
.seq
[dir
];
375 other_way
= &ct
->nat
.info
.seq
[!dir
];
377 if (!skb_make_writable(pskb
, ip_hdrlen(*pskb
) + sizeof(*tcph
)))
380 tcph
= (void *)(*pskb
)->data
+ ip_hdrlen(*pskb
);
381 if (after(ntohl(tcph
->seq
), this_way
->correction_pos
))
382 newseq
= htonl(ntohl(tcph
->seq
) + this_way
->offset_after
);
384 newseq
= htonl(ntohl(tcph
->seq
) + this_way
->offset_before
);
386 if (after(ntohl(tcph
->ack_seq
) - other_way
->offset_before
,
387 other_way
->correction_pos
))
388 newack
= htonl(ntohl(tcph
->ack_seq
) - other_way
->offset_after
);
390 newack
= htonl(ntohl(tcph
->ack_seq
) - other_way
->offset_before
);
392 nf_proto_csum_replace4(&tcph
->check
, *pskb
, tcph
->seq
, newseq
, 0);
393 nf_proto_csum_replace4(&tcph
->check
, *pskb
, tcph
->ack_seq
, newack
, 0);
395 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
396 ntohl(tcph
->seq
), ntohl(newseq
), ntohl(tcph
->ack_seq
),
400 tcph
->ack_seq
= newack
;
402 if (!ip_nat_sack_adjust(pskb
, tcph
, ct
, ctinfo
))
405 ip_conntrack_tcp_update(*pskb
, ct
, dir
);
409 EXPORT_SYMBOL(ip_nat_seq_adjust
);
411 /* Setup NAT on this expected conntrack so it follows master. */
412 /* If we fail to get a free NAT slot, we'll get dropped on confirm */
413 void ip_nat_follow_master(struct ip_conntrack
*ct
,
414 struct ip_conntrack_expect
*exp
)
416 struct ip_nat_range range
;
418 /* This must be a fresh one. */
419 BUG_ON(ct
->status
& IPS_NAT_DONE_MASK
);
421 /* Change src to where master sends to */
422 range
.flags
= IP_NAT_RANGE_MAP_IPS
;
423 range
.min_ip
= range
.max_ip
424 = ct
->master
->tuplehash
[!exp
->dir
].tuple
.dst
.ip
;
425 /* hook doesn't matter, but it has to do source manip */
426 ip_nat_setup_info(ct
, &range
, NF_IP_POST_ROUTING
);
428 /* For DST manip, map port here to where it's expected. */
429 range
.flags
= (IP_NAT_RANGE_MAP_IPS
| IP_NAT_RANGE_PROTO_SPECIFIED
);
430 range
.min
= range
.max
= exp
->saved_proto
;
431 range
.min_ip
= range
.max_ip
432 = ct
->master
->tuplehash
[!exp
->dir
].tuple
.src
.ip
;
433 /* hook doesn't matter, but it has to do destination manip */
434 ip_nat_setup_info(ct
, &range
, NF_IP_PRE_ROUTING
);
436 EXPORT_SYMBOL(ip_nat_follow_master
);