Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * ip_vs_xmit.c: various packet transmitters for IPVS | |
3 | * | |
1da177e4 LT |
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
5 | * Julian Anastasov <ja@ssi.bg> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation; either version | |
10 | * 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * Changes: | |
13 | * | |
cb59155f JA |
14 | * Description of forwarding methods: |
15 | * - all transmitters are called from LOCAL_IN (remote clients) and | |
16 | * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD | |
17 | * - not all connections have destination server, for example, | |
18 | * connections in backup server when fwmark is used | |
19 | * - bypass connections use daddr from packet | |
026ace06 JA |
20 | * - we can use dst without ref while sending in RCU section, we use |
21 | * ref when returning NF_ACCEPT for NAT-ed packet via loopback | |
cb59155f JA |
22 | * LOCAL_OUT rules: |
23 | * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) | |
24 | * - skb->pkt_type is not set yet | |
25 | * - the only place where we can see skb->sk != NULL | |
1da177e4 LT |
26 | */ |
27 | ||
9aada7ac HE |
28 | #define KMSG_COMPONENT "IPVS" |
29 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
30 | ||
1da177e4 | 31 | #include <linux/kernel.h> |
5a0e3ad6 | 32 | #include <linux/slab.h> |
1da177e4 | 33 | #include <linux/tcp.h> /* for tcphdr */ |
c439cb2e | 34 | #include <net/ip.h> |
1da177e4 LT |
35 | #include <net/tcp.h> /* for csum_tcpudp_magic */ |
36 | #include <net/udp.h> | |
37 | #include <net/icmp.h> /* for icmp_send */ | |
38 | #include <net/route.h> /* for ip_route_output */ | |
38cdcc9a JV |
39 | #include <net/ipv6.h> |
40 | #include <net/ip6_route.h> | |
ea1d5d77 | 41 | #include <net/ip_tunnels.h> |
714f095f | 42 | #include <net/addrconf.h> |
38cdcc9a | 43 | #include <linux/icmpv6.h> |
1da177e4 LT |
44 | #include <linux/netfilter.h> |
45 | #include <linux/netfilter_ipv4.h> | |
46 | ||
47 | #include <net/ip_vs.h> | |
48 | ||
17a8f8e3 CG |
/* Bit flags combined into the rt_mode argument of the route lookups */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
	IP_VS_RT_MODE_KNOWN_NH	= 1 << 4,	/* Route via remote addr */
	IP_VS_RT_MODE_TUNNEL	= 1 << 5,	/* Tunnel mode */
};
1da177e4 | 59 | |
026ace06 JA |
60 | static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) |
61 | { | |
62 | return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); | |
63 | } | |
64 | ||
/* Release a destination cache entry directly with kfree() */
static inline void
ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
{
	kfree(dest_dst);
}
69 | ||
1da177e4 LT |
70 | /* |
71 | * Destination cache to speed up outgoing route lookup | |
72 | */ | |
73 | static inline void | |
026ace06 JA |
74 | __ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, |
75 | struct dst_entry *dst, u32 dst_cookie) | |
1da177e4 | 76 | { |
026ace06 JA |
77 | struct ip_vs_dest_dst *old; |
78 | ||
79 | old = rcu_dereference_protected(dest->dest_dst, | |
80 | lockdep_is_held(&dest->dst_lock)); | |
81 | ||
82 | if (dest_dst) { | |
83 | dest_dst->dst_cache = dst; | |
84 | dest_dst->dst_cookie = dst_cookie; | |
85 | } | |
86 | rcu_assign_pointer(dest->dest_dst, dest_dst); | |
1da177e4 | 87 | |
026ace06 JA |
88 | if (old) |
89 | call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); | |
1da177e4 LT |
90 | } |
91 | ||
026ace06 | 92 | static inline struct ip_vs_dest_dst * |
c90558da | 93 | __ip_vs_dst_check(struct ip_vs_dest *dest) |
1da177e4 | 94 | { |
026ace06 JA |
95 | struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); |
96 | struct dst_entry *dst; | |
1da177e4 | 97 | |
026ace06 | 98 | if (!dest_dst) |
1da177e4 | 99 | return NULL; |
026ace06 JA |
100 | dst = dest_dst->dst_cache; |
101 | if (dst->obsolete && | |
102 | dst->ops->check(dst, dest_dst->dst_cookie) == NULL) | |
1da177e4 | 103 | return NULL; |
026ace06 | 104 | return dest_dst; |
1da177e4 LT |
105 | } |
106 | ||
590e3f79 JDB |
107 | static inline bool |
108 | __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) | |
109 | { | |
4cdd3408 PM |
110 | if (IP6CB(skb)->frag_max_size) { |
111 | /* frag_max_size tell us that, this packet have been | |
112 | * defragmented by netfilter IPv6 conntrack module. | |
113 | */ | |
114 | if (IP6CB(skb)->frag_max_size > mtu) | |
115 | return true; /* largest fragment violate MTU */ | |
116 | } | |
117 | else if (skb->len > mtu && !skb_is_gso(skb)) { | |
590e3f79 JDB |
118 | return true; /* Packet size violate MTU size */ |
119 | } | |
120 | return false; | |
121 | } | |
122 | ||
f2edb9f7 JA |
123 | /* Get route to daddr, update *saddr, optionally bind route to saddr */ |
124 | static struct rtable *do_output_route4(struct net *net, __be32 daddr, | |
c90558da | 125 | int rt_mode, __be32 *saddr) |
f2edb9f7 JA |
126 | { |
127 | struct flowi4 fl4; | |
128 | struct rtable *rt; | |
129 | int loop = 0; | |
130 | ||
131 | memset(&fl4, 0, sizeof(fl4)); | |
132 | fl4.daddr = daddr; | |
ad4d3ef8 JA |
133 | fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? |
134 | FLOWI_FLAG_KNOWN_NH : 0; | |
f2edb9f7 JA |
135 | |
136 | retry: | |
137 | rt = ip_route_output_key(net, &fl4); | |
138 | if (IS_ERR(rt)) { | |
139 | /* Invalid saddr ? */ | |
140 | if (PTR_ERR(rt) == -EINVAL && *saddr && | |
141 | rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { | |
142 | *saddr = 0; | |
c90558da | 143 | flowi4_update_output(&fl4, 0, 0, daddr, 0); |
f2edb9f7 JA |
144 | goto retry; |
145 | } | |
146 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); | |
147 | return NULL; | |
148 | } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { | |
149 | ip_rt_put(rt); | |
150 | *saddr = fl4.saddr; | |
c90558da | 151 | flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); |
f2edb9f7 JA |
152 | loop++; |
153 | goto retry; | |
154 | } | |
155 | *saddr = fl4.saddr; | |
156 | return rt; | |
157 | } | |
158 | ||
4a4739d5 AG |
159 | #ifdef CONFIG_IP_VS_IPV6 |
160 | static inline int __ip_vs_is_local_route6(struct rt6_info *rt) | |
161 | { | |
162 | return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; | |
163 | } | |
164 | #endif | |
165 | ||
166 | static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, | |
167 | int rt_mode, | |
168 | bool new_rt_is_local) | |
169 | { | |
170 | bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); | |
171 | bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); | |
172 | bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); | |
173 | bool source_is_loopback; | |
174 | bool old_rt_is_local; | |
175 | ||
176 | #ifdef CONFIG_IP_VS_IPV6 | |
177 | if (skb_af == AF_INET6) { | |
178 | int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); | |
179 | ||
180 | source_is_loopback = | |
181 | (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | |
182 | (addr_type & IPV6_ADDR_LOOPBACK); | |
183 | old_rt_is_local = __ip_vs_is_local_route6( | |
184 | (struct rt6_info *)skb_dst(skb)); | |
185 | } else | |
186 | #endif | |
187 | { | |
188 | source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr); | |
189 | old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; | |
190 | } | |
191 | ||
192 | if (unlikely(new_rt_is_local)) { | |
193 | if (!rt_mode_allow_local) | |
194 | return true; | |
195 | if (!rt_mode_allow_redirect && !old_rt_is_local) | |
196 | return true; | |
197 | } else { | |
198 | if (!rt_mode_allow_non_local) | |
199 | return true; | |
200 | if (source_is_loopback) | |
201 | return true; | |
202 | } | |
203 | return false; | |
204 | } | |
205 | ||
919aa0b2 AG |
206 | static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) |
207 | { | |
208 | struct sock *sk = skb->sk; | |
209 | struct rtable *ort = skb_rtable(skb); | |
210 | ||
a8399231 | 211 | if (!skb->dev && sk && sk_fullsock(sk)) |
919aa0b2 AG |
212 | ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); |
213 | } | |
214 | ||
c63e4de2 AG |
215 | static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, |
216 | struct ip_vs_iphdr *ipvsh, | |
217 | struct sk_buff *skb, int mtu) | |
218 | { | |
219 | #ifdef CONFIG_IP_VS_IPV6 | |
220 | if (skb_af == AF_INET6) { | |
221 | struct net *net = dev_net(skb_dst(skb)->dev); | |
222 | ||
223 | if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { | |
224 | if (!skb->dev) | |
225 | skb->dev = net->loopback_dev; | |
226 | /* only send ICMP too big on first fragment */ | |
89621f31 | 227 | if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) |
c63e4de2 AG |
228 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
229 | IP_VS_DBG(1, "frag needed for %pI6c\n", | |
230 | &ipv6_hdr(skb)->saddr); | |
231 | return false; | |
232 | } | |
233 | } else | |
234 | #endif | |
235 | { | |
236 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); | |
237 | ||
238 | /* If we're going to tunnel the packet and pmtu discovery | |
239 | * is disabled, we'll just fragment it anyway | |
240 | */ | |
241 | if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs)) | |
242 | return true; | |
243 | ||
244 | if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && | |
89621f31 AG |
245 | skb->len > mtu && !skb_is_gso(skb) && |
246 | !ip_vs_iph_icmp(ipvsh))) { | |
c63e4de2 AG |
247 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
248 | htonl(mtu)); | |
249 | IP_VS_DBG(1, "frag needed for %pI4\n", | |
250 | &ip_hdr(skb)->saddr); | |
251 | return false; | |
252 | } | |
253 | } | |
254 | ||
255 | return true; | |
256 | } | |
257 | ||
17a8f8e3 | 258 | /* Get route to destination or remote server */ |
4115ded1 | 259 | static int |
4a4739d5 | 260 | __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, |
c63e4de2 AG |
261 | __be32 daddr, int rt_mode, __be32 *ret_saddr, |
262 | struct ip_vs_iphdr *ipvsh) | |
1da177e4 | 263 | { |
fc604767 | 264 | struct net *net = dev_net(skb_dst(skb)->dev); |
026ace06 | 265 | struct ip_vs_dest_dst *dest_dst; |
1da177e4 | 266 | struct rtable *rt; /* Route to the other host */ |
4115ded1 | 267 | int mtu; |
026ace06 | 268 | int local, noref = 1; |
1da177e4 LT |
269 | |
270 | if (dest) { | |
026ace06 JA |
271 | dest_dst = __ip_vs_dst_check(dest); |
272 | if (likely(dest_dst)) | |
273 | rt = (struct rtable *) dest_dst->dst_cache; | |
274 | else { | |
275 | dest_dst = ip_vs_dest_dst_alloc(); | |
ac69269a | 276 | spin_lock_bh(&dest->dst_lock); |
026ace06 JA |
277 | if (!dest_dst) { |
278 | __ip_vs_dst_set(dest, NULL, NULL, 0); | |
ac69269a | 279 | spin_unlock_bh(&dest->dst_lock); |
026ace06 JA |
280 | goto err_unreach; |
281 | } | |
c90558da | 282 | rt = do_output_route4(net, dest->addr.ip, rt_mode, |
026ace06 | 283 | &dest_dst->dst_saddr.ip); |
f2edb9f7 | 284 | if (!rt) { |
026ace06 | 285 | __ip_vs_dst_set(dest, NULL, NULL, 0); |
ac69269a | 286 | spin_unlock_bh(&dest->dst_lock); |
026ace06 | 287 | ip_vs_dest_dst_free(dest_dst); |
4115ded1 | 288 | goto err_unreach; |
1da177e4 | 289 | } |
026ace06 | 290 | __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); |
ac69269a | 291 | spin_unlock_bh(&dest->dst_lock); |
c90558da | 292 | IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", |
026ace06 | 293 | &dest->addr.ip, &dest_dst->dst_saddr.ip, |
c90558da | 294 | atomic_read(&rt->dst.__refcnt)); |
1da177e4 | 295 | } |
c92f5ca2 | 296 | if (ret_saddr) |
026ace06 | 297 | *ret_saddr = dest_dst->dst_saddr.ip; |
1da177e4 | 298 | } else { |
f2edb9f7 | 299 | __be32 saddr = htonl(INADDR_ANY); |
c92f5ca2 | 300 | |
026ace06 JA |
301 | noref = 0; |
302 | ||
f2edb9f7 JA |
303 | /* For such unconfigured boxes avoid many route lookups |
304 | * for performance reasons because we do not remember saddr | |
305 | */ | |
306 | rt_mode &= ~IP_VS_RT_MODE_CONNECT; | |
c90558da | 307 | rt = do_output_route4(net, daddr, rt_mode, &saddr); |
f2edb9f7 | 308 | if (!rt) |
4115ded1 | 309 | goto err_unreach; |
c92f5ca2 | 310 | if (ret_saddr) |
f2edb9f7 | 311 | *ret_saddr = saddr; |
1da177e4 LT |
312 | } |
313 | ||
4115ded1 | 314 | local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; |
4a4739d5 AG |
315 | if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, |
316 | local))) { | |
317 | IP_VS_DBG_RL("We are crossing local and non-local addresses" | |
3d53666b | 318 | " daddr=%pI4\n", &daddr); |
4115ded1 | 319 | goto err_put; |
fc604767 | 320 | } |
4a4739d5 AG |
321 | |
322 | if (unlikely(local)) { | |
4115ded1 | 323 | /* skb to local stack, preserve old route */ |
026ace06 JA |
324 | if (!noref) |
325 | ip_rt_put(rt); | |
4115ded1 | 326 | return local; |
fc604767 | 327 | } |
4115ded1 JA |
328 | |
329 | if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { | |
330 | mtu = dst_mtu(&rt->dst); | |
4115ded1 | 331 | } else { |
4115ded1 JA |
332 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); |
333 | if (mtu < 68) { | |
334 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); | |
335 | goto err_put; | |
336 | } | |
919aa0b2 | 337 | maybe_update_pmtu(skb_af, skb, mtu); |
fc604767 JA |
338 | } |
339 | ||
c63e4de2 | 340 | if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) |
4115ded1 | 341 | goto err_put; |
4115ded1 JA |
342 | |
343 | skb_dst_drop(skb); | |
026ace06 JA |
344 | if (noref) { |
345 | if (!local) | |
dbfc4fb7 | 346 | skb_dst_set_noref(skb, &rt->dst); |
026ace06 JA |
347 | else |
348 | skb_dst_set(skb, dst_clone(&rt->dst)); | |
349 | } else | |
350 | skb_dst_set(skb, &rt->dst); | |
4115ded1 JA |
351 | |
352 | return local; | |
353 | ||
354 | err_put: | |
026ace06 JA |
355 | if (!noref) |
356 | ip_rt_put(rt); | |
4115ded1 JA |
357 | return -1; |
358 | ||
359 | err_unreach: | |
360 | dst_link_failure(skb); | |
361 | return -1; | |
1da177e4 LT |
362 | } |
363 | ||
38cdcc9a | 364 | #ifdef CONFIG_IP_VS_IPV6 |
714f095f HS |
365 | static struct dst_entry * |
366 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, | |
48e8aa6e | 367 | struct in6_addr *ret_saddr, int do_xfrm, int rt_mode) |
714f095f HS |
368 | { |
369 | struct dst_entry *dst; | |
4c9483b2 DM |
370 | struct flowi6 fl6 = { |
371 | .daddr = *daddr, | |
714f095f HS |
372 | }; |
373 | ||
48e8aa6e MKL |
374 | if (rt_mode & IP_VS_RT_MODE_KNOWN_NH) |
375 | fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; | |
376 | ||
4c9483b2 | 377 | dst = ip6_route_output(net, NULL, &fl6); |
714f095f HS |
378 | if (dst->error) |
379 | goto out_err; | |
380 | if (!ret_saddr) | |
381 | return dst; | |
4c9483b2 | 382 | if (ipv6_addr_any(&fl6.saddr) && |
714f095f | 383 | ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, |
4c9483b2 | 384 | &fl6.daddr, 0, &fl6.saddr) < 0) |
714f095f | 385 | goto out_err; |
452edd59 | 386 | if (do_xfrm) { |
4c9483b2 | 387 | dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); |
452edd59 DM |
388 | if (IS_ERR(dst)) { |
389 | dst = NULL; | |
390 | goto out_err; | |
391 | } | |
392 | } | |
4e3fd7a0 | 393 | *ret_saddr = fl6.saddr; |
714f095f HS |
394 | return dst; |
395 | ||
396 | out_err: | |
397 | dst_release(dst); | |
398 | IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); | |
399 | return NULL; | |
400 | } | |
401 | ||
fc604767 JA |
402 | /* |
403 | * Get route to destination or remote server | |
fc604767 | 404 | */ |
4115ded1 | 405 | static int |
4a4739d5 | 406 | __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, |
fc604767 | 407 | struct in6_addr *daddr, struct in6_addr *ret_saddr, |
4115ded1 | 408 | struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) |
38cdcc9a | 409 | { |
fc604767 | 410 | struct net *net = dev_net(skb_dst(skb)->dev); |
026ace06 | 411 | struct ip_vs_dest_dst *dest_dst; |
38cdcc9a | 412 | struct rt6_info *rt; /* Route to the other host */ |
714f095f | 413 | struct dst_entry *dst; |
4115ded1 | 414 | int mtu; |
026ace06 | 415 | int local, noref = 1; |
38cdcc9a JV |
416 | |
417 | if (dest) { | |
026ace06 JA |
418 | dest_dst = __ip_vs_dst_check(dest); |
419 | if (likely(dest_dst)) | |
420 | rt = (struct rt6_info *) dest_dst->dst_cache; | |
421 | else { | |
714f095f | 422 | u32 cookie; |
38cdcc9a | 423 | |
026ace06 | 424 | dest_dst = ip_vs_dest_dst_alloc(); |
ac69269a | 425 | spin_lock_bh(&dest->dst_lock); |
026ace06 JA |
426 | if (!dest_dst) { |
427 | __ip_vs_dst_set(dest, NULL, NULL, 0); | |
ac69269a | 428 | spin_unlock_bh(&dest->dst_lock); |
026ace06 JA |
429 | goto err_unreach; |
430 | } | |
714f095f | 431 | dst = __ip_vs_route_output_v6(net, &dest->addr.in6, |
026ace06 | 432 | &dest_dst->dst_saddr.in6, |
48e8aa6e | 433 | do_xfrm, rt_mode); |
714f095f | 434 | if (!dst) { |
026ace06 | 435 | __ip_vs_dst_set(dest, NULL, NULL, 0); |
ac69269a | 436 | spin_unlock_bh(&dest->dst_lock); |
026ace06 | 437 | ip_vs_dest_dst_free(dest_dst); |
4115ded1 | 438 | goto err_unreach; |
38cdcc9a | 439 | } |
714f095f | 440 | rt = (struct rt6_info *) dst; |
b197df4f | 441 | cookie = rt6_get_cookie(rt); |
026ace06 | 442 | __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); |
ac69269a | 443 | spin_unlock_bh(&dest->dst_lock); |
714f095f | 444 | IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", |
026ace06 | 445 | &dest->addr.in6, &dest_dst->dst_saddr.in6, |
d8d1f30b | 446 | atomic_read(&rt->dst.__refcnt)); |
38cdcc9a | 447 | } |
714f095f | 448 | if (ret_saddr) |
026ace06 | 449 | *ret_saddr = dest_dst->dst_saddr.in6; |
38cdcc9a | 450 | } else { |
026ace06 | 451 | noref = 0; |
48e8aa6e MKL |
452 | dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm, |
453 | rt_mode); | |
714f095f | 454 | if (!dst) |
4115ded1 | 455 | goto err_unreach; |
714f095f | 456 | rt = (struct rt6_info *) dst; |
38cdcc9a JV |
457 | } |
458 | ||
fc604767 | 459 | local = __ip_vs_is_local_route6(rt); |
4a4739d5 AG |
460 | |
461 | if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, | |
462 | local))) { | |
463 | IP_VS_DBG_RL("We are crossing local and non-local addresses" | |
3d53666b | 464 | " daddr=%pI6\n", daddr); |
4115ded1 | 465 | goto err_put; |
fc604767 | 466 | } |
4a4739d5 AG |
467 | |
468 | if (unlikely(local)) { | |
4115ded1 | 469 | /* skb to local stack, preserve old route */ |
026ace06 JA |
470 | if (!noref) |
471 | dst_release(&rt->dst); | |
4115ded1 | 472 | return local; |
fc604767 | 473 | } |
4115ded1 JA |
474 | |
475 | /* MTU checking */ | |
476 | if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) | |
477 | mtu = dst_mtu(&rt->dst); | |
478 | else { | |
4115ded1 JA |
479 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); |
480 | if (mtu < IPV6_MIN_MTU) { | |
481 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, | |
482 | IPV6_MIN_MTU); | |
483 | goto err_put; | |
484 | } | |
919aa0b2 | 485 | maybe_update_pmtu(skb_af, skb, mtu); |
fc604767 JA |
486 | } |
487 | ||
c63e4de2 | 488 | if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) |
4115ded1 | 489 | goto err_put; |
4115ded1 JA |
490 | |
491 | skb_dst_drop(skb); | |
026ace06 JA |
492 | if (noref) { |
493 | if (!local) | |
dbfc4fb7 | 494 | skb_dst_set_noref(skb, &rt->dst); |
026ace06 JA |
495 | else |
496 | skb_dst_set(skb, dst_clone(&rt->dst)); | |
497 | } else | |
498 | skb_dst_set(skb, &rt->dst); | |
4115ded1 JA |
499 | |
500 | return local; | |
501 | ||
502 | err_put: | |
026ace06 JA |
503 | if (!noref) |
504 | dst_release(&rt->dst); | |
4115ded1 JA |
505 | return -1; |
506 | ||
507 | err_unreach: | |
326bf17e AG |
508 | /* The ip6_link_failure function requires the dev field to be set |
509 | * in order to get the net (further for the sake of fwmark | |
510 | * reflection). | |
511 | */ | |
512 | if (!skb->dev) | |
513 | skb->dev = skb_dst(skb)->dev; | |
514 | ||
4115ded1 JA |
515 | dst_link_failure(skb); |
516 | return -1; | |
38cdcc9a JV |
517 | } |
518 | #endif | |
519 | ||
1da177e4 | 520 | |
b8abdf09 JA |
521 | /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ |
522 | static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, | |
523 | struct ip_vs_conn *cp) | |
524 | { | |
525 | int ret = NF_ACCEPT; | |
526 | ||
527 | skb->ipvs_property = 1; | |
528 | if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) | |
529 | ret = ip_vs_confirm_conntrack(skb); | |
530 | if (ret == NF_ACCEPT) { | |
531 | nf_reset(skb); | |
532 | skb_forward_csum(skb); | |
e3895c03 JA |
533 | if (!skb->sk) |
534 | skb_sender_cpu_clear(skb); | |
b8abdf09 JA |
535 | } |
536 | return ret; | |
537 | } | |
538 | ||
71563f34 AG |
539 | /* In the event of a remote destination, it's possible that we would have |
540 | * matches against an old socket (particularly a TIME-WAIT socket). This | |
541 | * causes havoc down the line (ip_local_out et. al. expect regular sockets | |
542 | * and invalid memory accesses will happen) so simply drop the association | |
543 | * in this case. | |
544 | */ | |
545 | static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) | |
546 | { | |
547 | /* If dev is set, the packet came from the LOCAL_IN callback and | |
548 | * not from a local TCP socket. | |
549 | */ | |
550 | if (skb->dev) | |
551 | skb_orphan(skb); | |
552 | } | |
553 | ||
b8abdf09 JA |
554 | /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ |
555 | static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, | |
556 | struct ip_vs_conn *cp, int local) | |
557 | { | |
558 | int ret = NF_STOLEN; | |
559 | ||
560 | skb->ipvs_property = 1; | |
561 | if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) | |
562 | ip_vs_notrack(skb); | |
563 | else | |
564 | ip_vs_update_conntrack(skb, cp, 1); | |
71563f34 AG |
565 | |
566 | /* Remove the early_demux association unless it's bound for the | |
567 | * exact same port and address on this host after translation. | |
568 | */ | |
569 | if (!local || cp->vport != cp->dport || | |
570 | !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) | |
571 | ip_vs_drop_early_demux_sk(skb); | |
572 | ||
b8abdf09 JA |
573 | if (!local) { |
574 | skb_forward_csum(skb); | |
e3895c03 JA |
575 | if (!skb->sk) |
576 | skb_sender_cpu_clear(skb); | |
58dbc6f2 | 577 | NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, |
0c4b51f0 | 578 | NULL, skb_dst(skb)->dev, dst_output_okfn); |
b8abdf09 JA |
579 | } else |
580 | ret = NF_ACCEPT; | |
71563f34 | 581 | |
b8abdf09 JA |
582 | return ret; |
583 | } | |
584 | ||
585 | /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ | |
586 | static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, | |
587 | struct ip_vs_conn *cp, int local) | |
588 | { | |
589 | int ret = NF_STOLEN; | |
590 | ||
591 | skb->ipvs_property = 1; | |
592 | if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) | |
593 | ip_vs_notrack(skb); | |
594 | if (!local) { | |
71563f34 | 595 | ip_vs_drop_early_demux_sk(skb); |
b8abdf09 | 596 | skb_forward_csum(skb); |
e3895c03 JA |
597 | if (!skb->sk) |
598 | skb_sender_cpu_clear(skb); | |
58dbc6f2 | 599 | NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, |
0c4b51f0 | 600 | NULL, skb_dst(skb)->dev, dst_output_okfn); |
b8abdf09 JA |
601 | } else |
602 | ret = NF_ACCEPT; | |
603 | return ret; | |
604 | } | |
1da177e4 LT |
605 | |
606 | ||
607 | /* | |
608 | * NULL transmitter (do nothing except return NF_ACCEPT) | |
609 | */ | |
610 | int | |
611 | ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 612 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
1da177e4 LT |
613 | { |
614 | /* we do not touch skb and do not need pskb ptr */ | |
b8abdf09 | 615 | return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); |
1da177e4 LT |
616 | } |
617 | ||
618 | ||
619 | /* | |
620 | * Bypass transmitter | |
621 | * Let packets bypass the destination when the destination is not | |
622 | * available, it may be only used in transparent cache cluster. | |
623 | */ | |
624 | int | |
625 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 626 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
1da177e4 | 627 | { |
eddc9ec5 | 628 | struct iphdr *iph = ip_hdr(skb); |
1da177e4 LT |
629 | |
630 | EnterFunction(10); | |
631 | ||
026ace06 | 632 | rcu_read_lock(); |
4a4739d5 | 633 | if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, |
c63e4de2 | 634 | IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) |
1da177e4 | 635 | goto tx_error; |
1da177e4 | 636 | |
4115ded1 | 637 | ip_send_check(iph); |
1da177e4 LT |
638 | |
639 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 640 | skb->ignore_df = 1; |
1da177e4 | 641 | |
b8abdf09 | 642 | ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); |
026ace06 | 643 | rcu_read_unlock(); |
1da177e4 LT |
644 | |
645 | LeaveFunction(10); | |
646 | return NF_STOLEN; | |
647 | ||
1da177e4 LT |
648 | tx_error: |
649 | kfree_skb(skb); | |
026ace06 | 650 | rcu_read_unlock(); |
1da177e4 LT |
651 | LeaveFunction(10); |
652 | return NF_STOLEN; | |
653 | } | |
654 | ||
b3cdd2a7 JV |
655 | #ifdef CONFIG_IP_VS_IPV6 |
656 | int | |
657 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |
4115ded1 | 658 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
b3cdd2a7 | 659 | { |
3481894f AG |
660 | struct ipv6hdr *iph = ipv6_hdr(skb); |
661 | ||
b3cdd2a7 JV |
662 | EnterFunction(10); |
663 | ||
026ace06 | 664 | rcu_read_lock(); |
3481894f | 665 | if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &iph->daddr, NULL, |
4115ded1 | 666 | ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) |
b3cdd2a7 | 667 | goto tx_error; |
b3cdd2a7 JV |
668 | |
669 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 670 | skb->ignore_df = 1; |
b3cdd2a7 | 671 | |
b8abdf09 | 672 | ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); |
026ace06 | 673 | rcu_read_unlock(); |
b3cdd2a7 JV |
674 | |
675 | LeaveFunction(10); | |
676 | return NF_STOLEN; | |
677 | ||
b3cdd2a7 JV |
678 | tx_error: |
679 | kfree_skb(skb); | |
026ace06 | 680 | rcu_read_unlock(); |
b3cdd2a7 JV |
681 | LeaveFunction(10); |
682 | return NF_STOLEN; | |
683 | } | |
684 | #endif | |
1da177e4 LT |
685 | |
686 | /* | |
687 | * NAT transmitter (only for outside-to-inside nat forwarding) | |
688 | * Not used for related ICMP | |
689 | */ | |
690 | int | |
691 | ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 692 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
1da177e4 LT |
693 | { |
694 | struct rtable *rt; /* Route to the other host */ | |
4115ded1 | 695 | int local, rc, was_input; |
1da177e4 LT |
696 | |
697 | EnterFunction(10); | |
698 | ||
026ace06 | 699 | rcu_read_lock(); |
1da177e4 LT |
700 | /* check if it is a connection of no-client-port */ |
701 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { | |
014d730d | 702 | __be16 _pt, *p; |
4115ded1 JA |
703 | |
704 | p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); | |
1da177e4 LT |
705 | if (p == NULL) |
706 | goto tx_error; | |
707 | ip_vs_conn_fill_cport(cp, *p); | |
708 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | |
709 | } | |
710 | ||
4115ded1 | 711 | was_input = rt_is_input_route(skb_rtable(skb)); |
4a4739d5 | 712 | local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, |
4115ded1 JA |
713 | IP_VS_RT_MODE_LOCAL | |
714 | IP_VS_RT_MODE_NON_LOCAL | | |
c63e4de2 | 715 | IP_VS_RT_MODE_RDR, NULL, ipvsh); |
4115ded1 JA |
716 | if (local < 0) |
717 | goto tx_error; | |
718 | rt = skb_rtable(skb); | |
fc604767 JA |
719 | /* |
720 | * Avoid duplicate tuple in reply direction for NAT traffic | |
721 | * to local address when connection is sync-ed | |
722 | */ | |
c0cd1156 | 723 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
fc604767 JA |
724 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
725 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 726 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
fc604767 JA |
727 | |
728 | if (ct && !nf_ct_is_untracked(ct)) { | |
b0e010c5 | 729 | IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, |
0d79641a | 730 | "ip_vs_nat_xmit(): " |
fc604767 | 731 | "stopping DNAT to local address"); |
4115ded1 | 732 | goto tx_error; |
fc604767 JA |
733 | } |
734 | } | |
735 | #endif | |
736 | ||
737 | /* From world but DNAT to loopback address? */ | |
4115ded1 | 738 | if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { |
b0e010c5 AG |
739 | IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off, |
740 | "ip_vs_nat_xmit(): stopping DNAT to loopback " | |
741 | "address"); | |
4115ded1 | 742 | goto tx_error; |
1da177e4 LT |
743 | } |
744 | ||
745 | /* copy-on-write the packet before mangling it */ | |
af1e1cf0 | 746 | if (!skb_make_writable(skb, sizeof(struct iphdr))) |
4115ded1 | 747 | goto tx_error; |
1da177e4 | 748 | |
d8d1f30b | 749 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
4115ded1 | 750 | goto tx_error; |
1da177e4 | 751 | |
1da177e4 | 752 | /* mangle the packet */ |
d4383f04 | 753 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) |
4115ded1 | 754 | goto tx_error; |
e7ade46a | 755 | ip_hdr(skb)->daddr = cp->daddr.ip; |
eddc9ec5 | 756 | ip_send_check(ip_hdr(skb)); |
1da177e4 | 757 | |
b0e010c5 | 758 | IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT"); |
1da177e4 LT |
759 | |
760 | /* FIXME: when application helper enlarges the packet and the length | |
761 | is larger than the MTU of outgoing device, there will be still | |
762 | MTU problem. */ | |
763 | ||
764 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 765 | skb->ignore_df = 1; |
1da177e4 | 766 | |
b8abdf09 | 767 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); |
026ace06 | 768 | rcu_read_unlock(); |
1da177e4 LT |
769 | |
770 | LeaveFunction(10); | |
b8abdf09 | 771 | return rc; |
1da177e4 | 772 | |
1da177e4 | 773 | tx_error: |
1da177e4 | 774 | kfree_skb(skb); |
026ace06 | 775 | rcu_read_unlock(); |
f4bc17cd | 776 | LeaveFunction(10); |
1da177e4 | 777 | return NF_STOLEN; |
1da177e4 LT |
778 | } |
779 | ||
b3cdd2a7 JV |
780 | #ifdef CONFIG_IP_VS_IPV6 |
781 | int | |
782 | ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |
4115ded1 | 783 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
b3cdd2a7 JV |
784 | { |
785 | struct rt6_info *rt; /* Route to the other host */ | |
b8abdf09 | 786 | int local, rc; |
b3cdd2a7 JV |
787 | |
788 | EnterFunction(10); | |
789 | ||
026ace06 | 790 | rcu_read_lock(); |
b3cdd2a7 | 791 | /* check if it is a connection of no-client-port */ |
4115ded1 | 792 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { |
b3cdd2a7 | 793 | __be16 _pt, *p; |
4115ded1 | 794 | p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); |
b3cdd2a7 JV |
795 | if (p == NULL) |
796 | goto tx_error; | |
797 | ip_vs_conn_fill_cport(cp, *p); | |
798 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | |
799 | } | |
800 | ||
4a4739d5 AG |
801 | local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, |
802 | NULL, ipvsh, 0, | |
4115ded1 JA |
803 | IP_VS_RT_MODE_LOCAL | |
804 | IP_VS_RT_MODE_NON_LOCAL | | |
805 | IP_VS_RT_MODE_RDR); | |
806 | if (local < 0) | |
807 | goto tx_error; | |
808 | rt = (struct rt6_info *) skb_dst(skb); | |
fc604767 JA |
809 | /* |
810 | * Avoid duplicate tuple in reply direction for NAT traffic | |
811 | * to local address when connection is sync-ed | |
812 | */ | |
c0cd1156 | 813 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
fc604767 JA |
814 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
815 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 816 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
fc604767 JA |
817 | |
818 | if (ct && !nf_ct_is_untracked(ct)) { | |
b0e010c5 | 819 | IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, |
fc604767 JA |
820 | "ip_vs_nat_xmit_v6(): " |
821 | "stopping DNAT to local address"); | |
4115ded1 | 822 | goto tx_error; |
fc604767 JA |
823 | } |
824 | } | |
825 | #endif | |
826 | ||
827 | /* From world but DNAT to loopback address? */ | |
828 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | |
fd0273d7 | 829 | ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { |
b0e010c5 | 830 | IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off, |
fc604767 JA |
831 | "ip_vs_nat_xmit_v6(): " |
832 | "stopping DNAT to loopback address"); | |
4115ded1 | 833 | goto tx_error; |
b3cdd2a7 JV |
834 | } |
835 | ||
836 | /* copy-on-write the packet before mangling it */ | |
837 | if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) | |
4115ded1 | 838 | goto tx_error; |
b3cdd2a7 | 839 | |
d8d1f30b | 840 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
4115ded1 | 841 | goto tx_error; |
b3cdd2a7 | 842 | |
b3cdd2a7 | 843 | /* mangle the packet */ |
4115ded1 | 844 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) |
b3cdd2a7 | 845 | goto tx_error; |
4e3fd7a0 | 846 | ipv6_hdr(skb)->daddr = cp->daddr.in6; |
fc604767 | 847 | |
b0e010c5 | 848 | IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT"); |
b3cdd2a7 JV |
849 | |
850 | /* FIXME: when application helper enlarges the packet and the length | |
851 | is larger than the MTU of outgoing device, there will be still | |
852 | MTU problem. */ | |
853 | ||
854 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 855 | skb->ignore_df = 1; |
b3cdd2a7 | 856 | |
b8abdf09 | 857 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); |
026ace06 | 858 | rcu_read_unlock(); |
b3cdd2a7 JV |
859 | |
860 | LeaveFunction(10); | |
b8abdf09 | 861 | return rc; |
b3cdd2a7 | 862 | |
b3cdd2a7 JV |
863 | tx_error: |
864 | LeaveFunction(10); | |
865 | kfree_skb(skb); | |
026ace06 | 866 | rcu_read_unlock(); |
b3cdd2a7 | 867 | return NF_STOLEN; |
b3cdd2a7 JV |
868 | } |
869 | #endif | |
870 | ||
8052ba29 AG |
871 | /* When forwarding a packet, we must ensure that we've got enough headroom |
872 | * for the encapsulation packet in the skb. This also gives us an | |
873 | * opportunity to figure out what the payload_len, dsfield, ttl, and df | |
874 | * values should be, so that we won't need to look at the old ip header | |
875 | * again | |
876 | */ | |
877 | static struct sk_buff * | |
878 | ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, | |
879 | unsigned int max_headroom, __u8 *next_protocol, | |
880 | __u32 *payload_len, __u8 *dsfield, __u8 *ttl, | |
881 | __be16 *df) | |
882 | { | |
883 | struct sk_buff *new_skb = NULL; | |
884 | struct iphdr *old_iph = NULL; | |
885 | #ifdef CONFIG_IP_VS_IPV6 | |
886 | struct ipv6hdr *old_ipv6h = NULL; | |
887 | #endif | |
888 | ||
71563f34 AG |
889 | ip_vs_drop_early_demux_sk(skb); |
890 | ||
8052ba29 AG |
891 | if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { |
892 | new_skb = skb_realloc_headroom(skb, max_headroom); | |
893 | if (!new_skb) | |
894 | goto error; | |
50656d9d CO |
895 | if (skb->sk) |
896 | skb_set_owner_w(new_skb, skb->sk); | |
8052ba29 AG |
897 | consume_skb(skb); |
898 | skb = new_skb; | |
899 | } | |
900 | ||
901 | #ifdef CONFIG_IP_VS_IPV6 | |
902 | if (skb_af == AF_INET6) { | |
903 | old_ipv6h = ipv6_hdr(skb); | |
904 | *next_protocol = IPPROTO_IPV6; | |
905 | if (payload_len) | |
906 | *payload_len = | |
907 | ntohs(old_ipv6h->payload_len) + | |
908 | sizeof(*old_ipv6h); | |
909 | *dsfield = ipv6_get_dsfield(old_ipv6h); | |
910 | *ttl = old_ipv6h->hop_limit; | |
911 | if (df) | |
912 | *df = 0; | |
913 | } else | |
914 | #endif | |
915 | { | |
916 | old_iph = ip_hdr(skb); | |
917 | /* Copy DF, reset fragment offset and MF */ | |
918 | if (df) | |
919 | *df = (old_iph->frag_off & htons(IP_DF)); | |
920 | *next_protocol = IPPROTO_IPIP; | |
921 | ||
922 | /* fix old IP header checksum */ | |
923 | ip_send_check(old_iph); | |
924 | *dsfield = ipv4_get_dsfield(old_iph); | |
925 | *ttl = old_iph->ttl; | |
926 | if (payload_len) | |
927 | *payload_len = ntohs(old_iph->tot_len); | |
928 | } | |
929 | ||
930 | return skb; | |
931 | error: | |
932 | kfree_skb(skb); | |
933 | return ERR_PTR(-ENOMEM); | |
934 | } | |
935 | ||
936 | static inline int __tun_gso_type_mask(int encaps_af, int orig_af) | |
937 | { | |
938 | if (encaps_af == AF_INET) { | |
939 | if (orig_af == AF_INET) | |
940 | return SKB_GSO_IPIP; | |
941 | ||
942 | return SKB_GSO_SIT; | |
943 | } | |
944 | ||
945 | /* GSO: we need to provide proper SKB_GSO_ value for IPv6: | |
946 | * SKB_GSO_SIT/IPV6 | |
947 | */ | |
948 | return 0; | |
949 | } | |
1da177e4 LT |
950 | |
951 | /* | |
952 | * IP Tunneling transmitter | |
953 | * | |
954 | * This function encapsulates the packet in a new IP packet, its | |
955 | * destination will be set to cp->daddr. Most code of this function | |
956 | * is taken from ipip.c. | |
957 | * | |
958 | * It is used in VS/TUN cluster. The load balancer selects a real | |
959 | * server from a cluster based on a scheduling algorithm, | |
960 | * encapsulates the request packet and forwards it to the selected | |
961 | * server. For example, all real servers are configured with | |
962 | * "ifconfig tunl0 <Virtual IP Address> up". When the server receives | |
963 | * the encapsulated packet, it will decapsulate the packet, process | |
964 | * the request and return the response packets directly to the client | |
965 | * without passing the load balancer. This can greatly increase the | |
966 | * scalability of virtual server. | |
967 | * | |
968 | * Used for ANY protocol | |
969 | */ | |
970 | int | |
971 | ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 972 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
1da177e4 | 973 | { |
b6a7719a HFS |
974 | struct net *net = skb_net(skb); |
975 | struct netns_ipvs *ipvs = net_ipvs(net); | |
1da177e4 | 976 | struct rtable *rt; /* Route to the other host */ |
c92f5ca2 | 977 | __be32 saddr; /* Source for tunnel */ |
1da177e4 | 978 | struct net_device *tdev; /* Device to other host */ |
8052ba29 AG |
979 | __u8 next_protocol = 0; |
980 | __u8 dsfield = 0; | |
981 | __u8 ttl = 0; | |
982 | __be16 df = 0; | |
983 | __be16 *dfp = NULL; | |
1da177e4 | 984 | struct iphdr *iph; /* Our new IP header */ |
c2636b4d | 985 | unsigned int max_headroom; /* The extra header space needed */ |
4115ded1 | 986 | int ret, local; |
1da177e4 LT |
987 | |
988 | EnterFunction(10); | |
989 | ||
026ace06 | 990 | rcu_read_lock(); |
4a4739d5 | 991 | local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, |
4115ded1 JA |
992 | IP_VS_RT_MODE_LOCAL | |
993 | IP_VS_RT_MODE_NON_LOCAL | | |
994 | IP_VS_RT_MODE_CONNECT | | |
c63e4de2 | 995 | IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh); |
4115ded1 JA |
996 | if (local < 0) |
997 | goto tx_error; | |
026ace06 JA |
998 | if (local) { |
999 | rcu_read_unlock(); | |
b8abdf09 | 1000 | return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); |
026ace06 | 1001 | } |
1da177e4 | 1002 | |
4115ded1 | 1003 | rt = skb_rtable(skb); |
d8d1f30b | 1004 | tdev = rt->dst.dev; |
1da177e4 | 1005 | |
1da177e4 LT |
1006 | /* |
1007 | * Okay, now see if we can stuff it in the buffer as-is. | |
1008 | */ | |
1009 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); | |
1010 | ||
8052ba29 AG |
1011 | /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ |
1012 | dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; | |
1013 | skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, | |
1014 | &next_protocol, NULL, &dsfield, | |
1015 | &ttl, dfp); | |
1016 | if (IS_ERR(skb)) | |
1017 | goto tx_error; | |
1da177e4 | 1018 | |
8052ba29 AG |
1019 | skb = iptunnel_handle_offloads( |
1020 | skb, false, __tun_gso_type_mask(AF_INET, cp->af)); | |
ea1d5d77 JA |
1021 | if (IS_ERR(skb)) |
1022 | goto tx_error; | |
1023 | ||
1024 | skb->transport_header = skb->network_header; | |
1025 | ||
e2d1bca7 ACM |
1026 | skb_push(skb, sizeof(struct iphdr)); |
1027 | skb_reset_network_header(skb); | |
1da177e4 LT |
1028 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
1029 | ||
1da177e4 LT |
1030 | /* |
1031 | * Push down and install the IPIP header. | |
1032 | */ | |
eddc9ec5 | 1033 | iph = ip_hdr(skb); |
1da177e4 LT |
1034 | iph->version = 4; |
1035 | iph->ihl = sizeof(struct iphdr)>>2; | |
1036 | iph->frag_off = df; | |
8052ba29 AG |
1037 | iph->protocol = next_protocol; |
1038 | iph->tos = dsfield; | |
c92f5ca2 JA |
1039 | iph->daddr = cp->daddr.ip; |
1040 | iph->saddr = saddr; | |
8052ba29 | 1041 | iph->ttl = ttl; |
b6a7719a | 1042 | ip_select_ident(net, skb, NULL); |
1da177e4 LT |
1043 | |
1044 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 1045 | skb->ignore_df = 1; |
1da177e4 | 1046 | |
b8abdf09 | 1047 | ret = ip_vs_tunnel_xmit_prepare(skb, cp); |
f4bc17cd JA |
1048 | if (ret == NF_ACCEPT) |
1049 | ip_local_out(skb); | |
1050 | else if (ret == NF_DROP) | |
1051 | kfree_skb(skb); | |
026ace06 | 1052 | rcu_read_unlock(); |
1da177e4 LT |
1053 | |
1054 | LeaveFunction(10); | |
1055 | ||
1056 | return NF_STOLEN; | |
1057 | ||
1da177e4 | 1058 | tx_error: |
ea1d5d77 JA |
1059 | if (!IS_ERR(skb)) |
1060 | kfree_skb(skb); | |
026ace06 | 1061 | rcu_read_unlock(); |
1da177e4 LT |
1062 | LeaveFunction(10); |
1063 | return NF_STOLEN; | |
1064 | } | |
1065 | ||
b3cdd2a7 JV |
1066 | #ifdef CONFIG_IP_VS_IPV6 |
1067 | int | |
1068 | ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 1069 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
b3cdd2a7 JV |
1070 | { |
1071 | struct rt6_info *rt; /* Route to the other host */ | |
714f095f | 1072 | struct in6_addr saddr; /* Source for tunnel */ |
b3cdd2a7 | 1073 | struct net_device *tdev; /* Device to other host */ |
8052ba29 AG |
1074 | __u8 next_protocol = 0; |
1075 | __u32 payload_len = 0; | |
1076 | __u8 dsfield = 0; | |
1077 | __u8 ttl = 0; | |
b3cdd2a7 JV |
1078 | struct ipv6hdr *iph; /* Our new IP header */ |
1079 | unsigned int max_headroom; /* The extra header space needed */ | |
4115ded1 | 1080 | int ret, local; |
b3cdd2a7 JV |
1081 | |
1082 | EnterFunction(10); | |
1083 | ||
026ace06 | 1084 | rcu_read_lock(); |
4a4739d5 | 1085 | local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, |
4115ded1 JA |
1086 | &saddr, ipvsh, 1, |
1087 | IP_VS_RT_MODE_LOCAL | | |
1088 | IP_VS_RT_MODE_NON_LOCAL | | |
1089 | IP_VS_RT_MODE_TUNNEL); | |
1090 | if (local < 0) | |
1091 | goto tx_error; | |
026ace06 JA |
1092 | if (local) { |
1093 | rcu_read_unlock(); | |
b8abdf09 | 1094 | return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); |
026ace06 | 1095 | } |
b3cdd2a7 | 1096 | |
4115ded1 | 1097 | rt = (struct rt6_info *) skb_dst(skb); |
d8d1f30b | 1098 | tdev = rt->dst.dev; |
b3cdd2a7 | 1099 | |
b3cdd2a7 JV |
1100 | /* |
1101 | * Okay, now see if we can stuff it in the buffer as-is. | |
1102 | */ | |
1103 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); | |
1104 | ||
8052ba29 AG |
1105 | skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, |
1106 | &next_protocol, &payload_len, | |
1107 | &dsfield, &ttl, NULL); | |
1108 | if (IS_ERR(skb)) | |
1109 | goto tx_error; | |
b3cdd2a7 | 1110 | |
8052ba29 AG |
1111 | skb = iptunnel_handle_offloads( |
1112 | skb, false, __tun_gso_type_mask(AF_INET6, cp->af)); | |
ea1d5d77 JA |
1113 | if (IS_ERR(skb)) |
1114 | goto tx_error; | |
1115 | ||
714f095f | 1116 | skb->transport_header = skb->network_header; |
b3cdd2a7 JV |
1117 | |
1118 | skb_push(skb, sizeof(struct ipv6hdr)); | |
1119 | skb_reset_network_header(skb); | |
1120 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
1121 | ||
b3cdd2a7 JV |
1122 | /* |
1123 | * Push down and install the IPIP header. | |
1124 | */ | |
1125 | iph = ipv6_hdr(skb); | |
1126 | iph->version = 6; | |
8052ba29 AG |
1127 | iph->nexthdr = next_protocol; |
1128 | iph->payload_len = htons(payload_len); | |
b3cdd2a7 | 1129 | memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); |
8052ba29 | 1130 | ipv6_change_dsfield(iph, 0, dsfield); |
4e3fd7a0 AD |
1131 | iph->daddr = cp->daddr.in6; |
1132 | iph->saddr = saddr; | |
8052ba29 | 1133 | iph->hop_limit = ttl; |
b3cdd2a7 JV |
1134 | |
1135 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 1136 | skb->ignore_df = 1; |
b3cdd2a7 | 1137 | |
b8abdf09 | 1138 | ret = ip_vs_tunnel_xmit_prepare(skb, cp); |
f4bc17cd JA |
1139 | if (ret == NF_ACCEPT) |
1140 | ip6_local_out(skb); | |
1141 | else if (ret == NF_DROP) | |
1142 | kfree_skb(skb); | |
026ace06 | 1143 | rcu_read_unlock(); |
b3cdd2a7 JV |
1144 | |
1145 | LeaveFunction(10); | |
1146 | ||
1147 | return NF_STOLEN; | |
1148 | ||
b3cdd2a7 | 1149 | tx_error: |
ea1d5d77 JA |
1150 | if (!IS_ERR(skb)) |
1151 | kfree_skb(skb); | |
026ace06 | 1152 | rcu_read_unlock(); |
b3cdd2a7 JV |
1153 | LeaveFunction(10); |
1154 | return NF_STOLEN; | |
1155 | } | |
1156 | #endif | |
1157 | ||
1da177e4 LT |
1158 | |
1159 | /* | |
1160 | * Direct Routing transmitter | |
1161 | * Used for ANY protocol | |
1162 | */ | |
1163 | int | |
1164 | ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 1165 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
1da177e4 | 1166 | { |
4115ded1 | 1167 | int local; |
1da177e4 LT |
1168 | |
1169 | EnterFunction(10); | |
1170 | ||
026ace06 | 1171 | rcu_read_lock(); |
4a4739d5 | 1172 | local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, |
4115ded1 JA |
1173 | IP_VS_RT_MODE_LOCAL | |
1174 | IP_VS_RT_MODE_NON_LOCAL | | |
c63e4de2 | 1175 | IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); |
4115ded1 | 1176 | if (local < 0) |
1da177e4 | 1177 | goto tx_error; |
026ace06 JA |
1178 | if (local) { |
1179 | rcu_read_unlock(); | |
4115ded1 | 1180 | return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); |
026ace06 | 1181 | } |
1da177e4 | 1182 | |
eddc9ec5 | 1183 | ip_send_check(ip_hdr(skb)); |
1da177e4 | 1184 | |
1da177e4 | 1185 | /* Another hack: avoid icmp_send in ip_fragment */ |
60ff7467 | 1186 | skb->ignore_df = 1; |
1da177e4 | 1187 | |
b8abdf09 | 1188 | ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); |
026ace06 | 1189 | rcu_read_unlock(); |
1da177e4 LT |
1190 | |
1191 | LeaveFunction(10); | |
1192 | return NF_STOLEN; | |
1193 | ||
1da177e4 LT |
1194 | tx_error: |
1195 | kfree_skb(skb); | |
026ace06 | 1196 | rcu_read_unlock(); |
1da177e4 LT |
1197 | LeaveFunction(10); |
1198 | return NF_STOLEN; | |
1199 | } | |
1200 | ||
b3cdd2a7 JV |
1201 | #ifdef CONFIG_IP_VS_IPV6 |
1202 | int | |
1203 | ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |
4115ded1 | 1204 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
b3cdd2a7 | 1205 | { |
4115ded1 | 1206 | int local; |
b3cdd2a7 JV |
1207 | |
1208 | EnterFunction(10); | |
1209 | ||
026ace06 | 1210 | rcu_read_lock(); |
4a4739d5 AG |
1211 | local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, |
1212 | NULL, ipvsh, 0, | |
4115ded1 | 1213 | IP_VS_RT_MODE_LOCAL | |
48e8aa6e MKL |
1214 | IP_VS_RT_MODE_NON_LOCAL | |
1215 | IP_VS_RT_MODE_KNOWN_NH); | |
4115ded1 | 1216 | if (local < 0) |
b3cdd2a7 | 1217 | goto tx_error; |
026ace06 JA |
1218 | if (local) { |
1219 | rcu_read_unlock(); | |
4115ded1 | 1220 | return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); |
026ace06 | 1221 | } |
b3cdd2a7 JV |
1222 | |
1223 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 1224 | skb->ignore_df = 1; |
b3cdd2a7 | 1225 | |
b8abdf09 | 1226 | ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); |
026ace06 | 1227 | rcu_read_unlock(); |
b3cdd2a7 JV |
1228 | |
1229 | LeaveFunction(10); | |
1230 | return NF_STOLEN; | |
1231 | ||
b3cdd2a7 JV |
1232 | tx_error: |
1233 | kfree_skb(skb); | |
026ace06 | 1234 | rcu_read_unlock(); |
b3cdd2a7 JV |
1235 | LeaveFunction(10); |
1236 | return NF_STOLEN; | |
1237 | } | |
1238 | #endif | |
1239 | ||
1da177e4 LT |
1240 | |
1241 | /* | |
1242 | * ICMP packet transmitter | |
1243 | * called by the ip_vs_in_icmp | |
1244 | */ | |
1245 | int | |
1246 | ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 JDB |
1247 | struct ip_vs_protocol *pp, int offset, unsigned int hooknum, |
1248 | struct ip_vs_iphdr *iph) | |
1da177e4 LT |
1249 | { |
1250 | struct rtable *rt; /* Route to the other host */ | |
1da177e4 | 1251 | int rc; |
fc604767 | 1252 | int local; |
4115ded1 | 1253 | int rt_mode, was_input; |
1da177e4 LT |
1254 | |
1255 | EnterFunction(10); | |
1256 | ||
1257 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be | |
1258 | forwarded directly here, because there is no need to | |
1259 | translate address/port back */ | |
1260 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | |
1261 | if (cp->packet_xmit) | |
d4383f04 | 1262 | rc = cp->packet_xmit(skb, cp, pp, iph); |
1da177e4 LT |
1263 | else |
1264 | rc = NF_ACCEPT; | |
1265 | /* do not touch skb anymore */ | |
1266 | atomic_inc(&cp->in_pkts); | |
1da177e4 LT |
1267 | goto out; |
1268 | } | |
1269 | ||
1270 | /* | |
1271 | * mangle and send the packet here (only for VS/NAT) | |
1272 | */ | |
4115ded1 | 1273 | was_input = rt_is_input_route(skb_rtable(skb)); |
1da177e4 | 1274 | |
c92f5ca2 JA |
1275 | /* LOCALNODE from FORWARD hook is not supported */ |
1276 | rt_mode = (hooknum != NF_INET_FORWARD) ? | |
1277 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | | |
1278 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; | |
026ace06 | 1279 | rcu_read_lock(); |
4a4739d5 | 1280 | local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, |
c63e4de2 | 1281 | NULL, iph); |
4115ded1 JA |
1282 | if (local < 0) |
1283 | goto tx_error; | |
1284 | rt = skb_rtable(skb); | |
fc604767 JA |
1285 | |
1286 | /* | |
1287 | * Avoid duplicate tuple in reply direction for NAT traffic | |
1288 | * to local address when connection is sync-ed | |
1289 | */ | |
c0cd1156 | 1290 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
fc604767 JA |
1291 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
1292 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 1293 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
fc604767 JA |
1294 | |
1295 | if (ct && !nf_ct_is_untracked(ct)) { | |
1296 | IP_VS_DBG(10, "%s(): " | |
1297 | "stopping DNAT to local address %pI4\n", | |
1298 | __func__, &cp->daddr.ip); | |
4115ded1 | 1299 | goto tx_error; |
fc604767 JA |
1300 | } |
1301 | } | |
1302 | #endif | |
1303 | ||
1304 | /* From world but DNAT to loopback address? */ | |
4115ded1 | 1305 | if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { |
fc604767 JA |
1306 | IP_VS_DBG(1, "%s(): " |
1307 | "stopping DNAT to loopback %pI4\n", | |
1308 | __func__, &cp->daddr.ip); | |
4115ded1 | 1309 | goto tx_error; |
1da177e4 LT |
1310 | } |
1311 | ||
1312 | /* copy-on-write the packet before mangling it */ | |
af1e1cf0 | 1313 | if (!skb_make_writable(skb, offset)) |
4115ded1 | 1314 | goto tx_error; |
1da177e4 | 1315 | |
d8d1f30b | 1316 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
4115ded1 | 1317 | goto tx_error; |
1da177e4 | 1318 | |
1da177e4 LT |
1319 | ip_vs_nat_icmp(skb, pp, cp, 0); |
1320 | ||
1321 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 1322 | skb->ignore_df = 1; |
1da177e4 | 1323 | |
b8abdf09 | 1324 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); |
026ace06 | 1325 | rcu_read_unlock(); |
1da177e4 LT |
1326 | goto out; |
1327 | ||
1da177e4 | 1328 | tx_error: |
026ace06 JA |
1329 | kfree_skb(skb); |
1330 | rcu_read_unlock(); | |
1da177e4 LT |
1331 | rc = NF_STOLEN; |
1332 | out: | |
1333 | LeaveFunction(10); | |
1334 | return rc; | |
1da177e4 | 1335 | } |
b3cdd2a7 JV |
1336 | |
1337 | #ifdef CONFIG_IP_VS_IPV6 | |
1338 | int | |
1339 | ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |
d4383f04 | 1340 | struct ip_vs_protocol *pp, int offset, unsigned int hooknum, |
4115ded1 | 1341 | struct ip_vs_iphdr *ipvsh) |
b3cdd2a7 JV |
1342 | { |
1343 | struct rt6_info *rt; /* Route to the other host */ | |
b3cdd2a7 | 1344 | int rc; |
fc604767 | 1345 | int local; |
c92f5ca2 | 1346 | int rt_mode; |
b3cdd2a7 JV |
1347 | |
1348 | EnterFunction(10); | |
1349 | ||
1350 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be | |
1351 | forwarded directly here, because there is no need to | |
1352 | translate address/port back */ | |
1353 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | |
1354 | if (cp->packet_xmit) | |
4115ded1 | 1355 | rc = cp->packet_xmit(skb, cp, pp, ipvsh); |
b3cdd2a7 JV |
1356 | else |
1357 | rc = NF_ACCEPT; | |
1358 | /* do not touch skb anymore */ | |
1359 | atomic_inc(&cp->in_pkts); | |
1360 | goto out; | |
1361 | } | |
1362 | ||
1363 | /* | |
1364 | * mangle and send the packet here (only for VS/NAT) | |
1365 | */ | |
1366 | ||
c92f5ca2 JA |
1367 | /* LOCALNODE from FORWARD hook is not supported */ |
1368 | rt_mode = (hooknum != NF_INET_FORWARD) ? | |
1369 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | | |
1370 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; | |
026ace06 | 1371 | rcu_read_lock(); |
4a4739d5 AG |
1372 | local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, |
1373 | NULL, ipvsh, 0, rt_mode); | |
4115ded1 JA |
1374 | if (local < 0) |
1375 | goto tx_error; | |
1376 | rt = (struct rt6_info *) skb_dst(skb); | |
fc604767 JA |
1377 | /* |
1378 | * Avoid duplicate tuple in reply direction for NAT traffic | |
1379 | * to local address when connection is sync-ed | |
1380 | */ | |
c0cd1156 | 1381 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
fc604767 JA |
1382 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
1383 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 1384 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
fc604767 JA |
1385 | |
1386 | if (ct && !nf_ct_is_untracked(ct)) { | |
1387 | IP_VS_DBG(10, "%s(): " | |
1388 | "stopping DNAT to local address %pI6\n", | |
1389 | __func__, &cp->daddr.in6); | |
4115ded1 | 1390 | goto tx_error; |
fc604767 JA |
1391 | } |
1392 | } | |
1393 | #endif | |
1394 | ||
1395 | /* From world but DNAT to loopback address? */ | |
1396 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | |
fd0273d7 | 1397 | ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { |
fc604767 JA |
1398 | IP_VS_DBG(1, "%s(): " |
1399 | "stopping DNAT to loopback %pI6\n", | |
1400 | __func__, &cp->daddr.in6); | |
4115ded1 | 1401 | goto tx_error; |
b3cdd2a7 JV |
1402 | } |
1403 | ||
1404 | /* copy-on-write the packet before mangling it */ | |
1405 | if (!skb_make_writable(skb, offset)) | |
4115ded1 | 1406 | goto tx_error; |
b3cdd2a7 | 1407 | |
d8d1f30b | 1408 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
4115ded1 | 1409 | goto tx_error; |
b3cdd2a7 | 1410 | |
b3cdd2a7 JV |
1411 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); |
1412 | ||
1413 | /* Another hack: avoid icmp_send in ip_fragment */ | |
60ff7467 | 1414 | skb->ignore_df = 1; |
b3cdd2a7 | 1415 | |
b8abdf09 | 1416 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); |
026ace06 | 1417 | rcu_read_unlock(); |
b3cdd2a7 JV |
1418 | goto out; |
1419 | ||
b3cdd2a7 | 1420 | tx_error: |
026ace06 JA |
1421 | kfree_skb(skb); |
1422 | rcu_read_unlock(); | |
b3cdd2a7 JV |
1423 | rc = NF_STOLEN; |
1424 | out: | |
1425 | LeaveFunction(10); | |
1426 | return rc; | |
b3cdd2a7 JV |
1427 | } |
1428 | #endif |