ipvs: API change to avoid rescan of IPv6 exthdr
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_xmit.c
CommitLineData
1da177e4
LT
1/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
1da177e4
LT
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
cb59155f
JA
14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL
1da177e4
LT
24 */
25
9aada7ac
HE
26#define KMSG_COMPONENT "IPVS"
27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
1da177e4 29#include <linux/kernel.h>
5a0e3ad6 30#include <linux/slab.h>
1da177e4 31#include <linux/tcp.h> /* for tcphdr */
c439cb2e 32#include <net/ip.h>
1da177e4
LT
33#include <net/tcp.h> /* for csum_tcpudp_magic */
34#include <net/udp.h>
35#include <net/icmp.h> /* for icmp_send */
36#include <net/route.h> /* for ip_route_output */
38cdcc9a
JV
37#include <net/ipv6.h>
38#include <net/ip6_route.h>
714f095f 39#include <net/addrconf.h>
38cdcc9a 40#include <linux/icmpv6.h>
1da177e4
LT
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv4.h>
43
44#include <net/ip_vs.h>
45
17a8f8e3
CG
/* Route lookup mode flags; callers OR them together into rt_mode */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
};
1da177e4
LT
54
55/*
56 * Destination cache to speed up outgoing route lookup
57 */
58static inline void
714f095f
HS
59__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
60 u32 dst_cookie)
1da177e4
LT
61{
62 struct dst_entry *old_dst;
63
64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst;
66 dest->dst_rtos = rtos;
714f095f 67 dest->dst_cookie = dst_cookie;
1da177e4
LT
68 dst_release(old_dst);
69}
70
71static inline struct dst_entry *
714f095f 72__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
1da177e4
LT
73{
74 struct dst_entry *dst = dest->dst_cache;
75
76 if (!dst)
77 return NULL;
714f095f
HS
78 if ((dst->obsolete || rtos != dest->dst_rtos) &&
79 dst->ops->check(dst, dest->dst_cookie) == NULL) {
1da177e4
LT
80 dest->dst_cache = NULL;
81 dst_release(dst);
82 return NULL;
83 }
84 dst_hold(dst);
85 return dst;
86}
87
590e3f79
JDB
88static inline bool
89__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
90{
4cdd3408
PM
91 if (IP6CB(skb)->frag_max_size) {
92 /* frag_max_size tell us that, this packet have been
93 * defragmented by netfilter IPv6 conntrack module.
94 */
95 if (IP6CB(skb)->frag_max_size > mtu)
96 return true; /* largest fragment violate MTU */
97 }
98 else if (skb->len > mtu && !skb_is_gso(skb)) {
590e3f79
JDB
99 return true; /* Packet size violate MTU size */
100 }
101 return false;
102}
103
f2edb9f7
JA
104/* Get route to daddr, update *saddr, optionally bind route to saddr */
105static struct rtable *do_output_route4(struct net *net, __be32 daddr,
106 u32 rtos, int rt_mode, __be32 *saddr)
107{
108 struct flowi4 fl4;
109 struct rtable *rt;
110 int loop = 0;
111
112 memset(&fl4, 0, sizeof(fl4));
113 fl4.daddr = daddr;
114 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
115 fl4.flowi4_tos = rtos;
116
117retry:
118 rt = ip_route_output_key(net, &fl4);
119 if (IS_ERR(rt)) {
120 /* Invalid saddr ? */
121 if (PTR_ERR(rt) == -EINVAL && *saddr &&
122 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
123 *saddr = 0;
124 flowi4_update_output(&fl4, 0, rtos, daddr, 0);
125 goto retry;
126 }
127 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
128 return NULL;
129 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
130 ip_rt_put(rt);
131 *saddr = fl4.saddr;
132 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
133 loop++;
134 goto retry;
135 }
136 *saddr = fl4.saddr;
137 return rt;
138}
139
17a8f8e3 140/* Get route to destination or remote server */
ad1b30b1 141static struct rtable *
fc604767 142__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
c92f5ca2 143 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
1da177e4 144{
fc604767 145 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 146 struct rtable *rt; /* Route to the other host */
fc604767
JA
147 struct rtable *ort; /* Original route */
148 int local;
1da177e4
LT
149
150 if (dest) {
151 spin_lock(&dest->dst_lock);
152 if (!(rt = (struct rtable *)
714f095f 153 __ip_vs_dst_check(dest, rtos))) {
f2edb9f7
JA
154 rt = do_output_route4(net, dest->addr.ip, rtos,
155 rt_mode, &dest->dst_saddr.ip);
156 if (!rt) {
1da177e4 157 spin_unlock(&dest->dst_lock);
1da177e4
LT
158 return NULL;
159 }
714f095f 160 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
c92f5ca2
JA
161 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
162 "rtos=%X\n",
163 &dest->addr.ip, &dest->dst_saddr.ip,
d8d1f30b 164 atomic_read(&rt->dst.__refcnt), rtos);
1da177e4 165 }
44e3125c 166 daddr = dest->addr.ip;
c92f5ca2
JA
167 if (ret_saddr)
168 *ret_saddr = dest->dst_saddr.ip;
1da177e4
LT
169 spin_unlock(&dest->dst_lock);
170 } else {
f2edb9f7 171 __be32 saddr = htonl(INADDR_ANY);
c92f5ca2 172
f2edb9f7
JA
173 /* For such unconfigured boxes avoid many route lookups
174 * for performance reasons because we do not remember saddr
175 */
176 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
177 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
178 if (!rt)
1da177e4 179 return NULL;
c92f5ca2 180 if (ret_saddr)
f2edb9f7 181 *ret_saddr = saddr;
1da177e4
LT
182 }
183
fc604767 184 local = rt->rt_flags & RTCF_LOCAL;
17a8f8e3
CG
185 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
186 rt_mode)) {
fc604767
JA
187 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
188 (rt->rt_flags & RTCF_LOCAL) ?
44e3125c 189 "local":"non-local", &daddr);
fc604767
JA
190 ip_rt_put(rt);
191 return NULL;
192 }
17a8f8e3
CG
193 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
194 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
fc604767
JA
195 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
196 "requires NAT method, dest: %pI4\n",
44e3125c 197 &ip_hdr(skb)->daddr, &daddr);
fc604767
JA
198 ip_rt_put(rt);
199 return NULL;
200 }
201 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
202 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
203 "to non-local address, dest: %pI4\n",
44e3125c 204 &ip_hdr(skb)->saddr, &daddr);
fc604767
JA
205 ip_rt_put(rt);
206 return NULL;
207 }
208
1da177e4
LT
209 return rt;
210}
211
fc604767
JA
212/* Reroute packet to local IPv4 stack after DNAT */
213static int
214__ip_vs_reroute_locally(struct sk_buff *skb)
215{
216 struct rtable *rt = skb_rtable(skb);
217 struct net_device *dev = rt->dst.dev;
218 struct net *net = dev_net(dev);
219 struct iphdr *iph = ip_hdr(skb);
220
c7537967 221 if (rt_is_input_route(rt)) {
fc604767
JA
222 unsigned long orefdst = skb->_skb_refdst;
223
224 if (ip_route_input(skb, iph->daddr, iph->saddr,
225 iph->tos, skb->dev))
226 return 0;
227 refdst_drop(orefdst);
228 } else {
9d6ec938
DM
229 struct flowi4 fl4 = {
230 .daddr = iph->daddr,
231 .saddr = iph->saddr,
232 .flowi4_tos = RT_TOS(iph->tos),
233 .flowi4_mark = skb->mark,
fc604767 234 };
fc604767 235
9d6ec938 236 rt = ip_route_output_key(net, &fl4);
b23dd4fe 237 if (IS_ERR(rt))
fc604767
JA
238 return 0;
239 if (!(rt->rt_flags & RTCF_LOCAL)) {
240 ip_rt_put(rt);
241 return 0;
242 }
243 /* Drop old route. */
244 skb_dst_drop(skb);
245 skb_dst_set(skb, &rt->dst);
246 }
247 return 1;
248}
249
38cdcc9a 250#ifdef CONFIG_IP_VS_IPV6
714f095f 251
fc604767
JA
252static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
253{
d1918542 254 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
fc604767
JA
255}
256
714f095f
HS
257static struct dst_entry *
258__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
259 struct in6_addr *ret_saddr, int do_xfrm)
260{
261 struct dst_entry *dst;
4c9483b2
DM
262 struct flowi6 fl6 = {
263 .daddr = *daddr,
714f095f
HS
264 };
265
4c9483b2 266 dst = ip6_route_output(net, NULL, &fl6);
714f095f
HS
267 if (dst->error)
268 goto out_err;
269 if (!ret_saddr)
270 return dst;
4c9483b2 271 if (ipv6_addr_any(&fl6.saddr) &&
714f095f 272 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
4c9483b2 273 &fl6.daddr, 0, &fl6.saddr) < 0)
714f095f 274 goto out_err;
452edd59 275 if (do_xfrm) {
4c9483b2 276 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
452edd59
DM
277 if (IS_ERR(dst)) {
278 dst = NULL;
279 goto out_err;
280 }
281 }
4e3fd7a0 282 *ret_saddr = fl6.saddr;
714f095f
HS
283 return dst;
284
285out_err:
286 dst_release(dst);
287 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
288 return NULL;
289}
290
fc604767
JA
291/*
292 * Get route to destination or remote server
fc604767 293 */
38cdcc9a 294static struct rt6_info *
fc604767
JA
295__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
296 struct in6_addr *daddr, struct in6_addr *ret_saddr,
297 int do_xfrm, int rt_mode)
38cdcc9a 298{
fc604767 299 struct net *net = dev_net(skb_dst(skb)->dev);
38cdcc9a 300 struct rt6_info *rt; /* Route to the other host */
fc604767 301 struct rt6_info *ort; /* Original route */
714f095f 302 struct dst_entry *dst;
fc604767 303 int local;
38cdcc9a
JV
304
305 if (dest) {
306 spin_lock(&dest->dst_lock);
714f095f 307 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
38cdcc9a 308 if (!rt) {
714f095f 309 u32 cookie;
38cdcc9a 310
714f095f 311 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
c92f5ca2 312 &dest->dst_saddr.in6,
714f095f
HS
313 do_xfrm);
314 if (!dst) {
38cdcc9a 315 spin_unlock(&dest->dst_lock);
38cdcc9a
JV
316 return NULL;
317 }
714f095f
HS
318 rt = (struct rt6_info *) dst;
319 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
320 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
321 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
c92f5ca2 322 &dest->addr.in6, &dest->dst_saddr.in6,
d8d1f30b 323 atomic_read(&rt->dst.__refcnt));
38cdcc9a 324 }
714f095f 325 if (ret_saddr)
4e3fd7a0 326 *ret_saddr = dest->dst_saddr.in6;
38cdcc9a
JV
327 spin_unlock(&dest->dst_lock);
328 } else {
fc604767 329 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
714f095f 330 if (!dst)
38cdcc9a 331 return NULL;
714f095f 332 rt = (struct rt6_info *) dst;
38cdcc9a
JV
333 }
334
fc604767 335 local = __ip_vs_is_local_route6(rt);
e58b3442
DM
336 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
337 rt_mode)) {
120b9c14 338 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
fc604767
JA
339 local ? "local":"non-local", daddr);
340 dst_release(&rt->dst);
341 return NULL;
342 }
e58b3442 343 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
fc604767
JA
344 !((ort = (struct rt6_info *) skb_dst(skb)) &&
345 __ip_vs_is_local_route6(ort))) {
120b9c14
JDB
346 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
347 "requires NAT method, dest: %pI6c\n",
fc604767
JA
348 &ipv6_hdr(skb)->daddr, daddr);
349 dst_release(&rt->dst);
350 return NULL;
351 }
352 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
353 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
354 IPV6_ADDR_LOOPBACK)) {
120b9c14
JDB
355 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
356 "to non-local address, dest: %pI6c\n",
fc604767
JA
357 &ipv6_hdr(skb)->saddr, daddr);
358 dst_release(&rt->dst);
359 return NULL;
360 }
361
38cdcc9a
JV
362 return rt;
363}
364#endif
365
1da177e4
LT
366
367/*
368 * Release dest->dst_cache before a dest is removed
369 */
370void
371ip_vs_dst_reset(struct ip_vs_dest *dest)
372{
373 struct dst_entry *old_dst;
374
375 old_dst = dest->dst_cache;
376 dest->dst_cache = NULL;
377 dst_release(old_dst);
f2edb9f7 378 dest->dst_saddr.ip = 0;
1da177e4
LT
379}
380
f4bc17cd
JA
/*
 * Prepare a tunnelled skb for transmission and evaluate to a netfilter
 * verdict: NF_ACCEPT unless ip_vs_confirm_conntrack() (only consulted for
 * connections flagged IP_VS_CONN_F_NFCT) decides otherwise.  On NF_ACCEPT
 * the skb gets nf_reset() and skb_forward_csum() applied; sending it is
 * left to the caller.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
394
/*
 * Transmit tail for the NAT transmitters.
 *
 * Connections flagged IP_VS_CONN_F_NFCT get their conntrack entry updated,
 * all others are marked as not tracked.
 *
 * NOTE: contains a hidden "return NF_ACCEPT" from the CALLING function
 * when @local is true; otherwise the skb is passed to the LOCAL_OUT hook.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)			\
do {								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
408
/*
 * Transmit tail for the non-NAT transmitters.
 *
 * Connections without IP_VS_CONN_F_NFCT are marked as not tracked.
 *
 * NOTE: contains a hidden "return NF_ACCEPT" from the CALLING function
 * when @local is true; otherwise the skb is passed to the LOCAL_OUT hook.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))		\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK((pf), NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
420
421
422/*
423 * NULL transmitter (do nothing except return NF_ACCEPT)
424 */
425int
426ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 427 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1da177e4
LT
428{
429 /* we do not touch skb and do not need pskb ptr */
fc604767 430 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1da177e4
LT
431}
432
433
434/*
435 * Bypass transmitter
436 * Let packets bypass the destination when the destination is not
437 * available, it may be only used in transparent cache cluster.
438 */
439int
440ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 441 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1da177e4
LT
442{
443 struct rtable *rt; /* Route to the other host */
eddc9ec5 444 struct iphdr *iph = ip_hdr(skb);
1da177e4 445 int mtu;
1da177e4
LT
446
447 EnterFunction(10);
448
17a8f8e3 449 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
c92f5ca2 450 IP_VS_RT_MODE_NON_LOCAL, NULL)))
1da177e4 451 goto tx_error_icmp;
1da177e4
LT
452
453 /* MTU checking */
d8d1f30b 454 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
455 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
456 !skb_is_gso(skb)) {
1da177e4
LT
457 ip_rt_put(rt);
458 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 459 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
460 goto tx_error;
461 }
462
463 /*
464 * Call ip_send_check because we are not sure it is called
465 * after ip_defrag. Is copy-on-write needed?
466 */
467 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
468 ip_rt_put(rt);
469 return NF_STOLEN;
470 }
eddc9ec5 471 ip_send_check(ip_hdr(skb));
1da177e4
LT
472
473 /* drop old route */
adf30907 474 skb_dst_drop(skb);
d8d1f30b 475 skb_dst_set(skb, &rt->dst);
1da177e4
LT
476
477 /* Another hack: avoid icmp_send in ip_fragment */
478 skb->local_df = 1;
479
fc604767 480 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1da177e4
LT
481
482 LeaveFunction(10);
483 return NF_STOLEN;
484
485 tx_error_icmp:
486 dst_link_failure(skb);
487 tx_error:
488 kfree_skb(skb);
489 LeaveFunction(10);
490 return NF_STOLEN;
491}
492
b3cdd2a7
JV
493#ifdef CONFIG_IP_VS_IPV6
494int
495ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 496 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
b3cdd2a7
JV
497{
498 struct rt6_info *rt; /* Route to the other host */
b3cdd2a7 499 int mtu;
b3cdd2a7
JV
500
501 EnterFunction(10);
502
d4383f04 503 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
2f74713d
JDB
504 IP_VS_RT_MODE_NON_LOCAL);
505 if (!rt)
b3cdd2a7 506 goto tx_error_icmp;
b3cdd2a7
JV
507
508 /* MTU checking */
d8d1f30b 509 mtu = dst_mtu(&rt->dst);
590e3f79 510 if (__mtu_check_toobig_v6(skb, mtu)) {
cb59155f
JA
511 if (!skb->dev) {
512 struct net *net = dev_net(skb_dst(skb)->dev);
513
514 skb->dev = net->loopback_dev;
515 }
2f74713d 516 /* only send ICMP too big on first fragment */
d4383f04 517 if (!iph->fragoffs)
2f74713d 518 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
cb59155f 519 dst_release(&rt->dst);
1e3e238e 520 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
521 goto tx_error;
522 }
523
524 /*
525 * Call ip_send_check because we are not sure it is called
526 * after ip_defrag. Is copy-on-write needed?
527 */
528 skb = skb_share_check(skb, GFP_ATOMIC);
529 if (unlikely(skb == NULL)) {
d8d1f30b 530 dst_release(&rt->dst);
b3cdd2a7
JV
531 return NF_STOLEN;
532 }
533
534 /* drop old route */
adf30907 535 skb_dst_drop(skb);
d8d1f30b 536 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
537
538 /* Another hack: avoid icmp_send in ip_fragment */
539 skb->local_df = 1;
540
fc604767 541 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
b3cdd2a7
JV
542
543 LeaveFunction(10);
544 return NF_STOLEN;
545
546 tx_error_icmp:
547 dst_link_failure(skb);
548 tx_error:
549 kfree_skb(skb);
550 LeaveFunction(10);
551 return NF_STOLEN;
552}
553#endif
1da177e4
LT
554
555/*
556 * NAT transmitter (only for outside-to-inside nat forwarding)
557 * Not used for related ICMP
558 */
559int
560ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 561 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1da177e4
LT
562{
563 struct rtable *rt; /* Route to the other host */
564 int mtu;
eddc9ec5 565 struct iphdr *iph = ip_hdr(skb);
fc604767 566 int local;
1da177e4
LT
567
568 EnterFunction(10);
569
570 /* check if it is a connection of no-client-port */
571 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
014d730d 572 __be16 _pt, *p;
1da177e4
LT
573 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
574 if (p == NULL)
575 goto tx_error;
576 ip_vs_conn_fill_cport(cp, *p);
577 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
578 }
579
fc604767 580 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3
CG
581 RT_TOS(iph->tos),
582 IP_VS_RT_MODE_LOCAL |
583 IP_VS_RT_MODE_NON_LOCAL |
c92f5ca2 584 IP_VS_RT_MODE_RDR, NULL)))
1da177e4 585 goto tx_error_icmp;
fc604767
JA
586 local = rt->rt_flags & RTCF_LOCAL;
587 /*
588 * Avoid duplicate tuple in reply direction for NAT traffic
589 * to local address when connection is sync-ed
590 */
c0cd1156 591#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
592 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
593 enum ip_conntrack_info ctinfo;
594 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
595
596 if (ct && !nf_ct_is_untracked(ct)) {
0d79641a
JA
597 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
598 "ip_vs_nat_xmit(): "
fc604767
JA
599 "stopping DNAT to local address");
600 goto tx_error_put;
601 }
602 }
603#endif
604
605 /* From world but DNAT to loopback address? */
c92f5ca2 606 if (local && ipv4_is_loopback(cp->daddr.ip) &&
c7537967 607 rt_is_input_route(skb_rtable(skb))) {
0d79641a 608 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
fc604767
JA
609 "stopping DNAT to loopback address");
610 goto tx_error_put;
611 }
1da177e4
LT
612
613 /* MTU checking */
d8d1f30b 614 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
615 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
616 !skb_is_gso(skb)) {
1da177e4 617 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
0d79641a
JA
618 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
619 "ip_vs_nat_xmit(): frag needed for");
fc604767 620 goto tx_error_put;
1da177e4
LT
621 }
622
623 /* copy-on-write the packet before mangling it */
af1e1cf0 624 if (!skb_make_writable(skb, sizeof(struct iphdr)))
1da177e4
LT
625 goto tx_error_put;
626
d8d1f30b 627 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1da177e4
LT
628 goto tx_error_put;
629
1da177e4 630 /* mangle the packet */
d4383f04 631 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
fc604767 632 goto tx_error_put;
e7ade46a 633 ip_hdr(skb)->daddr = cp->daddr.ip;
eddc9ec5 634 ip_send_check(ip_hdr(skb));
1da177e4 635
fc604767
JA
636 if (!local) {
637 /* drop old route */
638 skb_dst_drop(skb);
639 skb_dst_set(skb, &rt->dst);
640 } else {
641 ip_rt_put(rt);
642 /*
643 * Some IPv4 replies get local address from routes,
644 * not from iph, so while we DNAT after routing
645 * we need this second input/output route.
646 */
647 if (!__ip_vs_reroute_locally(skb))
648 goto tx_error;
649 }
650
0d79641a 651 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
1da177e4
LT
652
653 /* FIXME: when application helper enlarges the packet and the length
654 is larger than the MTU of outgoing device, there will be still
655 MTU problem. */
656
657 /* Another hack: avoid icmp_send in ip_fragment */
658 skb->local_df = 1;
659
fc604767 660 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1da177e4
LT
661
662 LeaveFunction(10);
663 return NF_STOLEN;
664
665 tx_error_icmp:
666 dst_link_failure(skb);
667 tx_error:
1da177e4 668 kfree_skb(skb);
f4bc17cd 669 LeaveFunction(10);
1da177e4
LT
670 return NF_STOLEN;
671 tx_error_put:
672 ip_rt_put(rt);
673 goto tx_error;
674}
675
b3cdd2a7
JV
676#ifdef CONFIG_IP_VS_IPV6
677int
678ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 679 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
b3cdd2a7
JV
680{
681 struct rt6_info *rt; /* Route to the other host */
682 int mtu;
fc604767 683 int local;
b3cdd2a7
JV
684
685 EnterFunction(10);
686
687 /* check if it is a connection of no-client-port */
d4383f04 688 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
b3cdd2a7 689 __be16 _pt, *p;
d4383f04 690 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
b3cdd2a7
JV
691 if (p == NULL)
692 goto tx_error;
693 ip_vs_conn_fill_cport(cp, *p);
694 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
695 }
696
fc604767 697 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
e58b3442
DM
698 0, (IP_VS_RT_MODE_LOCAL |
699 IP_VS_RT_MODE_NON_LOCAL |
700 IP_VS_RT_MODE_RDR))))
b3cdd2a7 701 goto tx_error_icmp;
fc604767
JA
702 local = __ip_vs_is_local_route6(rt);
703 /*
704 * Avoid duplicate tuple in reply direction for NAT traffic
705 * to local address when connection is sync-ed
706 */
c0cd1156 707#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
708 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
709 enum ip_conntrack_info ctinfo;
710 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
711
712 if (ct && !nf_ct_is_untracked(ct)) {
0d79641a 713 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
fc604767
JA
714 "ip_vs_nat_xmit_v6(): "
715 "stopping DNAT to local address");
716 goto tx_error_put;
717 }
718 }
719#endif
720
721 /* From world but DNAT to loopback address? */
722 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
723 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
0d79641a 724 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
fc604767
JA
725 "ip_vs_nat_xmit_v6(): "
726 "stopping DNAT to loopback address");
727 goto tx_error_put;
728 }
b3cdd2a7
JV
729
730 /* MTU checking */
d8d1f30b 731 mtu = dst_mtu(&rt->dst);
590e3f79 732 if (__mtu_check_toobig_v6(skb, mtu)) {
cb59155f
JA
733 if (!skb->dev) {
734 struct net *net = dev_net(skb_dst(skb)->dev);
735
736 skb->dev = net->loopback_dev;
737 }
2f74713d 738 /* only send ICMP too big on first fragment */
d4383f04 739 if (!iph->fragoffs)
2f74713d 740 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
0d79641a 741 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
b3cdd2a7 742 "ip_vs_nat_xmit_v6(): frag needed for");
fc604767 743 goto tx_error_put;
b3cdd2a7
JV
744 }
745
746 /* copy-on-write the packet before mangling it */
747 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
748 goto tx_error_put;
749
d8d1f30b 750 if (skb_cow(skb, rt->dst.dev->hard_header_len))
b3cdd2a7
JV
751 goto tx_error_put;
752
b3cdd2a7 753 /* mangle the packet */
d4383f04 754 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
b3cdd2a7 755 goto tx_error;
4e3fd7a0 756 ipv6_hdr(skb)->daddr = cp->daddr.in6;
fc604767
JA
757
758 if (!local || !skb->dev) {
759 /* drop the old route when skb is not shared */
760 skb_dst_drop(skb);
761 skb_dst_set(skb, &rt->dst);
762 } else {
763 /* destined to loopback, do we need to change route? */
764 dst_release(&rt->dst);
765 }
b3cdd2a7 766
0d79641a 767 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
b3cdd2a7
JV
768
769 /* FIXME: when application helper enlarges the packet and the length
770 is larger than the MTU of outgoing device, there will be still
771 MTU problem. */
772
773 /* Another hack: avoid icmp_send in ip_fragment */
774 skb->local_df = 1;
775
fc604767 776 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
b3cdd2a7
JV
777
778 LeaveFunction(10);
779 return NF_STOLEN;
780
781tx_error_icmp:
782 dst_link_failure(skb);
783tx_error:
784 LeaveFunction(10);
785 kfree_skb(skb);
786 return NF_STOLEN;
787tx_error_put:
d8d1f30b 788 dst_release(&rt->dst);
b3cdd2a7
JV
789 goto tx_error;
790}
791#endif
792
1da177e4
LT
793
794/*
795 * IP Tunneling transmitter
796 *
797 * This function encapsulates the packet in a new IP packet, its
798 * destination will be set to cp->daddr. Most code of this function
799 * is taken from ipip.c.
800 *
801 * It is used in VS/TUN cluster. The load balancer selects a real
802 * server from a cluster based on a scheduling algorithm,
803 * encapsulates the request packet and forwards it to the selected
804 * server. For example, all real servers are configured with
805 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
806 * the encapsulated packet, it will decapsulate the packet, process
807 * the request and return the response packets directly to the client
808 * without passing the load balancer. This can greatly increase the
809 * scalability of virtual server.
810 *
811 * Used for ANY protocol
812 */
813int
814ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 815 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1da177e4 816{
3654e611 817 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
1da177e4 818 struct rtable *rt; /* Route to the other host */
c92f5ca2 819 __be32 saddr; /* Source for tunnel */
1da177e4 820 struct net_device *tdev; /* Device to other host */
eddc9ec5 821 struct iphdr *old_iph = ip_hdr(skb);
1da177e4 822 u8 tos = old_iph->tos;
f2edb9f7 823 __be16 df;
1da177e4 824 struct iphdr *iph; /* Our new IP header */
c2636b4d 825 unsigned int max_headroom; /* The extra header space needed */
1da177e4 826 int mtu;
f4bc17cd 827 int ret;
1da177e4
LT
828
829 EnterFunction(10);
830
fc604767 831 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3 832 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
f2edb9f7
JA
833 IP_VS_RT_MODE_NON_LOCAL |
834 IP_VS_RT_MODE_CONNECT,
c92f5ca2 835 &saddr)))
1da177e4 836 goto tx_error_icmp;
fc604767
JA
837 if (rt->rt_flags & RTCF_LOCAL) {
838 ip_rt_put(rt);
839 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
840 }
1da177e4 841
d8d1f30b 842 tdev = rt->dst.dev;
1da177e4 843
d8d1f30b 844 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
1da177e4 845 if (mtu < 68) {
1e3e238e 846 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
fc604767 847 goto tx_error_put;
1da177e4 848 }
f2edb9f7 849 if (rt_is_output_route(skb_rtable(skb)))
6700c270 850 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1da177e4 851
f2edb9f7 852 /* Copy DF, reset fragment offset and MF */
3654e611 853 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
1da177e4 854
3654e611 855 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
1da177e4 856 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 857 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 858 goto tx_error_put;
1da177e4
LT
859 }
860
861 /*
862 * Okay, now see if we can stuff it in the buffer as-is.
863 */
864 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
865
866 if (skb_headroom(skb) < max_headroom
867 || skb_cloned(skb) || skb_shared(skb)) {
868 struct sk_buff *new_skb =
869 skb_realloc_headroom(skb, max_headroom);
870 if (!new_skb) {
871 ip_rt_put(rt);
872 kfree_skb(skb);
1e3e238e 873 IP_VS_ERR_RL("%s(): no memory\n", __func__);
1da177e4
LT
874 return NF_STOLEN;
875 }
5d0ba55b 876 consume_skb(skb);
1da177e4 877 skb = new_skb;
eddc9ec5 878 old_iph = ip_hdr(skb);
1da177e4
LT
879 }
880
714f095f 881 skb->transport_header = skb->network_header;
1da177e4
LT
882
883 /* fix old IP header checksum */
884 ip_send_check(old_iph);
885
e2d1bca7
ACM
886 skb_push(skb, sizeof(struct iphdr));
887 skb_reset_network_header(skb);
1da177e4
LT
888 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
889
890 /* drop old route */
adf30907 891 skb_dst_drop(skb);
d8d1f30b 892 skb_dst_set(skb, &rt->dst);
1da177e4
LT
893
894 /*
895 * Push down and install the IPIP header.
896 */
eddc9ec5 897 iph = ip_hdr(skb);
1da177e4
LT
898 iph->version = 4;
899 iph->ihl = sizeof(struct iphdr)>>2;
900 iph->frag_off = df;
901 iph->protocol = IPPROTO_IPIP;
902 iph->tos = tos;
c92f5ca2
JA
903 iph->daddr = cp->daddr.ip;
904 iph->saddr = saddr;
1da177e4 905 iph->ttl = old_iph->ttl;
d8d1f30b 906 ip_select_ident(iph, &rt->dst, NULL);
1da177e4
LT
907
908 /* Another hack: avoid icmp_send in ip_fragment */
909 skb->local_df = 1;
910
f4bc17cd
JA
911 ret = IP_VS_XMIT_TUNNEL(skb, cp);
912 if (ret == NF_ACCEPT)
913 ip_local_out(skb);
914 else if (ret == NF_DROP)
915 kfree_skb(skb);
1da177e4
LT
916
917 LeaveFunction(10);
918
919 return NF_STOLEN;
920
921 tx_error_icmp:
922 dst_link_failure(skb);
923 tx_error:
924 kfree_skb(skb);
925 LeaveFunction(10);
926 return NF_STOLEN;
fc604767
JA
927tx_error_put:
928 ip_rt_put(rt);
929 goto tx_error;
1da177e4
LT
930}
931
b3cdd2a7
JV
932#ifdef CONFIG_IP_VS_IPV6
933int
934ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 935 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
b3cdd2a7
JV
936{
937 struct rt6_info *rt; /* Route to the other host */
714f095f 938 struct in6_addr saddr; /* Source for tunnel */
b3cdd2a7
JV
939 struct net_device *tdev; /* Device to other host */
940 struct ipv6hdr *old_iph = ipv6_hdr(skb);
b3cdd2a7
JV
941 struct ipv6hdr *iph; /* Our new IP header */
942 unsigned int max_headroom; /* The extra header space needed */
943 int mtu;
f4bc17cd 944 int ret;
b3cdd2a7
JV
945
946 EnterFunction(10);
947
fc604767 948 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
e58b3442
DM
949 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
950 IP_VS_RT_MODE_NON_LOCAL))))
b3cdd2a7 951 goto tx_error_icmp;
fc604767
JA
952 if (__ip_vs_is_local_route6(rt)) {
953 dst_release(&rt->dst);
954 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
955 }
b3cdd2a7 956
d8d1f30b 957 tdev = rt->dst.dev;
b3cdd2a7 958
d8d1f30b 959 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
714f095f 960 if (mtu < IPV6_MIN_MTU) {
714f095f
HS
961 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
962 IPV6_MIN_MTU);
fc604767 963 goto tx_error_put;
b3cdd2a7 964 }
adf30907 965 if (skb_dst(skb))
6700c270 966 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
b3cdd2a7 967
590e3f79
JDB
968 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
969 if (__mtu_check_toobig_v6(skb, mtu)) {
cb59155f
JA
970 if (!skb->dev) {
971 struct net *net = dev_net(skb_dst(skb)->dev);
972
973 skb->dev = net->loopback_dev;
974 }
2f74713d 975 /* only send ICMP too big on first fragment */
d4383f04 976 if (!ipvsh->fragoffs)
2f74713d 977 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1e3e238e 978 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 979 goto tx_error_put;
b3cdd2a7
JV
980 }
981
982 /*
983 * Okay, now see if we can stuff it in the buffer as-is.
984 */
985 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
986
987 if (skb_headroom(skb) < max_headroom
988 || skb_cloned(skb) || skb_shared(skb)) {
989 struct sk_buff *new_skb =
990 skb_realloc_headroom(skb, max_headroom);
991 if (!new_skb) {
d8d1f30b 992 dst_release(&rt->dst);
b3cdd2a7 993 kfree_skb(skb);
1e3e238e 994 IP_VS_ERR_RL("%s(): no memory\n", __func__);
b3cdd2a7
JV
995 return NF_STOLEN;
996 }
5d0ba55b 997 consume_skb(skb);
b3cdd2a7
JV
998 skb = new_skb;
999 old_iph = ipv6_hdr(skb);
1000 }
1001
714f095f 1002 skb->transport_header = skb->network_header;
b3cdd2a7
JV
1003
1004 skb_push(skb, sizeof(struct ipv6hdr));
1005 skb_reset_network_header(skb);
1006 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1007
1008 /* drop old route */
adf30907 1009 skb_dst_drop(skb);
d8d1f30b 1010 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
1011
1012 /*
1013 * Push down and install the IPIP header.
1014 */
1015 iph = ipv6_hdr(skb);
1016 iph->version = 6;
1017 iph->nexthdr = IPPROTO_IPV6;
b7b45f47
HH
1018 iph->payload_len = old_iph->payload_len;
1019 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
b3cdd2a7
JV
1020 iph->priority = old_iph->priority;
1021 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
4e3fd7a0
AD
1022 iph->daddr = cp->daddr.in6;
1023 iph->saddr = saddr;
b3cdd2a7
JV
1024 iph->hop_limit = old_iph->hop_limit;
1025
1026 /* Another hack: avoid icmp_send in ip_fragment */
1027 skb->local_df = 1;
1028
f4bc17cd
JA
1029 ret = IP_VS_XMIT_TUNNEL(skb, cp);
1030 if (ret == NF_ACCEPT)
1031 ip6_local_out(skb);
1032 else if (ret == NF_DROP)
1033 kfree_skb(skb);
b3cdd2a7
JV
1034
1035 LeaveFunction(10);
1036
1037 return NF_STOLEN;
1038
1039tx_error_icmp:
1040 dst_link_failure(skb);
1041tx_error:
1042 kfree_skb(skb);
1043 LeaveFunction(10);
1044 return NF_STOLEN;
fc604767
JA
1045tx_error_put:
1046 dst_release(&rt->dst);
1047 goto tx_error;
b3cdd2a7
JV
1048}
1049#endif
1050
1da177e4
LT
1051
1052/*
1053 * Direct Routing transmitter
1054 * Used for ANY protocol
1055 */
1056int
1057ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 1058 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1da177e4
LT
1059{
1060 struct rtable *rt; /* Route to the other host */
eddc9ec5 1061 struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
1062 int mtu;
1063
1064 EnterFunction(10);
1065
fc604767 1066 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3
CG
1067 RT_TOS(iph->tos),
1068 IP_VS_RT_MODE_LOCAL |
c92f5ca2 1069 IP_VS_RT_MODE_NON_LOCAL, NULL)))
1da177e4 1070 goto tx_error_icmp;
fc604767
JA
1071 if (rt->rt_flags & RTCF_LOCAL) {
1072 ip_rt_put(rt);
1073 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1074 }
1da177e4
LT
1075
1076 /* MTU checking */
d8d1f30b 1077 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
1078 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1079 !skb_is_gso(skb)) {
1da177e4
LT
1080 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1081 ip_rt_put(rt);
1e3e238e 1082 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
1083 goto tx_error;
1084 }
1085
1086 /*
1087 * Call ip_send_check because we are not sure it is called
1088 * after ip_defrag. Is copy-on-write needed?
1089 */
1090 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1091 ip_rt_put(rt);
1092 return NF_STOLEN;
1093 }
eddc9ec5 1094 ip_send_check(ip_hdr(skb));
1da177e4
LT
1095
1096 /* drop old route */
adf30907 1097 skb_dst_drop(skb);
d8d1f30b 1098 skb_dst_set(skb, &rt->dst);
1da177e4
LT
1099
1100 /* Another hack: avoid icmp_send in ip_fragment */
1101 skb->local_df = 1;
1102
fc604767 1103 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1da177e4
LT
1104
1105 LeaveFunction(10);
1106 return NF_STOLEN;
1107
1108 tx_error_icmp:
1109 dst_link_failure(skb);
1110 tx_error:
1111 kfree_skb(skb);
1112 LeaveFunction(10);
1113 return NF_STOLEN;
1114}
1115
b3cdd2a7
JV
1116#ifdef CONFIG_IP_VS_IPV6
1117int
1118ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04 1119 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
b3cdd2a7
JV
1120{
1121 struct rt6_info *rt; /* Route to the other host */
1122 int mtu;
1123
1124 EnterFunction(10);
1125
fc604767 1126 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
e58b3442
DM
1127 0, (IP_VS_RT_MODE_LOCAL |
1128 IP_VS_RT_MODE_NON_LOCAL))))
b3cdd2a7 1129 goto tx_error_icmp;
fc604767
JA
1130 if (__ip_vs_is_local_route6(rt)) {
1131 dst_release(&rt->dst);
1132 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1133 }
b3cdd2a7
JV
1134
1135 /* MTU checking */
d8d1f30b 1136 mtu = dst_mtu(&rt->dst);
590e3f79 1137 if (__mtu_check_toobig_v6(skb, mtu)) {
cb59155f
JA
1138 if (!skb->dev) {
1139 struct net *net = dev_net(skb_dst(skb)->dev);
1140
1141 skb->dev = net->loopback_dev;
1142 }
2f74713d 1143 /* only send ICMP too big on first fragment */
d4383f04 1144 if (!iph->fragoffs)
2f74713d 1145 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
d8d1f30b 1146 dst_release(&rt->dst);
1e3e238e 1147 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
1148 goto tx_error;
1149 }
1150
1151 /*
1152 * Call ip_send_check because we are not sure it is called
1153 * after ip_defrag. Is copy-on-write needed?
1154 */
1155 skb = skb_share_check(skb, GFP_ATOMIC);
1156 if (unlikely(skb == NULL)) {
d8d1f30b 1157 dst_release(&rt->dst);
b3cdd2a7
JV
1158 return NF_STOLEN;
1159 }
1160
1161 /* drop old route */
adf30907 1162 skb_dst_drop(skb);
d8d1f30b 1163 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
1164
1165 /* Another hack: avoid icmp_send in ip_fragment */
1166 skb->local_df = 1;
1167
fc604767 1168 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
b3cdd2a7
JV
1169
1170 LeaveFunction(10);
1171 return NF_STOLEN;
1172
1173tx_error_icmp:
1174 dst_link_failure(skb);
1175tx_error:
1176 kfree_skb(skb);
1177 LeaveFunction(10);
1178 return NF_STOLEN;
1179}
1180#endif
1181
1da177e4
LT
1182
1183/*
1184 * ICMP packet transmitter
1185 * called by the ip_vs_in_icmp
1186 */
1187int
1188ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04
JDB
1189 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1190 struct ip_vs_iphdr *iph)
1da177e4
LT
1191{
1192 struct rtable *rt; /* Route to the other host */
1193 int mtu;
1194 int rc;
fc604767 1195 int local;
c92f5ca2 1196 int rt_mode;
1da177e4
LT
1197
1198 EnterFunction(10);
1199
1200 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1201 forwarded directly here, because there is no need to
1202 translate address/port back */
1203 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1204 if (cp->packet_xmit)
d4383f04 1205 rc = cp->packet_xmit(skb, cp, pp, iph);
1da177e4
LT
1206 else
1207 rc = NF_ACCEPT;
1208 /* do not touch skb anymore */
1209 atomic_inc(&cp->in_pkts);
1da177e4
LT
1210 goto out;
1211 }
1212
1213 /*
1214 * mangle and send the packet here (only for VS/NAT)
1215 */
1216
c92f5ca2
JA
1217 /* LOCALNODE from FORWARD hook is not supported */
1218 rt_mode = (hooknum != NF_INET_FORWARD) ?
1219 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1220 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
fc604767 1221 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3 1222 RT_TOS(ip_hdr(skb)->tos),
c92f5ca2 1223 rt_mode, NULL)))
1da177e4 1224 goto tx_error_icmp;
fc604767
JA
1225 local = rt->rt_flags & RTCF_LOCAL;
1226
1227 /*
1228 * Avoid duplicate tuple in reply direction for NAT traffic
1229 * to local address when connection is sync-ed
1230 */
c0cd1156 1231#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
1232 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1233 enum ip_conntrack_info ctinfo;
1234 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1235
1236 if (ct && !nf_ct_is_untracked(ct)) {
1237 IP_VS_DBG(10, "%s(): "
1238 "stopping DNAT to local address %pI4\n",
1239 __func__, &cp->daddr.ip);
1240 goto tx_error_put;
1241 }
1242 }
1243#endif
1244
1245 /* From world but DNAT to loopback address? */
c92f5ca2 1246 if (local && ipv4_is_loopback(cp->daddr.ip) &&
c7537967 1247 rt_is_input_route(skb_rtable(skb))) {
fc604767
JA
1248 IP_VS_DBG(1, "%s(): "
1249 "stopping DNAT to loopback %pI4\n",
1250 __func__, &cp->daddr.ip);
1251 goto tx_error_put;
1252 }
1da177e4
LT
1253
1254 /* MTU checking */
d8d1f30b 1255 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
1256 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1257 !skb_is_gso(skb)) {
1da177e4 1258 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 1259 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 1260 goto tx_error_put;
1da177e4
LT
1261 }
1262
1263 /* copy-on-write the packet before mangling it */
af1e1cf0 1264 if (!skb_make_writable(skb, offset))
1da177e4
LT
1265 goto tx_error_put;
1266
d8d1f30b 1267 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1da177e4
LT
1268 goto tx_error_put;
1269
1da177e4
LT
1270 ip_vs_nat_icmp(skb, pp, cp, 0);
1271
fc604767
JA
1272 if (!local) {
1273 /* drop the old route when skb is not shared */
1274 skb_dst_drop(skb);
1275 skb_dst_set(skb, &rt->dst);
1276 } else {
1277 ip_rt_put(rt);
1278 /*
1279 * Some IPv4 replies get local address from routes,
1280 * not from iph, so while we DNAT after routing
1281 * we need this second input/output route.
1282 */
1283 if (!__ip_vs_reroute_locally(skb))
1284 goto tx_error;
1285 }
1286
1da177e4
LT
1287 /* Another hack: avoid icmp_send in ip_fragment */
1288 skb->local_df = 1;
1289
fc604767 1290 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1da177e4
LT
1291
1292 rc = NF_STOLEN;
1293 goto out;
1294
1295 tx_error_icmp:
1296 dst_link_failure(skb);
1297 tx_error:
1298 dev_kfree_skb(skb);
1299 rc = NF_STOLEN;
1300 out:
1301 LeaveFunction(10);
1302 return rc;
1303 tx_error_put:
1304 ip_rt_put(rt);
1305 goto tx_error;
1306}
b3cdd2a7
JV
1307
1308#ifdef CONFIG_IP_VS_IPV6
1309int
1310ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
d4383f04
JDB
1311 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1312 struct ip_vs_iphdr *iph)
b3cdd2a7
JV
1313{
1314 struct rt6_info *rt; /* Route to the other host */
1315 int mtu;
1316 int rc;
fc604767 1317 int local;
c92f5ca2 1318 int rt_mode;
b3cdd2a7
JV
1319
1320 EnterFunction(10);
1321
1322 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1323 forwarded directly here, because there is no need to
1324 translate address/port back */
1325 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1326 if (cp->packet_xmit)
d4383f04 1327 rc = cp->packet_xmit(skb, cp, pp, iph);
b3cdd2a7
JV
1328 else
1329 rc = NF_ACCEPT;
1330 /* do not touch skb anymore */
1331 atomic_inc(&cp->in_pkts);
1332 goto out;
1333 }
1334
1335 /*
1336 * mangle and send the packet here (only for VS/NAT)
1337 */
1338
c92f5ca2
JA
1339 /* LOCALNODE from FORWARD hook is not supported */
1340 rt_mode = (hooknum != NF_INET_FORWARD) ?
1341 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1342 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
fc604767 1343 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
c92f5ca2 1344 0, rt_mode)))
b3cdd2a7
JV
1345 goto tx_error_icmp;
1346
fc604767
JA
1347 local = __ip_vs_is_local_route6(rt);
1348 /*
1349 * Avoid duplicate tuple in reply direction for NAT traffic
1350 * to local address when connection is sync-ed
1351 */
c0cd1156 1352#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
1353 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1354 enum ip_conntrack_info ctinfo;
1355 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1356
1357 if (ct && !nf_ct_is_untracked(ct)) {
1358 IP_VS_DBG(10, "%s(): "
1359 "stopping DNAT to local address %pI6\n",
1360 __func__, &cp->daddr.in6);
1361 goto tx_error_put;
1362 }
1363 }
1364#endif
1365
1366 /* From world but DNAT to loopback address? */
1367 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1368 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1369 IP_VS_DBG(1, "%s(): "
1370 "stopping DNAT to loopback %pI6\n",
1371 __func__, &cp->daddr.in6);
1372 goto tx_error_put;
1373 }
1374
b3cdd2a7 1375 /* MTU checking */
d8d1f30b 1376 mtu = dst_mtu(&rt->dst);
590e3f79 1377 if (__mtu_check_toobig_v6(skb, mtu)) {
cb59155f
JA
1378 if (!skb->dev) {
1379 struct net *net = dev_net(skb_dst(skb)->dev);
1380
1381 skb->dev = net->loopback_dev;
1382 }
2f74713d 1383 /* only send ICMP too big on first fragment */
d4383f04 1384 if (!iph->fragoffs)
2f74713d 1385 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1e3e238e 1386 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 1387 goto tx_error_put;
b3cdd2a7
JV
1388 }
1389
1390 /* copy-on-write the packet before mangling it */
1391 if (!skb_make_writable(skb, offset))
1392 goto tx_error_put;
1393
d8d1f30b 1394 if (skb_cow(skb, rt->dst.dev->hard_header_len))
b3cdd2a7
JV
1395 goto tx_error_put;
1396
b3cdd2a7
JV
1397 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1398
fc604767
JA
1399 if (!local || !skb->dev) {
1400 /* drop the old route when skb is not shared */
1401 skb_dst_drop(skb);
1402 skb_dst_set(skb, &rt->dst);
1403 } else {
1404 /* destined to loopback, do we need to change route? */
1405 dst_release(&rt->dst);
1406 }
1407
b3cdd2a7
JV
1408 /* Another hack: avoid icmp_send in ip_fragment */
1409 skb->local_df = 1;
1410
fc604767 1411 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
b3cdd2a7
JV
1412
1413 rc = NF_STOLEN;
1414 goto out;
1415
1416tx_error_icmp:
1417 dst_link_failure(skb);
1418tx_error:
1419 dev_kfree_skb(skb);
1420 rc = NF_STOLEN;
1421out:
1422 LeaveFunction(10);
1423 return rc;
1424tx_error_put:
d8d1f30b 1425 dst_release(&rt->dst);
b3cdd2a7
JV
1426 goto tx_error;
1427}
1428#endif
This page took 0.663891 seconds and 5 git commands to generate.