/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

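/* Tunnels are kept in a hash table keyed on (i_key, remote address).
 * Each tunnel also carries a per-CPU cache of its output route, so the
 * transmit path can usually skip a full routing lookup; the helpers
 * below compute the hash and manage that per-CPU dst cache.
 */
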
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
				    struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}

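/* Key matching rules: a tunnel configured with TUNNEL_KEY only accepts
 * packets that carry the same key; a keyless tunnel only accepts packets
 * that carry no key at all.
 */
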
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if no key is present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

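/* Pick the hash bucket for a tunnel.  Multicast and wildcard destinations
 * hash with "remote = 0", and VTI tunnels without TUNNEL_KEY ignore i_key,
 * so that insertion and lookup agree on the bucket.
 */
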
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}

	return t;
}

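/* Allocate and register a new tunnel net_device.  The device name comes
 * from the supplied parameters when set; otherwise it is derived from the
 * link ops kind with a "%d" suffix for automatic numbering.
 */
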
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

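/* Bind the tunnel to an underlying device by routing towards the remote
 * endpoint (or using parms.link), and derive a reasonable MTU and
 * needed_headroom from that device.
 */
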
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

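/* Common receive path for IP tunnels: validate checksum and sequence
 * number flags, decapsulate ECN, update per-CPU stats and hand the inner
 * packet to the tunnel device's GRO cell.
 */
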
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

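/* Optional encapsulations (e.g. FOU/GUE) register an ip_tunnel_encap_ops
 * entry in the iptun_encaps[] array; the helpers below compute the extra
 * header length and build the outer encapsulation header.
 */
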
static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	const struct ip_tunnel_encap_ops *ops;
	int hlen = -EINVAL;

	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->encap_hlen))
		hlen = ops->encap_hlen(e);
	rcu_read_unlock();

	return hlen;
}

const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);

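/* Propagate path-MTU changes to the inner flow: update the cached dst,
 * and when the inner packet does not fit, send ICMP "fragmentation
 * needed" (IPv4) or ICMPv6 "packet too big" back to the sender.
 */
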
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

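/* Generic transmit path used by drivers such as ip_gre and ipip: resolve
 * the outer destination (including NBMA tunnels with no fixed remote),
 * pick TOS/TTL/DF from the tunnel parameters or the inner header, look up
 * or reuse the cached route, and emit the encapsulated packet.
 */
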
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

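/* Apply new parameters to an existing tunnel: re-hash it under the new
 * addresses and keys, refresh the link binding and MTU if requested, and
 * invalidate the cached routes.
 */
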
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

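/* The upper bound below keeps the outer IP datagram (payload plus tunnel
 * and link-layer headers) within 0xFFF8 bytes, i.e. the 16-bit IP
 * total-length limit rounded down to a multiple of 8.
 */
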
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

*dev
, struct list_head
*head
)
974 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
975 struct ip_tunnel_net
*itn
;
977 itn
= net_generic(tunnel
->net
, tunnel
->ip_tnl_net_id
);
979 if (itn
->fb_tunnel_dev
!= dev
) {
980 ip_tunnel_del(itn
, netdev_priv(dev
));
981 unregister_netdevice_queue(dev
, head
);
984 EXPORT_SYMBOL_GPL(ip_tunnel_dellink
);
struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);
out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

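/* Per-device initialization, typically invoked from the driver's ndo_init
 * callback: allocate per-CPU stats, the per-CPU dst cache and the GRO
 * cells, and seed the outer IP header template.
 */
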
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");