net: remove unnecessary mroute.h includes
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
c5441932
PS
33#include <linux/init.h>
34#include <linux/in6.h>
35#include <linux/inetdevice.h>
36#include <linux/igmp.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/etherdevice.h>
39#include <linux/if_ether.h>
40#include <linux/if_vlan.h>
41#include <linux/rculist.h>
27d79f3b 42#include <linux/err.h>
c5441932
PS
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
56328486 57#include <net/udp.h>
63487bab 58
c5441932
PS
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
967680e0 65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
6c7e7610 71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
95cb5745 72 struct dst_entry *dst, __be32 saddr)
7d442fab
TH
73{
74 struct dst_entry *old_dst;
75
f8864972 76 dst_clone(dst);
6c7e7610 77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 78 dst_release(old_dst);
95cb5745 79 idst->saddr = saddr;
7d442fab
TH
80}
81
a35165ca 82static noinline void tunnel_dst_set(struct ip_tunnel *t,
95cb5745 83 struct dst_entry *dst, __be32 saddr)
7d442fab 84{
a35165ca 85 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
7d442fab
TH
86}
87
6c7e7610 88static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab 89{
95cb5745 90 tunnel_dst_set(t, NULL, 0);
7d442fab
TH
91}
92
cf71d2bc 93void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
94{
95 int i;
96
97 for_each_possible_cpu(i)
95cb5745 98 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
9a4aa9af 99}
cf71d2bc 100EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 101
95cb5745
DP
102static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
103 u32 cookie, __be32 *saddr)
7d442fab 104{
95cb5745 105 struct ip_tunnel_dst *idst;
7d442fab
TH
106 struct dst_entry *dst;
107
108 rcu_read_lock();
a35165ca 109 idst = raw_cpu_ptr(t->dst_cache);
95cb5745 110 dst = rcu_dereference(idst->dst);
f8864972
ED
111 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
112 dst = NULL;
b045d37b 113 if (dst) {
95cb5745
DP
114 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
115 *saddr = idst->saddr;
116 } else {
b045d37b 117 tunnel_dst_reset(t);
f8864972
ED
118 dst_release(dst);
119 dst = NULL;
b045d37b 120 }
7d442fab 121 }
b045d37b
ED
122 rcu_read_unlock();
123 return (struct rtable *)dst;
7d442fab
TH
124}
125
c5441932
PS
126static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key)
128{
129 if (p->i_flags & TUNNEL_KEY) {
130 if (flags & TUNNEL_KEY)
131 return key == p->i_key;
132 else
133 /* key expected, none present */
134 return false;
135 } else
136 return !(flags & TUNNEL_KEY);
137}
138
139/* Fallback tunnel: no source, no destination, no key, no options
140
141 Tunnel hash table:
142 We require exact key match i.e. if a key is present in packet
143 it will match only tunnel with the same key; if it is not present,
144 it will match only keyless tunnel.
145
146 All keyless packets, if not matched by configured keyless tunnels,
147 will match fallback tunnel.
148 Given src, dst and key, find appropriate for input tunnel.
149*/
150struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
151 int link, __be16 flags,
152 __be32 remote, __be32 local,
153 __be32 key)
154{
155 unsigned int hash;
156 struct ip_tunnel *t, *cand = NULL;
157 struct hlist_head *head;
158
967680e0 159 hash = ip_tunnel_hash(key, remote);
c5441932
PS
160 head = &itn->tunnels[hash];
161
162 hlist_for_each_entry_rcu(t, head, hash_node) {
163 if (local != t->parms.iph.saddr ||
164 remote != t->parms.iph.daddr ||
165 !(t->dev->flags & IFF_UP))
166 continue;
167
168 if (!ip_tunnel_key_match(&t->parms, flags, key))
169 continue;
170
171 if (t->parms.link == link)
172 return t;
173 else
174 cand = t;
175 }
176
177 hlist_for_each_entry_rcu(t, head, hash_node) {
178 if (remote != t->parms.iph.daddr ||
e0056593 179 t->parms.iph.saddr != 0 ||
c5441932
PS
180 !(t->dev->flags & IFF_UP))
181 continue;
182
183 if (!ip_tunnel_key_match(&t->parms, flags, key))
184 continue;
185
186 if (t->parms.link == link)
187 return t;
188 else if (!cand)
189 cand = t;
190 }
191
967680e0 192 hash = ip_tunnel_hash(key, 0);
c5441932
PS
193 head = &itn->tunnels[hash];
194
195 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
196 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
197 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
198 continue;
199
200 if (!(t->dev->flags & IFF_UP))
c5441932
PS
201 continue;
202
203 if (!ip_tunnel_key_match(&t->parms, flags, key))
204 continue;
205
206 if (t->parms.link == link)
207 return t;
208 else if (!cand)
209 cand = t;
210 }
211
212 if (flags & TUNNEL_NO_KEY)
213 goto skip_key_lookup;
214
215 hlist_for_each_entry_rcu(t, head, hash_node) {
216 if (t->parms.i_key != key ||
e0056593
DP
217 t->parms.iph.saddr != 0 ||
218 t->parms.iph.daddr != 0 ||
c5441932
PS
219 !(t->dev->flags & IFF_UP))
220 continue;
221
222 if (t->parms.link == link)
223 return t;
224 else if (!cand)
225 cand = t;
226 }
227
228skip_key_lookup:
229 if (cand)
230 return cand;
231
2e15ea39
PS
232 t = rcu_dereference(itn->collect_md_tun);
233 if (t)
234 return t;
235
c5441932
PS
236 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
237 return netdev_priv(itn->fb_tunnel_dev);
238
c5441932
PS
239 return NULL;
240}
241EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
242
243static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
244 struct ip_tunnel_parm *parms)
245{
246 unsigned int h;
247 __be32 remote;
6d608f06 248 __be32 i_key = parms->i_key;
c5441932
PS
249
250 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
251 remote = parms->iph.daddr;
252 else
253 remote = 0;
254
6d608f06
SK
255 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
256 i_key = 0;
257
258 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
259 return &itn->tunnels[h];
260}
261
262static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
263{
264 struct hlist_head *head = ip_bucket(itn, &t->parms);
265
2e15ea39
PS
266 if (t->collect_md)
267 rcu_assign_pointer(itn->collect_md_tun, t);
c5441932
PS
268 hlist_add_head_rcu(&t->hash_node, head);
269}
270
2e15ea39 271static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
c5441932 272{
2e15ea39
PS
273 if (t->collect_md)
274 rcu_assign_pointer(itn->collect_md_tun, NULL);
c5441932
PS
275 hlist_del_init_rcu(&t->hash_node);
276}
277
278static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
279 struct ip_tunnel_parm *parms,
280 int type)
281{
282 __be32 remote = parms->iph.daddr;
283 __be32 local = parms->iph.saddr;
284 __be32 key = parms->i_key;
5ce54af1 285 __be16 flags = parms->i_flags;
c5441932
PS
286 int link = parms->link;
287 struct ip_tunnel *t = NULL;
288 struct hlist_head *head = ip_bucket(itn, parms);
289
290 hlist_for_each_entry_rcu(t, head, hash_node) {
291 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr &&
c5441932 293 link == t->parms.link &&
5ce54af1
DP
294 type == t->dev->type &&
295 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
296 break;
297 }
298 return t;
299}
300
301static struct net_device *__ip_tunnel_create(struct net *net,
302 const struct rtnl_link_ops *ops,
303 struct ip_tunnel_parm *parms)
304{
305 int err;
306 struct ip_tunnel *tunnel;
307 struct net_device *dev;
308 char name[IFNAMSIZ];
309
310 if (parms->name[0])
311 strlcpy(name, parms->name, IFNAMSIZ);
312 else {
54a5d382 313 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
314 err = -E2BIG;
315 goto failed;
316 }
317 strlcpy(name, ops->kind, IFNAMSIZ);
318 strncat(name, "%d", 2);
319 }
320
321 ASSERT_RTNL();
c835a677 322 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
323 if (!dev) {
324 err = -ENOMEM;
325 goto failed;
326 }
327 dev_net_set(dev, net);
328
329 dev->rtnl_link_ops = ops;
330
331 tunnel = netdev_priv(dev);
332 tunnel->parms = *parms;
5e6700b3 333 tunnel->net = net;
c5441932
PS
334
335 err = register_netdevice(dev);
336 if (err)
337 goto failed_free;
338
339 return dev;
340
341failed_free:
342 free_netdev(dev);
343failed:
344 return ERR_PTR(err);
345}
346
7d442fab
TH
347static inline void init_tunnel_flow(struct flowi4 *fl4,
348 int proto,
349 __be32 daddr, __be32 saddr,
350 __be32 key, __u8 tos, int oif)
c5441932
PS
351{
352 memset(fl4, 0, sizeof(*fl4));
353 fl4->flowi4_oif = oif;
354 fl4->daddr = daddr;
355 fl4->saddr = saddr;
356 fl4->flowi4_tos = tos;
357 fl4->flowi4_proto = proto;
358 fl4->fl4_gre_key = key;
c5441932
PS
359}
360
361static int ip_tunnel_bind_dev(struct net_device *dev)
362{
363 struct net_device *tdev = NULL;
364 struct ip_tunnel *tunnel = netdev_priv(dev);
365 const struct iphdr *iph;
366 int hlen = LL_MAX_HEADER;
367 int mtu = ETH_DATA_LEN;
368 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
369
370 iph = &tunnel->parms.iph;
371
372 /* Guess output device to choose reasonable mtu and needed_headroom */
373 if (iph->daddr) {
374 struct flowi4 fl4;
375 struct rtable *rt;
376
7d442fab
TH
377 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
378 iph->saddr, tunnel->parms.o_key,
379 RT_TOS(iph->tos), tunnel->parms.link);
380 rt = ip_route_output_key(tunnel->net, &fl4);
381
c5441932
PS
382 if (!IS_ERR(rt)) {
383 tdev = rt->dst.dev;
95cb5745 384 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932
PS
385 ip_rt_put(rt);
386 }
387 if (dev->type != ARPHRD_ETHER)
388 dev->flags |= IFF_POINTOPOINT;
389 }
390
391 if (!tdev && tunnel->parms.link)
6c742e71 392 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
393
394 if (tdev) {
395 hlen = tdev->hard_header_len + tdev->needed_headroom;
396 mtu = tdev->mtu;
397 }
c5441932
PS
398
399 dev->needed_headroom = t_hlen + hlen;
400 mtu -= (dev->hard_header_len + t_hlen);
401
402 if (mtu < 68)
403 mtu = 68;
404
405 return mtu;
406}
407
408static struct ip_tunnel *ip_tunnel_create(struct net *net,
409 struct ip_tunnel_net *itn,
410 struct ip_tunnel_parm *parms)
411{
4929fd8c 412 struct ip_tunnel *nt;
c5441932
PS
413 struct net_device *dev;
414
415 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
416 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
417 if (IS_ERR(dev))
6dd3c9ec 418 return ERR_CAST(dev);
c5441932
PS
419
420 dev->mtu = ip_tunnel_bind_dev(dev);
421
422 nt = netdev_priv(dev);
423 ip_tunnel_add(itn, nt);
424 return nt;
425}
426
427int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
2e15ea39
PS
428 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
429 bool log_ecn_error)
c5441932 430{
8f84985f 431 struct pcpu_sw_netstats *tstats;
c5441932
PS
432 const struct iphdr *iph = ip_hdr(skb);
433 int err;
434
c5441932
PS
435#ifdef CONFIG_NET_IPGRE_BROADCAST
436 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
437 tunnel->dev->stats.multicast++;
438 skb->pkt_type = PACKET_BROADCAST;
439 }
440#endif
441
442 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
443 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
444 tunnel->dev->stats.rx_crc_errors++;
445 tunnel->dev->stats.rx_errors++;
446 goto drop;
447 }
448
449 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
450 if (!(tpi->flags&TUNNEL_SEQ) ||
451 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
452 tunnel->dev->stats.rx_fifo_errors++;
453 tunnel->dev->stats.rx_errors++;
454 goto drop;
455 }
456 tunnel->i_seqno = ntohl(tpi->seq) + 1;
457 }
458
e96f2e7c
YC
459 skb_reset_network_header(skb);
460
c5441932
PS
461 err = IP_ECN_decapsulate(iph, skb);
462 if (unlikely(err)) {
463 if (log_ecn_error)
464 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
465 &iph->saddr, iph->tos);
466 if (err > 1) {
467 ++tunnel->dev->stats.rx_frame_errors;
468 ++tunnel->dev->stats.rx_errors;
469 goto drop;
470 }
471 }
472
473 tstats = this_cpu_ptr(tunnel->dev->tstats);
474 u64_stats_update_begin(&tstats->syncp);
475 tstats->rx_packets++;
476 tstats->rx_bytes += skb->len;
477 u64_stats_update_end(&tstats->syncp);
478
81b9eab5
AS
479 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
480
3d7b46cd
PS
481 if (tunnel->dev->type == ARPHRD_ETHER) {
482 skb->protocol = eth_type_trans(skb, tunnel->dev);
483 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
484 } else {
485 skb->dev = tunnel->dev;
486 }
64261f23 487
2e15ea39
PS
488 if (tun_dst)
489 skb_dst_set(skb, (struct dst_entry *)tun_dst);
490
c5441932
PS
491 gro_cells_receive(&tunnel->gro_cells, skb);
492 return 0;
493
494drop:
495 kfree_skb(skb);
496 return 0;
497}
498EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
499
56328486
TH
500static int ip_encap_hlen(struct ip_tunnel_encap *e)
501{
a8c5f90f
TH
502 const struct ip_tunnel_encap_ops *ops;
503 int hlen = -EINVAL;
504
505 if (e->type == TUNNEL_ENCAP_NONE)
56328486 506 return 0;
a8c5f90f
TH
507
508 if (e->type >= MAX_IPTUN_ENCAP_OPS)
56328486 509 return -EINVAL;
a8c5f90f
TH
510
511 rcu_read_lock();
512 ops = rcu_dereference(iptun_encaps[e->type]);
513 if (likely(ops && ops->encap_hlen))
514 hlen = ops->encap_hlen(e);
515 rcu_read_unlock();
516
517 return hlen;
518}
519
/*
 * Table of registered encapsulation ops, indexed by encapsulation type.
 * In this file, entries are read with rcu_dereference() and are installed
 * and cleared with cmpxchg().
 */
520const struct ip_tunnel_encap_ops __rcu *
521 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
522
523int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
524 unsigned int num)
525{
bb1553c8
TG
526 if (num >= MAX_IPTUN_ENCAP_OPS)
527 return -ERANGE;
528
a8c5f90f
TH
529 return !cmpxchg((const struct ip_tunnel_encap_ops **)
530 &iptun_encaps[num],
531 NULL, ops) ? 0 : -1;
56328486 532}
a8c5f90f
TH
533EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
534
535int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
536 unsigned int num)
537{
538 int ret;
539
bb1553c8
TG
540 if (num >= MAX_IPTUN_ENCAP_OPS)
541 return -ERANGE;
542
a8c5f90f
TH
543 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
544 &iptun_encaps[num],
545 ops, NULL) == ops) ? 0 : -1;
546
547 synchronize_net();
548
549 return ret;
550}
551EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
56328486
TH
552
553int ip_tunnel_encap_setup(struct ip_tunnel *t,
554 struct ip_tunnel_encap *ipencap)
555{
556 int hlen;
557
558 memset(&t->encap, 0, sizeof(t->encap));
559
560 hlen = ip_encap_hlen(ipencap);
561 if (hlen < 0)
562 return hlen;
563
564 t->encap.type = ipencap->type;
565 t->encap.sport = ipencap->sport;
566 t->encap.dport = ipencap->dport;
567 t->encap.flags = ipencap->flags;
568
569 t->encap_hlen = hlen;
570 t->hlen = t->encap_hlen + t->tun_hlen;
571
572 return 0;
573}
574EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
575
56328486
TH
576int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
577 u8 *protocol, struct flowi4 *fl4)
578{
a8c5f90f
TH
579 const struct ip_tunnel_encap_ops *ops;
580 int ret = -EINVAL;
581
582 if (t->encap.type == TUNNEL_ENCAP_NONE)
56328486 583 return 0;
a8c5f90f 584
f1fb521f
TG
585 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
586 return -EINVAL;
587
a8c5f90f
TH
588 rcu_read_lock();
589 ops = rcu_dereference(iptun_encaps[t->encap.type]);
590 if (likely(ops && ops->build_header))
591 ret = ops->build_header(skb, &t->encap, protocol, fl4);
592 rcu_read_unlock();
593
594 return ret;
56328486
TH
595}
596EXPORT_SYMBOL(ip_tunnel_encap);
597
/*
 * Work out the MTU that applies to this transmit, report it to the dst
 * attached to the skb (if any) through its update_pmtu() callback, and
 * tell the sender when a non-GSO packet is larger than that MTU.
 *
 * Returns 0 when transmission may continue, or -E2BIG after an ICMP
 * "fragmentation needed" / ICMPv6 "packet too big" error has been sent.
 */
23a3647b 598static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
fc24f2b2
TT
599 struct rtable *rt, __be16 df,
600 const struct iphdr *inner_iph)
23a3647b
PS
601{
602 struct ip_tunnel *tunnel = netdev_priv(dev);
/* Payload size: skb length without the tunnel and link-layer headers. */
8c91e162 603 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
604 int mtu;
605
/*
 * With df set, the MTU is the route's MTU minus the link-layer header,
 * the outer IP header and the tunnel header; otherwise it is the MTU of
 * the skb's own dst, or the device MTU when the skb has no dst.
 */
606 if (df)
607 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
608 - sizeof(struct iphdr) - tunnel->hlen;
609 else
610 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
611
612 if (skb_dst(skb))
613 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
614
615 if (skb->protocol == htons(ETH_P_IP)) {
/* Only signal when the inner packet has DF set and is not GSO. */
616 if (!skb_is_gso(skb) &&
fc24f2b2
TT
617 (inner_iph->frag_off & htons(IP_DF)) &&
618 mtu < pkt_size) {
23a3647b
PS
/* The control block is cleared before icmp_send() is called. */
619 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
620 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
621 return -E2BIG;
622 }
623 }
624#if IS_ENABLED(CONFIG_IPV6)
625 else if (skb->protocol == htons(ETH_P_IPV6)) {
626 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
627
/*
 * Record the smaller MTU on the IPv6 route, but only for a tunnel with
 * a non-multicast remote configured or for a /128 route.
 */
628 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
629 mtu >= IPV6_MIN_MTU) {
630 if ((tunnel->parms.iph.daddr &&
631 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
632 rt6->rt6i_dst.plen == 128) {
633 rt6->rt6i_flags |= RTF_MODIFIED;
634 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
635 }
636 }
637
638 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
639 mtu < pkt_size) {
640 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
641 return -E2BIG;
642 }
643 }
644#endif
645 return 0;
646}
647
/*
 * ip_tunnel_xmit - encapsulate and transmit a packet through a tunnel.
 * @skb:        packet to send; consumed on every path
 * @dev:        tunnel device
 * @tnl_params: outer IP header template (daddr, saddr, tos, ttl, frag_off)
 * @protocol:   outer IP protocol number; may be rewritten by ip_tunnel_encap()
 *
 * Steps, in order: resolve the outer destination (taken from the inner
 * packet when the template has none), choose the TOS, build the optional
 * encapsulation header, obtain a route (from the tunnel's dst cache when
 * the tunnel is "connected"), run the path-MTU check, choose TTL and DF,
 * make sure there is enough headroom, and hand off to iptunnel_xmit().
 *
 * Failures bump a counter in dev->stats and free the skb.
 */
c5441932 648void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 649 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
650{
651 struct ip_tunnel *tunnel = netdev_priv(dev);
652 const struct iphdr *inner_iph;
c5441932
PS
653 struct flowi4 fl4;
654 u8 tos, ttl;
655 __be16 df;
b045d37b 656 struct rtable *rt; /* Route to the other host */
c5441932
PS
657 unsigned int max_headroom; /* The extra header space needed */
658 __be32 dst;
0e6fbc5b 659 int err;
22fb22ea 660 bool connected;
c5441932
PS
661
662 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* "connected" gates use of the dst cache; it starts true only when a remote is configured. */
22fb22ea 663 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
664
665 dst = tnl_params->daddr;
666 if (dst == 0) {
667 /* NBMA tunnel */
668
/* No outer destination in the template: derive it from the inner packet's route. */
51456b29 669 if (!skb_dst(skb)) {
c5441932
PS
670 dev->stats.tx_fifo_errors++;
671 goto tx_error;
672 }
673
674 if (skb->protocol == htons(ETH_P_IP)) {
675 rt = skb_rtable(skb);
676 dst = rt_nexthop(rt, inner_iph->daddr);
677 }
678#if IS_ENABLED(CONFIG_IPV6)
679 else if (skb->protocol == htons(ETH_P_IPV6)) {
680 const struct in6_addr *addr6;
681 struct neighbour *neigh;
682 bool do_tx_error_icmp;
683 int addr_type;
684
685 neigh = dst_neigh_lookup(skb_dst(skb),
686 &ipv6_hdr(skb)->daddr);
51456b29 687 if (!neigh)
c5441932
PS
688 goto tx_error;
689
690 addr6 = (const struct in6_addr *)&neigh->primary_key;
691 addr_type = ipv6_addr_type(addr6);
692
/* Fall back to the packet's own destination when the neighbour key is the any-address. */
693 if (addr_type == IPV6_ADDR_ANY) {
694 addr6 = &ipv6_hdr(skb)->daddr;
695 addr_type = ipv6_addr_type(addr6);
696 }
697
/* Only IPv4-compatible IPv6 addresses yield an outer destination (the last 32 bits). */
698 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
699 do_tx_error_icmp = true;
700 else {
701 do_tx_error_icmp = false;
702 dst = addr6->s6_addr32[3];
703 }
/* The neighbour is released before jumping to the error path. */
704 neigh_release(neigh);
705 if (do_tx_error_icmp)
706 goto tx_error_icmp;
707 }
708#endif
709 else
710 goto tx_error;
7d442fab
TH
711
/* The destination is per-packet here, so the dst cache is not used. */
712 connected = false;
c5441932
PS
713 }
714
715 tos = tnl_params->tos;
/* Bit 0 of the template TOS selects taking the TOS from the inner header. */
716 if (tos & 0x1) {
717 tos &= ~0x1;
7d442fab 718 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 719 tos = inner_iph->tos;
7d442fab
TH
720 connected = false;
721 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 722 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
723 connected = false;
724 }
c5441932
PS
725 }
726
7d442fab
TH
727 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
728 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
729
56328486
TH
730 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
731 goto tx_error;
732
/* Try the dst cache first for connected tunnels; fall back to a full route lookup. */
95cb5745 733 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
7d442fab
TH
734
735 if (!rt) {
736 rt = ip_route_output_key(tunnel->net, &fl4);
737
738 if (IS_ERR(rt)) {
739 dev->stats.tx_carrier_errors++;
740 goto tx_error;
741 }
742 if (connected)
95cb5745 743 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932 744 }
7d442fab 745
/* A route that points back at this tunnel device is rejected and counted as a collision. */
0e6fbc5b 746 if (rt->dst.dev == dev) {
c5441932
PS
747 ip_rt_put(rt);
748 dev->stats.collisions++;
749 goto tx_error;
750 }
c5441932 751
fc24f2b2 752 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
23a3647b
PS
753 ip_rt_put(rt);
754 goto tx_error;
c5441932 755 }
c5441932
PS
756
/*
 * While err_count is positive and the error window has not expired,
 * each transmit reports a link failure and decrements the counter;
 * once the window has expired the counter is reset.
 */
757 if (tunnel->err_count > 0) {
758 if (time_before(jiffies,
759 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
760 tunnel->err_count--;
761
11c21a30 762 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
763 dst_link_failure(skb);
764 } else
765 tunnel->err_count = 0;
766 }
767
d4a71b15 768 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
769 ttl = tnl_params->ttl;
/* A zero template TTL means: take it from the inner header, else from the route. */
770 if (ttl == 0) {
771 if (skb->protocol == htons(ETH_P_IP))
772 ttl = inner_iph->ttl;
773#if IS_ENABLED(CONFIG_IPV6)
774 else if (skb->protocol == htons(ETH_P_IPV6))
775 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
776#endif
777 else
778 ttl = ip4_dst_hoplimit(&rt->dst);
779 }
780
23a3647b
PS
781 df = tnl_params->frag_off;
/* For inner IPv4, the inner DF bit is OR-ed into the outer one. */
782 if (skb->protocol == htons(ETH_P_IP))
783 df |= (inner_iph->frag_off&htons(IP_DF));
784
0e6fbc5b 785 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 786 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
/* needed_headroom only ever grows here. */
3e08f4a7 787 if (max_headroom > dev->needed_headroom)
c5441932 788 dev->needed_headroom = max_headroom;
3e08f4a7
SK
789
790 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 791 ip_rt_put(rt);
3e08f4a7 792 dev->stats.tx_dropped++;
3acfa1e7 793 kfree_skb(skb);
3e08f4a7 794 return;
c5441932
PS
795 }
796
79b16aad 797 err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 798 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 799 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 800
c5441932
PS
801 return;
802
803#if IS_ENABLED(CONFIG_IPV6)
/* Entered only from the IPv6 NBMA branch; reports a link failure, then falls through. */
804tx_error_icmp:
805 dst_link_failure(skb);
806#endif
807tx_error:
808 dev->stats.tx_errors++;
3acfa1e7 809 kfree_skb(skb);
c5441932
PS
810}
811EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
812
813static void ip_tunnel_update(struct ip_tunnel_net *itn,
814 struct ip_tunnel *t,
815 struct net_device *dev,
816 struct ip_tunnel_parm *p,
817 bool set_mtu)
818{
2e15ea39 819 ip_tunnel_del(itn, t);
c5441932
PS
820 t->parms.iph.saddr = p->iph.saddr;
821 t->parms.iph.daddr = p->iph.daddr;
822 t->parms.i_key = p->i_key;
823 t->parms.o_key = p->o_key;
824 if (dev->type != ARPHRD_ETHER) {
825 memcpy(dev->dev_addr, &p->iph.saddr, 4);
826 memcpy(dev->broadcast, &p->iph.daddr, 4);
827 }
828 ip_tunnel_add(itn, t);
829
830 t->parms.iph.ttl = p->iph.ttl;
831 t->parms.iph.tos = p->iph.tos;
832 t->parms.iph.frag_off = p->iph.frag_off;
833
834 if (t->parms.link != p->link) {
835 int mtu;
836
837 t->parms.link = p->link;
838 mtu = ip_tunnel_bind_dev(dev);
839 if (set_mtu)
840 dev->mtu = mtu;
841 }
cf71d2bc 842 ip_tunnel_dst_reset_all(t);
c5441932
PS
843 netdev_state_change(dev);
844}
845
/*
 * ip_tunnel_ioctl - handle the SIOC{GET,ADD,CHG,DEL}TUNNEL commands.
 * @dev: device the ioctl was issued on
 * @p:   tunnel parameters; input for every command, and overwritten with
 *       the tunnel's parameters for SIOCGETTUNNEL
 * @cmd: ioctl command
 *
 * Returns 0 on success or a negative errno: -EPERM without CAP_NET_ADMIN
 * in the tunnel netns' user namespace (or when asked to delete the
 * fallback tunnel), -EEXIST when the parameters collide with another
 * tunnel, -ENOENT when the target tunnel does not exist, -EINVAL for an
 * unknown command or an incompatible point-to-point/broadcast change.
 */
846int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
847{
848 int err = 0;
8c923ce2
ND
849 struct ip_tunnel *t = netdev_priv(dev);
850 struct net *net = t->net;
851 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
852
853 BUG_ON(!itn->fb_tunnel_dev);
854 switch (cmd) {
855 case SIOCGETTUNNEL:
/* On the fallback device, look the tunnel up by parameters; default to the fallback itself. */
8c923ce2 856 if (dev == itn->fb_tunnel_dev) {
c5441932 857 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 858 if (!t)
8c923ce2
ND
859 t = netdev_priv(dev);
860 }
c5441932
PS
861 memcpy(p, &t->parms, sizeof(*p));
862 break;
863
864 case SIOCADDTUNNEL:
865 case SIOCCHGTUNNEL:
866 err = -EPERM;
867 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
868 goto done;
/* A fixed TTL forces the DF bit in the outer header template. */
869 if (p->iph.ttl)
870 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
/* For non-VTI tunnels, keys are zeroed unless the matching TUNNEL_KEY flag is set. */
871 if (!(p->i_flags & VTI_ISVTI)) {
872 if (!(p->i_flags & TUNNEL_KEY))
873 p->i_key = 0;
874 if (!(p->o_flags & TUNNEL_KEY))
875 p->o_key = 0;
876 }
c5441932
PS
877
878 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
879
d61746b2
SK
880 if (cmd == SIOCADDTUNNEL) {
881 if (!t) {
882 t = ip_tunnel_create(net, itn, p);
883 err = PTR_ERR_OR_ZERO(t);
884 break;
885 }
886
887 err = -EEXIST;
ee30ef4d 888 break;
6dd3c9ec 889 }
/* From here on the command is SIOCCHGTUNNEL. */
c5441932 890 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
00db4124 891 if (t) {
c5441932
PS
/* The new parameters already belong to a different device. */
892 if (t->dev != dev) {
893 err = -EEXIST;
894 break;
895 }
896 } else {
897 unsigned int nflags = 0;
898
899 if (ipv4_is_multicast(p->iph.daddr))
900 nflags = IFF_BROADCAST;
901 else if (p->iph.daddr)
902 nflags = IFF_POINTOPOINT;
903
/* Refuse a change that would flip the device's point-to-point/broadcast flags. */
904 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
905 err = -EINVAL;
906 break;
907 }
908
909 t = netdev_priv(dev);
910 }
911 }
912
913 if (t) {
914 err = 0;
915 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
916 } else {
917 err = -ENOENT;
918 }
c5441932
PS
919 break;
920
921 case SIOCDELTUNNEL:
922 err = -EPERM;
923 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
924 goto done;
925
/* On the fallback device, delete the tunnel named by @p, but never the fallback itself. */
926 if (dev == itn->fb_tunnel_dev) {
927 err = -ENOENT;
928 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 929 if (!t)
c5441932
PS
930 goto done;
931 err = -EPERM;
932 if (t == netdev_priv(itn->fb_tunnel_dev))
933 goto done;
934 dev = t->dev;
935 }
936 unregister_netdevice(dev);
937 err = 0;
938 break;
939
940 default:
941 err = -EINVAL;
942 }
943
944done:
945 return err;
946}
947EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
948
949int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
950{
951 struct ip_tunnel *tunnel = netdev_priv(dev);
952 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
953
954 if (new_mtu < 68 ||
955 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
956 return -EINVAL;
957 dev->mtu = new_mtu;
958 return 0;
959}
960EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
961
962static void ip_tunnel_dev_free(struct net_device *dev)
963{
964 struct ip_tunnel *tunnel = netdev_priv(dev);
965
966 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 967 free_percpu(tunnel->dst_cache);
c5441932
PS
968 free_percpu(dev->tstats);
969 free_netdev(dev);
970}
971
972void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
973{
c5441932
PS
974 struct ip_tunnel *tunnel = netdev_priv(dev);
975 struct ip_tunnel_net *itn;
976
6c742e71 977 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
978
979 if (itn->fb_tunnel_dev != dev) {
2e15ea39 980 ip_tunnel_del(itn, netdev_priv(dev));
c5441932
PS
981 unregister_netdevice_queue(dev, head);
982 }
983}
984EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
985
1728d4fa
ND
986struct net *ip_tunnel_get_link_net(const struct net_device *dev)
987{
988 struct ip_tunnel *tunnel = netdev_priv(dev);
989
990 return tunnel->net;
991}
992EXPORT_SYMBOL(ip_tunnel_get_link_net);
993
1e99584b
ND
994int ip_tunnel_get_iflink(const struct net_device *dev)
995{
996 struct ip_tunnel *tunnel = netdev_priv(dev);
997
998 return tunnel->parms.link;
999}
1000EXPORT_SYMBOL(ip_tunnel_get_iflink);
1001
d3b6f614 1002int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
1003 struct rtnl_link_ops *ops, char *devname)
1004{
1005 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1006 struct ip_tunnel_parm parms;
6261d983 1007 unsigned int i;
c5441932 1008
6261d983 1009 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1010 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
1011
1012 if (!ops) {
1013 itn->fb_tunnel_dev = NULL;
1014 return 0;
1015 }
6261d983 1016
c5441932
PS
1017 memset(&parms, 0, sizeof(parms));
1018 if (devname)
1019 strlcpy(parms.name, devname, IFNAMSIZ);
1020
1021 rtnl_lock();
1022 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
1023 /* FB netdevice is special: we have one, and only one per netns.
1024 * Allowing to move it to another netns is clearly unsafe.
1025 */
67013282 1026 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 1027 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 1028 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
1029 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1030 }
b4de77ad 1031 rtnl_unlock();
c5441932 1032
27d79f3b 1033 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
1034}
1035EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1036
6c742e71
ND
1037static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1038 struct rtnl_link_ops *ops)
c5441932 1039{
6c742e71
ND
1040 struct net *net = dev_net(itn->fb_tunnel_dev);
1041 struct net_device *dev, *aux;
c5441932
PS
1042 int h;
1043
6c742e71
ND
1044 for_each_netdev_safe(net, dev, aux)
1045 if (dev->rtnl_link_ops == ops)
1046 unregister_netdevice_queue(dev, head);
1047
c5441932
PS
1048 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1049 struct ip_tunnel *t;
1050 struct hlist_node *n;
1051 struct hlist_head *thead = &itn->tunnels[h];
1052
1053 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1054 /* If dev is in the same netns, it has already
1055 * been added to the list by the previous loop.
1056 */
1057 if (!net_eq(dev_net(t->dev), net))
1058 unregister_netdevice_queue(t->dev, head);
c5441932 1059 }
c5441932
PS
1060}
1061
6c742e71 1062void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
1063{
1064 LIST_HEAD(list);
1065
1066 rtnl_lock();
6c742e71 1067 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
1068 unregister_netdevice_many(&list);
1069 rtnl_unlock();
c5441932
PS
1070}
1071EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1072
1073int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1074 struct ip_tunnel_parm *p)
1075{
1076 struct ip_tunnel *nt;
1077 struct net *net = dev_net(dev);
1078 struct ip_tunnel_net *itn;
1079 int mtu;
1080 int err;
1081
1082 nt = netdev_priv(dev);
1083 itn = net_generic(net, nt->ip_tnl_net_id);
1084
2e15ea39
PS
1085 if (nt->collect_md) {
1086 if (rtnl_dereference(itn->collect_md_tun))
1087 return -EEXIST;
1088 } else {
1089 if (ip_tunnel_find(itn, p, dev->type))
1090 return -EEXIST;
1091 }
c5441932 1092
5e6700b3 1093 nt->net = net;
c5441932
PS
1094 nt->parms = *p;
1095 err = register_netdevice(dev);
1096 if (err)
1097 goto out;
1098
1099 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1100 eth_hw_addr_random(dev);
1101
1102 mtu = ip_tunnel_bind_dev(dev);
1103 if (!tb[IFLA_MTU])
1104 dev->mtu = mtu;
1105
1106 ip_tunnel_add(itn, nt);
c5441932
PS
1107out:
1108 return err;
1109}
1110EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1111
1112int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1113 struct ip_tunnel_parm *p)
1114{
6c742e71 1115 struct ip_tunnel *t;
c5441932 1116 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1117 struct net *net = tunnel->net;
c5441932
PS
1118 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1119
1120 if (dev == itn->fb_tunnel_dev)
1121 return -EINVAL;
1122
c5441932
PS
1123 t = ip_tunnel_find(itn, p, dev->type);
1124
1125 if (t) {
1126 if (t->dev != dev)
1127 return -EEXIST;
1128 } else {
6c742e71 1129 t = tunnel;
c5441932
PS
1130
1131 if (dev->type != ARPHRD_ETHER) {
1132 unsigned int nflags = 0;
1133
1134 if (ipv4_is_multicast(p->iph.daddr))
1135 nflags = IFF_BROADCAST;
1136 else if (p->iph.daddr)
1137 nflags = IFF_POINTOPOINT;
1138
1139 if ((dev->flags ^ nflags) &
1140 (IFF_POINTOPOINT | IFF_BROADCAST))
1141 return -EINVAL;
1142 }
1143 }
1144
1145 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1146 return 0;
1147}
1148EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1149
1150int ip_tunnel_init(struct net_device *dev)
1151{
1152 struct ip_tunnel *tunnel = netdev_priv(dev);
1153 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1154 int err;
c5441932
PS
1155
1156 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1157 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1158 if (!dev->tstats)
1159 return -ENOMEM;
1160
9a4aa9af
TH
1161 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1162 if (!tunnel->dst_cache) {
1163 free_percpu(dev->tstats);
1164 return -ENOMEM;
1165 }
1166
c5441932
PS
1167 err = gro_cells_init(&tunnel->gro_cells, dev);
1168 if (err) {
9a4aa9af 1169 free_percpu(tunnel->dst_cache);
c5441932
PS
1170 free_percpu(dev->tstats);
1171 return err;
1172 }
1173
1174 tunnel->dev = dev;
6c742e71 1175 tunnel->net = dev_net(dev);
c5441932
PS
1176 strcpy(tunnel->parms.name, dev->name);
1177 iph->version = 4;
1178 iph->ihl = 5;
1179
2e15ea39
PS
1180 if (tunnel->collect_md) {
1181 dev->features |= NETIF_F_NETNS_LOCAL;
1182 netif_keep_dst(dev);
1183 }
c5441932
PS
1184 return 0;
1185}
1186EXPORT_SYMBOL_GPL(ip_tunnel_init);
1187
1188void ip_tunnel_uninit(struct net_device *dev)
1189{
c5441932 1190 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1191 struct net *net = tunnel->net;
c5441932
PS
1192 struct ip_tunnel_net *itn;
1193
1194 itn = net_generic(net, tunnel->ip_tnl_net_id);
1195 /* fb_tunnel_dev will be unregisted in net-exit call. */
1196 if (itn->fb_tunnel_dev != dev)
2e15ea39 1197 ip_tunnel_del(itn, netdev_priv(dev));
7d442fab 1198
cf71d2bc 1199 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1200}
1201EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1202
1203/* Do least required initialization, rest of init is done in tunnel_init call */
1204void ip_tunnel_setup(struct net_device *dev, int net_id)
1205{
1206 struct ip_tunnel *tunnel = netdev_priv(dev);
1207 tunnel->ip_tnl_net_id = net_id;
1208}
1209EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1210
1211MODULE_LICENSE("GPL");
This page took 0.277203 seconds and 5 git commands to generate.