ip_tunnel: Add sanity checks to ip_tunnel_encap_add_ops()
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
56328486 58#include <net/udp.h>
63487bab 59
c5441932
PS
60#if IS_ENABLED(CONFIG_IPV6)
61#include <net/ipv6.h>
62#include <net/ip6_fib.h>
63#include <net/ip6_route.h>
64#endif
65
967680e0 66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
67{
68 return hash_32((__force u32)key ^ (__force u32)remote,
69 IP_TNL_HASH_BITS);
70}
71
6c7e7610 72static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
95cb5745 73 struct dst_entry *dst, __be32 saddr)
7d442fab
TH
74{
75 struct dst_entry *old_dst;
76
f8864972 77 dst_clone(dst);
6c7e7610 78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 79 dst_release(old_dst);
95cb5745 80 idst->saddr = saddr;
7d442fab
TH
81}
82
a35165ca 83static noinline void tunnel_dst_set(struct ip_tunnel *t,
95cb5745 84 struct dst_entry *dst, __be32 saddr)
7d442fab 85{
a35165ca 86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
7d442fab
TH
87}
88
6c7e7610 89static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab 90{
95cb5745 91 tunnel_dst_set(t, NULL, 0);
7d442fab
TH
92}
93
cf71d2bc 94void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
95{
96 int i;
97
98 for_each_possible_cpu(i)
95cb5745 99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
9a4aa9af 100}
cf71d2bc 101EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 102
95cb5745
DP
103static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
104 u32 cookie, __be32 *saddr)
7d442fab 105{
95cb5745 106 struct ip_tunnel_dst *idst;
7d442fab
TH
107 struct dst_entry *dst;
108
109 rcu_read_lock();
a35165ca 110 idst = raw_cpu_ptr(t->dst_cache);
95cb5745 111 dst = rcu_dereference(idst->dst);
f8864972
ED
112 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
113 dst = NULL;
b045d37b 114 if (dst) {
95cb5745
DP
115 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
116 *saddr = idst->saddr;
117 } else {
b045d37b 118 tunnel_dst_reset(t);
f8864972
ED
119 dst_release(dst);
120 dst = NULL;
b045d37b 121 }
7d442fab 122 }
b045d37b
ED
123 rcu_read_unlock();
124 return (struct rtable *)dst;
7d442fab
TH
125}
126
c5441932
PS
127static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
129{
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
133 else
134 /* key expected, none present */
135 return false;
136 } else
137 return !(flags & TUNNEL_KEY);
138}
139
140/* Fallback tunnel: no source, no destination, no key, no options
141
142 Tunnel hash table:
143 We require exact key match i.e. if a key is present in packet
144 it will match only tunnel with the same key; if it is not present,
145 it will match only keyless tunnel.
146
147 All keysless packets, if not matched configured keyless tunnels
148 will match fallback tunnel.
149 Given src, dst and key, find appropriate for input tunnel.
150*/
151struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
152 int link, __be16 flags,
153 __be32 remote, __be32 local,
154 __be32 key)
155{
156 unsigned int hash;
157 struct ip_tunnel *t, *cand = NULL;
158 struct hlist_head *head;
159
967680e0 160 hash = ip_tunnel_hash(key, remote);
c5441932
PS
161 head = &itn->tunnels[hash];
162
163 hlist_for_each_entry_rcu(t, head, hash_node) {
164 if (local != t->parms.iph.saddr ||
165 remote != t->parms.iph.daddr ||
166 !(t->dev->flags & IFF_UP))
167 continue;
168
169 if (!ip_tunnel_key_match(&t->parms, flags, key))
170 continue;
171
172 if (t->parms.link == link)
173 return t;
174 else
175 cand = t;
176 }
177
178 hlist_for_each_entry_rcu(t, head, hash_node) {
179 if (remote != t->parms.iph.daddr ||
e0056593 180 t->parms.iph.saddr != 0 ||
c5441932
PS
181 !(t->dev->flags & IFF_UP))
182 continue;
183
184 if (!ip_tunnel_key_match(&t->parms, flags, key))
185 continue;
186
187 if (t->parms.link == link)
188 return t;
189 else if (!cand)
190 cand = t;
191 }
192
967680e0 193 hash = ip_tunnel_hash(key, 0);
c5441932
PS
194 head = &itn->tunnels[hash];
195
196 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
199 continue;
200
201 if (!(t->dev->flags & IFF_UP))
c5441932
PS
202 continue;
203
204 if (!ip_tunnel_key_match(&t->parms, flags, key))
205 continue;
206
207 if (t->parms.link == link)
208 return t;
209 else if (!cand)
210 cand = t;
211 }
212
213 if (flags & TUNNEL_NO_KEY)
214 goto skip_key_lookup;
215
216 hlist_for_each_entry_rcu(t, head, hash_node) {
217 if (t->parms.i_key != key ||
e0056593
DP
218 t->parms.iph.saddr != 0 ||
219 t->parms.iph.daddr != 0 ||
c5441932
PS
220 !(t->dev->flags & IFF_UP))
221 continue;
222
223 if (t->parms.link == link)
224 return t;
225 else if (!cand)
226 cand = t;
227 }
228
229skip_key_lookup:
230 if (cand)
231 return cand;
232
233 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
234 return netdev_priv(itn->fb_tunnel_dev);
235
236
237 return NULL;
238}
239EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240
241static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242 struct ip_tunnel_parm *parms)
243{
244 unsigned int h;
245 __be32 remote;
6d608f06 246 __be32 i_key = parms->i_key;
c5441932
PS
247
248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249 remote = parms->iph.daddr;
250 else
251 remote = 0;
252
6d608f06
SK
253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
254 i_key = 0;
255
256 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
257 return &itn->tunnels[h];
258}
259
260static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
261{
262 struct hlist_head *head = ip_bucket(itn, &t->parms);
263
264 hlist_add_head_rcu(&t->hash_node, head);
265}
266
267static void ip_tunnel_del(struct ip_tunnel *t)
268{
269 hlist_del_init_rcu(&t->hash_node);
270}
271
272static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273 struct ip_tunnel_parm *parms,
274 int type)
275{
276 __be32 remote = parms->iph.daddr;
277 __be32 local = parms->iph.saddr;
278 __be32 key = parms->i_key;
5ce54af1 279 __be16 flags = parms->i_flags;
c5441932
PS
280 int link = parms->link;
281 struct ip_tunnel *t = NULL;
282 struct hlist_head *head = ip_bucket(itn, parms);
283
284 hlist_for_each_entry_rcu(t, head, hash_node) {
285 if (local == t->parms.iph.saddr &&
286 remote == t->parms.iph.daddr &&
c5441932 287 link == t->parms.link &&
5ce54af1
DP
288 type == t->dev->type &&
289 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
290 break;
291 }
292 return t;
293}
294
295static struct net_device *__ip_tunnel_create(struct net *net,
296 const struct rtnl_link_ops *ops,
297 struct ip_tunnel_parm *parms)
298{
299 int err;
300 struct ip_tunnel *tunnel;
301 struct net_device *dev;
302 char name[IFNAMSIZ];
303
304 if (parms->name[0])
305 strlcpy(name, parms->name, IFNAMSIZ);
306 else {
54a5d382 307 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
308 err = -E2BIG;
309 goto failed;
310 }
311 strlcpy(name, ops->kind, IFNAMSIZ);
312 strncat(name, "%d", 2);
313 }
314
315 ASSERT_RTNL();
c835a677 316 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
317 if (!dev) {
318 err = -ENOMEM;
319 goto failed;
320 }
321 dev_net_set(dev, net);
322
323 dev->rtnl_link_ops = ops;
324
325 tunnel = netdev_priv(dev);
326 tunnel->parms = *parms;
5e6700b3 327 tunnel->net = net;
c5441932
PS
328
329 err = register_netdevice(dev);
330 if (err)
331 goto failed_free;
332
333 return dev;
334
335failed_free:
336 free_netdev(dev);
337failed:
338 return ERR_PTR(err);
339}
340
7d442fab
TH
341static inline void init_tunnel_flow(struct flowi4 *fl4,
342 int proto,
343 __be32 daddr, __be32 saddr,
344 __be32 key, __u8 tos, int oif)
c5441932
PS
345{
346 memset(fl4, 0, sizeof(*fl4));
347 fl4->flowi4_oif = oif;
348 fl4->daddr = daddr;
349 fl4->saddr = saddr;
350 fl4->flowi4_tos = tos;
351 fl4->flowi4_proto = proto;
352 fl4->fl4_gre_key = key;
c5441932
PS
353}
354
355static int ip_tunnel_bind_dev(struct net_device *dev)
356{
357 struct net_device *tdev = NULL;
358 struct ip_tunnel *tunnel = netdev_priv(dev);
359 const struct iphdr *iph;
360 int hlen = LL_MAX_HEADER;
361 int mtu = ETH_DATA_LEN;
362 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
363
364 iph = &tunnel->parms.iph;
365
366 /* Guess output device to choose reasonable mtu and needed_headroom */
367 if (iph->daddr) {
368 struct flowi4 fl4;
369 struct rtable *rt;
370
7d442fab
TH
371 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
372 iph->saddr, tunnel->parms.o_key,
373 RT_TOS(iph->tos), tunnel->parms.link);
374 rt = ip_route_output_key(tunnel->net, &fl4);
375
c5441932
PS
376 if (!IS_ERR(rt)) {
377 tdev = rt->dst.dev;
95cb5745 378 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932
PS
379 ip_rt_put(rt);
380 }
381 if (dev->type != ARPHRD_ETHER)
382 dev->flags |= IFF_POINTOPOINT;
383 }
384
385 if (!tdev && tunnel->parms.link)
6c742e71 386 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
387
388 if (tdev) {
389 hlen = tdev->hard_header_len + tdev->needed_headroom;
390 mtu = tdev->mtu;
391 }
392 dev->iflink = tunnel->parms.link;
393
394 dev->needed_headroom = t_hlen + hlen;
395 mtu -= (dev->hard_header_len + t_hlen);
396
397 if (mtu < 68)
398 mtu = 68;
399
400 return mtu;
401}
402
403static struct ip_tunnel *ip_tunnel_create(struct net *net,
404 struct ip_tunnel_net *itn,
405 struct ip_tunnel_parm *parms)
406{
4929fd8c 407 struct ip_tunnel *nt;
c5441932
PS
408 struct net_device *dev;
409
410 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
411 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
412 if (IS_ERR(dev))
6dd3c9ec 413 return ERR_CAST(dev);
c5441932
PS
414
415 dev->mtu = ip_tunnel_bind_dev(dev);
416
417 nt = netdev_priv(dev);
418 ip_tunnel_add(itn, nt);
419 return nt;
420}
421
422int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
423 const struct tnl_ptk_info *tpi, bool log_ecn_error)
424{
8f84985f 425 struct pcpu_sw_netstats *tstats;
c5441932
PS
426 const struct iphdr *iph = ip_hdr(skb);
427 int err;
428
c5441932
PS
429#ifdef CONFIG_NET_IPGRE_BROADCAST
430 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
431 tunnel->dev->stats.multicast++;
432 skb->pkt_type = PACKET_BROADCAST;
433 }
434#endif
435
436 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
437 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
438 tunnel->dev->stats.rx_crc_errors++;
439 tunnel->dev->stats.rx_errors++;
440 goto drop;
441 }
442
443 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
444 if (!(tpi->flags&TUNNEL_SEQ) ||
445 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
446 tunnel->dev->stats.rx_fifo_errors++;
447 tunnel->dev->stats.rx_errors++;
448 goto drop;
449 }
450 tunnel->i_seqno = ntohl(tpi->seq) + 1;
451 }
452
e96f2e7c
YC
453 skb_reset_network_header(skb);
454
c5441932
PS
455 err = IP_ECN_decapsulate(iph, skb);
456 if (unlikely(err)) {
457 if (log_ecn_error)
458 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
459 &iph->saddr, iph->tos);
460 if (err > 1) {
461 ++tunnel->dev->stats.rx_frame_errors;
462 ++tunnel->dev->stats.rx_errors;
463 goto drop;
464 }
465 }
466
467 tstats = this_cpu_ptr(tunnel->dev->tstats);
468 u64_stats_update_begin(&tstats->syncp);
469 tstats->rx_packets++;
470 tstats->rx_bytes += skb->len;
471 u64_stats_update_end(&tstats->syncp);
472
81b9eab5
AS
473 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
474
3d7b46cd
PS
475 if (tunnel->dev->type == ARPHRD_ETHER) {
476 skb->protocol = eth_type_trans(skb, tunnel->dev);
477 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
478 } else {
479 skb->dev = tunnel->dev;
480 }
64261f23 481
c5441932
PS
482 gro_cells_receive(&tunnel->gro_cells, skb);
483 return 0;
484
485drop:
486 kfree_skb(skb);
487 return 0;
488}
489EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
490
56328486
TH
491static int ip_encap_hlen(struct ip_tunnel_encap *e)
492{
a8c5f90f
TH
493 const struct ip_tunnel_encap_ops *ops;
494 int hlen = -EINVAL;
495
496 if (e->type == TUNNEL_ENCAP_NONE)
56328486 497 return 0;
a8c5f90f
TH
498
499 if (e->type >= MAX_IPTUN_ENCAP_OPS)
56328486 500 return -EINVAL;
a8c5f90f
TH
501
502 rcu_read_lock();
503 ops = rcu_dereference(iptun_encaps[e->type]);
504 if (likely(ops && ops->encap_hlen))
505 hlen = ops->encap_hlen(e);
506 rcu_read_unlock();
507
508 return hlen;
509}
510
511const struct ip_tunnel_encap_ops __rcu *
512 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
513
514int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
515 unsigned int num)
516{
bb1553c8
TG
517 if (num >= MAX_IPTUN_ENCAP_OPS)
518 return -ERANGE;
519
a8c5f90f
TH
520 return !cmpxchg((const struct ip_tunnel_encap_ops **)
521 &iptun_encaps[num],
522 NULL, ops) ? 0 : -1;
56328486 523}
a8c5f90f
TH
524EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
525
526int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
527 unsigned int num)
528{
529 int ret;
530
bb1553c8
TG
531 if (num >= MAX_IPTUN_ENCAP_OPS)
532 return -ERANGE;
533
a8c5f90f
TH
534 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
535 &iptun_encaps[num],
536 ops, NULL) == ops) ? 0 : -1;
537
538 synchronize_net();
539
540 return ret;
541}
542EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
56328486
TH
543
544int ip_tunnel_encap_setup(struct ip_tunnel *t,
545 struct ip_tunnel_encap *ipencap)
546{
547 int hlen;
548
549 memset(&t->encap, 0, sizeof(t->encap));
550
551 hlen = ip_encap_hlen(ipencap);
552 if (hlen < 0)
553 return hlen;
554
555 t->encap.type = ipencap->type;
556 t->encap.sport = ipencap->sport;
557 t->encap.dport = ipencap->dport;
558 t->encap.flags = ipencap->flags;
559
560 t->encap_hlen = hlen;
561 t->hlen = t->encap_hlen + t->tun_hlen;
562
563 return 0;
564}
565EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
566
56328486
TH
567int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
568 u8 *protocol, struct flowi4 *fl4)
569{
a8c5f90f
TH
570 const struct ip_tunnel_encap_ops *ops;
571 int ret = -EINVAL;
572
573 if (t->encap.type == TUNNEL_ENCAP_NONE)
56328486 574 return 0;
a8c5f90f
TH
575
576 rcu_read_lock();
577 ops = rcu_dereference(iptun_encaps[t->encap.type]);
578 if (likely(ops && ops->build_header))
579 ret = ops->build_header(skb, &t->encap, protocol, fl4);
580 rcu_read_unlock();
581
582 return ret;
56328486
TH
583}
584EXPORT_SYMBOL(ip_tunnel_encap);
585
23a3647b
PS
586static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
587 struct rtable *rt, __be16 df)
588{
589 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 590 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
591 int mtu;
592
593 if (df)
594 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
595 - sizeof(struct iphdr) - tunnel->hlen;
596 else
597 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
598
599 if (skb_dst(skb))
600 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
601
602 if (skb->protocol == htons(ETH_P_IP)) {
603 if (!skb_is_gso(skb) &&
604 (df & htons(IP_DF)) && mtu < pkt_size) {
605 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
606 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
607 return -E2BIG;
608 }
609 }
610#if IS_ENABLED(CONFIG_IPV6)
611 else if (skb->protocol == htons(ETH_P_IPV6)) {
612 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
613
614 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
615 mtu >= IPV6_MIN_MTU) {
616 if ((tunnel->parms.iph.daddr &&
617 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
618 rt6->rt6i_dst.plen == 128) {
619 rt6->rt6i_flags |= RTF_MODIFIED;
620 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
621 }
622 }
623
624 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
625 mtu < pkt_size) {
626 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
627 return -E2BIG;
628 }
629 }
630#endif
631 return 0;
632}
633
c5441932 634void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 635 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
636{
637 struct ip_tunnel *tunnel = netdev_priv(dev);
638 const struct iphdr *inner_iph;
c5441932
PS
639 struct flowi4 fl4;
640 u8 tos, ttl;
641 __be16 df;
b045d37b 642 struct rtable *rt; /* Route to the other host */
c5441932
PS
643 unsigned int max_headroom; /* The extra header space needed */
644 __be32 dst;
0e6fbc5b 645 int err;
22fb22ea 646 bool connected;
c5441932
PS
647
648 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 649 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
650
651 dst = tnl_params->daddr;
652 if (dst == 0) {
653 /* NBMA tunnel */
654
655 if (skb_dst(skb) == NULL) {
656 dev->stats.tx_fifo_errors++;
657 goto tx_error;
658 }
659
660 if (skb->protocol == htons(ETH_P_IP)) {
661 rt = skb_rtable(skb);
662 dst = rt_nexthop(rt, inner_iph->daddr);
663 }
664#if IS_ENABLED(CONFIG_IPV6)
665 else if (skb->protocol == htons(ETH_P_IPV6)) {
666 const struct in6_addr *addr6;
667 struct neighbour *neigh;
668 bool do_tx_error_icmp;
669 int addr_type;
670
671 neigh = dst_neigh_lookup(skb_dst(skb),
672 &ipv6_hdr(skb)->daddr);
673 if (neigh == NULL)
674 goto tx_error;
675
676 addr6 = (const struct in6_addr *)&neigh->primary_key;
677 addr_type = ipv6_addr_type(addr6);
678
679 if (addr_type == IPV6_ADDR_ANY) {
680 addr6 = &ipv6_hdr(skb)->daddr;
681 addr_type = ipv6_addr_type(addr6);
682 }
683
684 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
685 do_tx_error_icmp = true;
686 else {
687 do_tx_error_icmp = false;
688 dst = addr6->s6_addr32[3];
689 }
690 neigh_release(neigh);
691 if (do_tx_error_icmp)
692 goto tx_error_icmp;
693 }
694#endif
695 else
696 goto tx_error;
7d442fab
TH
697
698 connected = false;
c5441932
PS
699 }
700
701 tos = tnl_params->tos;
702 if (tos & 0x1) {
703 tos &= ~0x1;
7d442fab 704 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 705 tos = inner_iph->tos;
7d442fab
TH
706 connected = false;
707 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 708 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
709 connected = false;
710 }
c5441932
PS
711 }
712
7d442fab
TH
713 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
714 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
715
56328486
TH
716 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
717 goto tx_error;
718
95cb5745 719 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
7d442fab
TH
720
721 if (!rt) {
722 rt = ip_route_output_key(tunnel->net, &fl4);
723
724 if (IS_ERR(rt)) {
725 dev->stats.tx_carrier_errors++;
726 goto tx_error;
727 }
728 if (connected)
95cb5745 729 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932 730 }
7d442fab 731
0e6fbc5b 732 if (rt->dst.dev == dev) {
c5441932
PS
733 ip_rt_put(rt);
734 dev->stats.collisions++;
735 goto tx_error;
736 }
c5441932 737
23a3647b
PS
738 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
739 ip_rt_put(rt);
740 goto tx_error;
c5441932 741 }
c5441932
PS
742
743 if (tunnel->err_count > 0) {
744 if (time_before(jiffies,
745 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
746 tunnel->err_count--;
747
11c21a30 748 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
749 dst_link_failure(skb);
750 } else
751 tunnel->err_count = 0;
752 }
753
d4a71b15 754 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
755 ttl = tnl_params->ttl;
756 if (ttl == 0) {
757 if (skb->protocol == htons(ETH_P_IP))
758 ttl = inner_iph->ttl;
759#if IS_ENABLED(CONFIG_IPV6)
760 else if (skb->protocol == htons(ETH_P_IPV6))
761 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
762#endif
763 else
764 ttl = ip4_dst_hoplimit(&rt->dst);
765 }
766
23a3647b
PS
767 df = tnl_params->frag_off;
768 if (skb->protocol == htons(ETH_P_IP))
769 df |= (inner_iph->frag_off&htons(IP_DF));
770
0e6fbc5b 771 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 772 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
3e08f4a7 773 if (max_headroom > dev->needed_headroom)
c5441932 774 dev->needed_headroom = max_headroom;
3e08f4a7
SK
775
776 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 777 ip_rt_put(rt);
3e08f4a7 778 dev->stats.tx_dropped++;
3acfa1e7 779 kfree_skb(skb);
3e08f4a7 780 return;
c5441932
PS
781 }
782
aad88724 783 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 784 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 785 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 786
c5441932
PS
787 return;
788
789#if IS_ENABLED(CONFIG_IPV6)
790tx_error_icmp:
791 dst_link_failure(skb);
792#endif
793tx_error:
794 dev->stats.tx_errors++;
3acfa1e7 795 kfree_skb(skb);
c5441932
PS
796}
797EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
798
799static void ip_tunnel_update(struct ip_tunnel_net *itn,
800 struct ip_tunnel *t,
801 struct net_device *dev,
802 struct ip_tunnel_parm *p,
803 bool set_mtu)
804{
805 ip_tunnel_del(t);
806 t->parms.iph.saddr = p->iph.saddr;
807 t->parms.iph.daddr = p->iph.daddr;
808 t->parms.i_key = p->i_key;
809 t->parms.o_key = p->o_key;
810 if (dev->type != ARPHRD_ETHER) {
811 memcpy(dev->dev_addr, &p->iph.saddr, 4);
812 memcpy(dev->broadcast, &p->iph.daddr, 4);
813 }
814 ip_tunnel_add(itn, t);
815
816 t->parms.iph.ttl = p->iph.ttl;
817 t->parms.iph.tos = p->iph.tos;
818 t->parms.iph.frag_off = p->iph.frag_off;
819
820 if (t->parms.link != p->link) {
821 int mtu;
822
823 t->parms.link = p->link;
824 mtu = ip_tunnel_bind_dev(dev);
825 if (set_mtu)
826 dev->mtu = mtu;
827 }
cf71d2bc 828 ip_tunnel_dst_reset_all(t);
c5441932
PS
829 netdev_state_change(dev);
830}
831
832int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
833{
834 int err = 0;
8c923ce2
ND
835 struct ip_tunnel *t = netdev_priv(dev);
836 struct net *net = t->net;
837 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
838
839 BUG_ON(!itn->fb_tunnel_dev);
840 switch (cmd) {
841 case SIOCGETTUNNEL:
8c923ce2 842 if (dev == itn->fb_tunnel_dev) {
c5441932 843 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
8c923ce2
ND
844 if (t == NULL)
845 t = netdev_priv(dev);
846 }
c5441932
PS
847 memcpy(p, &t->parms, sizeof(*p));
848 break;
849
850 case SIOCADDTUNNEL:
851 case SIOCCHGTUNNEL:
852 err = -EPERM;
853 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
854 goto done;
855 if (p->iph.ttl)
856 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
857 if (!(p->i_flags & VTI_ISVTI)) {
858 if (!(p->i_flags & TUNNEL_KEY))
859 p->i_key = 0;
860 if (!(p->o_flags & TUNNEL_KEY))
861 p->o_key = 0;
862 }
c5441932
PS
863
864 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
865
d61746b2
SK
866 if (cmd == SIOCADDTUNNEL) {
867 if (!t) {
868 t = ip_tunnel_create(net, itn, p);
869 err = PTR_ERR_OR_ZERO(t);
870 break;
871 }
872
873 err = -EEXIST;
ee30ef4d 874 break;
6dd3c9ec 875 }
c5441932
PS
876 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
877 if (t != NULL) {
878 if (t->dev != dev) {
879 err = -EEXIST;
880 break;
881 }
882 } else {
883 unsigned int nflags = 0;
884
885 if (ipv4_is_multicast(p->iph.daddr))
886 nflags = IFF_BROADCAST;
887 else if (p->iph.daddr)
888 nflags = IFF_POINTOPOINT;
889
890 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
891 err = -EINVAL;
892 break;
893 }
894
895 t = netdev_priv(dev);
896 }
897 }
898
899 if (t) {
900 err = 0;
901 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
902 } else {
903 err = -ENOENT;
904 }
c5441932
PS
905 break;
906
907 case SIOCDELTUNNEL:
908 err = -EPERM;
909 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
910 goto done;
911
912 if (dev == itn->fb_tunnel_dev) {
913 err = -ENOENT;
914 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
915 if (t == NULL)
916 goto done;
917 err = -EPERM;
918 if (t == netdev_priv(itn->fb_tunnel_dev))
919 goto done;
920 dev = t->dev;
921 }
922 unregister_netdevice(dev);
923 err = 0;
924 break;
925
926 default:
927 err = -EINVAL;
928 }
929
930done:
931 return err;
932}
933EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
934
935int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
936{
937 struct ip_tunnel *tunnel = netdev_priv(dev);
938 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
939
940 if (new_mtu < 68 ||
941 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
942 return -EINVAL;
943 dev->mtu = new_mtu;
944 return 0;
945}
946EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
947
948static void ip_tunnel_dev_free(struct net_device *dev)
949{
950 struct ip_tunnel *tunnel = netdev_priv(dev);
951
952 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 953 free_percpu(tunnel->dst_cache);
c5441932
PS
954 free_percpu(dev->tstats);
955 free_netdev(dev);
956}
957
958void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
959{
c5441932
PS
960 struct ip_tunnel *tunnel = netdev_priv(dev);
961 struct ip_tunnel_net *itn;
962
6c742e71 963 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
964
965 if (itn->fb_tunnel_dev != dev) {
966 ip_tunnel_del(netdev_priv(dev));
967 unregister_netdevice_queue(dev, head);
968 }
969}
970EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
971
d3b6f614 972int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
973 struct rtnl_link_ops *ops, char *devname)
974{
975 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
976 struct ip_tunnel_parm parms;
6261d983 977 unsigned int i;
c5441932 978
6261d983 979 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
980 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
981
982 if (!ops) {
983 itn->fb_tunnel_dev = NULL;
984 return 0;
985 }
6261d983 986
c5441932
PS
987 memset(&parms, 0, sizeof(parms));
988 if (devname)
989 strlcpy(parms.name, devname, IFNAMSIZ);
990
991 rtnl_lock();
992 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
993 /* FB netdevice is special: we have one, and only one per netns.
994 * Allowing to move it to another netns is clearly unsafe.
995 */
67013282 996 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 997 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 998 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
999 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1000 }
b4de77ad 1001 rtnl_unlock();
c5441932 1002
27d79f3b 1003 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
1004}
1005EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1006
6c742e71
ND
1007static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1008 struct rtnl_link_ops *ops)
c5441932 1009{
6c742e71
ND
1010 struct net *net = dev_net(itn->fb_tunnel_dev);
1011 struct net_device *dev, *aux;
c5441932
PS
1012 int h;
1013
6c742e71
ND
1014 for_each_netdev_safe(net, dev, aux)
1015 if (dev->rtnl_link_ops == ops)
1016 unregister_netdevice_queue(dev, head);
1017
c5441932
PS
1018 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1019 struct ip_tunnel *t;
1020 struct hlist_node *n;
1021 struct hlist_head *thead = &itn->tunnels[h];
1022
1023 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1024 /* If dev is in the same netns, it has already
1025 * been added to the list by the previous loop.
1026 */
1027 if (!net_eq(dev_net(t->dev), net))
1028 unregister_netdevice_queue(t->dev, head);
c5441932 1029 }
c5441932
PS
1030}
1031
6c742e71 1032void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
1033{
1034 LIST_HEAD(list);
1035
1036 rtnl_lock();
6c742e71 1037 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
1038 unregister_netdevice_many(&list);
1039 rtnl_unlock();
c5441932
PS
1040}
1041EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1042
1043int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1044 struct ip_tunnel_parm *p)
1045{
1046 struct ip_tunnel *nt;
1047 struct net *net = dev_net(dev);
1048 struct ip_tunnel_net *itn;
1049 int mtu;
1050 int err;
1051
1052 nt = netdev_priv(dev);
1053 itn = net_generic(net, nt->ip_tnl_net_id);
1054
1055 if (ip_tunnel_find(itn, p, dev->type))
1056 return -EEXIST;
1057
5e6700b3 1058 nt->net = net;
c5441932
PS
1059 nt->parms = *p;
1060 err = register_netdevice(dev);
1061 if (err)
1062 goto out;
1063
1064 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1065 eth_hw_addr_random(dev);
1066
1067 mtu = ip_tunnel_bind_dev(dev);
1068 if (!tb[IFLA_MTU])
1069 dev->mtu = mtu;
1070
1071 ip_tunnel_add(itn, nt);
1072
1073out:
1074 return err;
1075}
1076EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1077
1078int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1079 struct ip_tunnel_parm *p)
1080{
6c742e71 1081 struct ip_tunnel *t;
c5441932 1082 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1083 struct net *net = tunnel->net;
c5441932
PS
1084 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1085
1086 if (dev == itn->fb_tunnel_dev)
1087 return -EINVAL;
1088
c5441932
PS
1089 t = ip_tunnel_find(itn, p, dev->type);
1090
1091 if (t) {
1092 if (t->dev != dev)
1093 return -EEXIST;
1094 } else {
6c742e71 1095 t = tunnel;
c5441932
PS
1096
1097 if (dev->type != ARPHRD_ETHER) {
1098 unsigned int nflags = 0;
1099
1100 if (ipv4_is_multicast(p->iph.daddr))
1101 nflags = IFF_BROADCAST;
1102 else if (p->iph.daddr)
1103 nflags = IFF_POINTOPOINT;
1104
1105 if ((dev->flags ^ nflags) &
1106 (IFF_POINTOPOINT | IFF_BROADCAST))
1107 return -EINVAL;
1108 }
1109 }
1110
1111 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1112 return 0;
1113}
1114EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1115
1116int ip_tunnel_init(struct net_device *dev)
1117{
1118 struct ip_tunnel *tunnel = netdev_priv(dev);
1119 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1120 int err;
c5441932
PS
1121
1122 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1123 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1124 if (!dev->tstats)
1125 return -ENOMEM;
1126
9a4aa9af
TH
1127 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1128 if (!tunnel->dst_cache) {
1129 free_percpu(dev->tstats);
1130 return -ENOMEM;
1131 }
1132
c5441932
PS
1133 err = gro_cells_init(&tunnel->gro_cells, dev);
1134 if (err) {
9a4aa9af 1135 free_percpu(tunnel->dst_cache);
c5441932
PS
1136 free_percpu(dev->tstats);
1137 return err;
1138 }
1139
1140 tunnel->dev = dev;
6c742e71 1141 tunnel->net = dev_net(dev);
c5441932
PS
1142 strcpy(tunnel->parms.name, dev->name);
1143 iph->version = 4;
1144 iph->ihl = 5;
1145
1146 return 0;
1147}
1148EXPORT_SYMBOL_GPL(ip_tunnel_init);
1149
1150void ip_tunnel_uninit(struct net_device *dev)
1151{
c5441932 1152 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1153 struct net *net = tunnel->net;
c5441932
PS
1154 struct ip_tunnel_net *itn;
1155
1156 itn = net_generic(net, tunnel->ip_tnl_net_id);
1157 /* fb_tunnel_dev will be unregisted in net-exit call. */
1158 if (itn->fb_tunnel_dev != dev)
1159 ip_tunnel_del(netdev_priv(dev));
7d442fab 1160
cf71d2bc 1161 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1162}
1163EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1164
1165/* Do least required initialization, rest of init is done in tunnel_init call */
1166void ip_tunnel_setup(struct net_device *dev, int net_id)
1167{
1168 struct ip_tunnel *tunnel = netdev_priv(dev);
1169 tunnel->ip_tnl_net_id = net_id;
1170}
1171EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1172
1173MODULE_LICENSE("GPL");
This page took 0.31812 seconds and 5 git commands to generate.