net: Move fou_build_header into fou.c and refactor
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
56328486 58#include <net/udp.h>
63487bab
TH
59
60#if IS_ENABLED(CONFIG_NET_FOU)
61#include <net/fou.h>
62#endif
c5441932
PS
63
64#if IS_ENABLED(CONFIG_IPV6)
65#include <net/ipv6.h>
66#include <net/ip6_fib.h>
67#include <net/ip6_route.h>
68#endif
69
967680e0 70static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
71{
72 return hash_32((__force u32)key ^ (__force u32)remote,
73 IP_TNL_HASH_BITS);
74}
75
6c7e7610 76static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
95cb5745 77 struct dst_entry *dst, __be32 saddr)
7d442fab
TH
78{
79 struct dst_entry *old_dst;
80
f8864972 81 dst_clone(dst);
6c7e7610 82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 83 dst_release(old_dst);
95cb5745 84 idst->saddr = saddr;
7d442fab
TH
85}
86
a35165ca 87static noinline void tunnel_dst_set(struct ip_tunnel *t,
95cb5745 88 struct dst_entry *dst, __be32 saddr)
7d442fab 89{
a35165ca 90 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
7d442fab
TH
91}
92
6c7e7610 93static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab 94{
95cb5745 95 tunnel_dst_set(t, NULL, 0);
7d442fab
TH
96}
97
cf71d2bc 98void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
99{
100 int i;
101
102 for_each_possible_cpu(i)
95cb5745 103 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
9a4aa9af 104}
cf71d2bc 105EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 106
95cb5745
DP
107static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
108 u32 cookie, __be32 *saddr)
7d442fab 109{
95cb5745 110 struct ip_tunnel_dst *idst;
7d442fab
TH
111 struct dst_entry *dst;
112
113 rcu_read_lock();
a35165ca 114 idst = raw_cpu_ptr(t->dst_cache);
95cb5745 115 dst = rcu_dereference(idst->dst);
f8864972
ED
116 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
117 dst = NULL;
b045d37b 118 if (dst) {
95cb5745
DP
119 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
120 *saddr = idst->saddr;
121 } else {
b045d37b 122 tunnel_dst_reset(t);
f8864972
ED
123 dst_release(dst);
124 dst = NULL;
b045d37b 125 }
7d442fab 126 }
b045d37b
ED
127 rcu_read_unlock();
128 return (struct rtable *)dst;
7d442fab
TH
129}
130
c5441932
PS
131static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
132 __be16 flags, __be32 key)
133{
134 if (p->i_flags & TUNNEL_KEY) {
135 if (flags & TUNNEL_KEY)
136 return key == p->i_key;
137 else
138 /* key expected, none present */
139 return false;
140 } else
141 return !(flags & TUNNEL_KEY);
142}
143
144/* Fallback tunnel: no source, no destination, no key, no options
145
146 Tunnel hash table:
147 We require exact key match i.e. if a key is present in packet
148 it will match only tunnel with the same key; if it is not present,
149 it will match only keyless tunnel.
150
151 All keysless packets, if not matched configured keyless tunnels
152 will match fallback tunnel.
153 Given src, dst and key, find appropriate for input tunnel.
154*/
155struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
156 int link, __be16 flags,
157 __be32 remote, __be32 local,
158 __be32 key)
159{
160 unsigned int hash;
161 struct ip_tunnel *t, *cand = NULL;
162 struct hlist_head *head;
163
967680e0 164 hash = ip_tunnel_hash(key, remote);
c5441932
PS
165 head = &itn->tunnels[hash];
166
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (local != t->parms.iph.saddr ||
169 remote != t->parms.iph.daddr ||
170 !(t->dev->flags & IFF_UP))
171 continue;
172
173 if (!ip_tunnel_key_match(&t->parms, flags, key))
174 continue;
175
176 if (t->parms.link == link)
177 return t;
178 else
179 cand = t;
180 }
181
182 hlist_for_each_entry_rcu(t, head, hash_node) {
183 if (remote != t->parms.iph.daddr ||
e0056593 184 t->parms.iph.saddr != 0 ||
c5441932
PS
185 !(t->dev->flags & IFF_UP))
186 continue;
187
188 if (!ip_tunnel_key_match(&t->parms, flags, key))
189 continue;
190
191 if (t->parms.link == link)
192 return t;
193 else if (!cand)
194 cand = t;
195 }
196
967680e0 197 hash = ip_tunnel_hash(key, 0);
c5441932
PS
198 head = &itn->tunnels[hash];
199
200 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
201 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
202 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
203 continue;
204
205 if (!(t->dev->flags & IFF_UP))
c5441932
PS
206 continue;
207
208 if (!ip_tunnel_key_match(&t->parms, flags, key))
209 continue;
210
211 if (t->parms.link == link)
212 return t;
213 else if (!cand)
214 cand = t;
215 }
216
217 if (flags & TUNNEL_NO_KEY)
218 goto skip_key_lookup;
219
220 hlist_for_each_entry_rcu(t, head, hash_node) {
221 if (t->parms.i_key != key ||
e0056593
DP
222 t->parms.iph.saddr != 0 ||
223 t->parms.iph.daddr != 0 ||
c5441932
PS
224 !(t->dev->flags & IFF_UP))
225 continue;
226
227 if (t->parms.link == link)
228 return t;
229 else if (!cand)
230 cand = t;
231 }
232
233skip_key_lookup:
234 if (cand)
235 return cand;
236
237 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
238 return netdev_priv(itn->fb_tunnel_dev);
239
240
241 return NULL;
242}
243EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
244
245static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
246 struct ip_tunnel_parm *parms)
247{
248 unsigned int h;
249 __be32 remote;
6d608f06 250 __be32 i_key = parms->i_key;
c5441932
PS
251
252 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
253 remote = parms->iph.daddr;
254 else
255 remote = 0;
256
6d608f06
SK
257 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
258 i_key = 0;
259
260 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
261 return &itn->tunnels[h];
262}
263
264static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
265{
266 struct hlist_head *head = ip_bucket(itn, &t->parms);
267
268 hlist_add_head_rcu(&t->hash_node, head);
269}
270
271static void ip_tunnel_del(struct ip_tunnel *t)
272{
273 hlist_del_init_rcu(&t->hash_node);
274}
275
276static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
277 struct ip_tunnel_parm *parms,
278 int type)
279{
280 __be32 remote = parms->iph.daddr;
281 __be32 local = parms->iph.saddr;
282 __be32 key = parms->i_key;
5ce54af1 283 __be16 flags = parms->i_flags;
c5441932
PS
284 int link = parms->link;
285 struct ip_tunnel *t = NULL;
286 struct hlist_head *head = ip_bucket(itn, parms);
287
288 hlist_for_each_entry_rcu(t, head, hash_node) {
289 if (local == t->parms.iph.saddr &&
290 remote == t->parms.iph.daddr &&
c5441932 291 link == t->parms.link &&
5ce54af1
DP
292 type == t->dev->type &&
293 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
294 break;
295 }
296 return t;
297}
298
299static struct net_device *__ip_tunnel_create(struct net *net,
300 const struct rtnl_link_ops *ops,
301 struct ip_tunnel_parm *parms)
302{
303 int err;
304 struct ip_tunnel *tunnel;
305 struct net_device *dev;
306 char name[IFNAMSIZ];
307
308 if (parms->name[0])
309 strlcpy(name, parms->name, IFNAMSIZ);
310 else {
54a5d382 311 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
312 err = -E2BIG;
313 goto failed;
314 }
315 strlcpy(name, ops->kind, IFNAMSIZ);
316 strncat(name, "%d", 2);
317 }
318
319 ASSERT_RTNL();
c835a677 320 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
321 if (!dev) {
322 err = -ENOMEM;
323 goto failed;
324 }
325 dev_net_set(dev, net);
326
327 dev->rtnl_link_ops = ops;
328
329 tunnel = netdev_priv(dev);
330 tunnel->parms = *parms;
5e6700b3 331 tunnel->net = net;
c5441932
PS
332
333 err = register_netdevice(dev);
334 if (err)
335 goto failed_free;
336
337 return dev;
338
339failed_free:
340 free_netdev(dev);
341failed:
342 return ERR_PTR(err);
343}
344
7d442fab
TH
345static inline void init_tunnel_flow(struct flowi4 *fl4,
346 int proto,
347 __be32 daddr, __be32 saddr,
348 __be32 key, __u8 tos, int oif)
c5441932
PS
349{
350 memset(fl4, 0, sizeof(*fl4));
351 fl4->flowi4_oif = oif;
352 fl4->daddr = daddr;
353 fl4->saddr = saddr;
354 fl4->flowi4_tos = tos;
355 fl4->flowi4_proto = proto;
356 fl4->fl4_gre_key = key;
c5441932
PS
357}
358
359static int ip_tunnel_bind_dev(struct net_device *dev)
360{
361 struct net_device *tdev = NULL;
362 struct ip_tunnel *tunnel = netdev_priv(dev);
363 const struct iphdr *iph;
364 int hlen = LL_MAX_HEADER;
365 int mtu = ETH_DATA_LEN;
366 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
367
368 iph = &tunnel->parms.iph;
369
370 /* Guess output device to choose reasonable mtu and needed_headroom */
371 if (iph->daddr) {
372 struct flowi4 fl4;
373 struct rtable *rt;
374
7d442fab
TH
375 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
376 iph->saddr, tunnel->parms.o_key,
377 RT_TOS(iph->tos), tunnel->parms.link);
378 rt = ip_route_output_key(tunnel->net, &fl4);
379
c5441932
PS
380 if (!IS_ERR(rt)) {
381 tdev = rt->dst.dev;
95cb5745 382 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932
PS
383 ip_rt_put(rt);
384 }
385 if (dev->type != ARPHRD_ETHER)
386 dev->flags |= IFF_POINTOPOINT;
387 }
388
389 if (!tdev && tunnel->parms.link)
6c742e71 390 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
391
392 if (tdev) {
393 hlen = tdev->hard_header_len + tdev->needed_headroom;
394 mtu = tdev->mtu;
395 }
396 dev->iflink = tunnel->parms.link;
397
398 dev->needed_headroom = t_hlen + hlen;
399 mtu -= (dev->hard_header_len + t_hlen);
400
401 if (mtu < 68)
402 mtu = 68;
403
404 return mtu;
405}
406
407static struct ip_tunnel *ip_tunnel_create(struct net *net,
408 struct ip_tunnel_net *itn,
409 struct ip_tunnel_parm *parms)
410{
4929fd8c 411 struct ip_tunnel *nt;
c5441932
PS
412 struct net_device *dev;
413
414 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
415 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
416 if (IS_ERR(dev))
6dd3c9ec 417 return ERR_CAST(dev);
c5441932
PS
418
419 dev->mtu = ip_tunnel_bind_dev(dev);
420
421 nt = netdev_priv(dev);
422 ip_tunnel_add(itn, nt);
423 return nt;
424}
425
426int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
427 const struct tnl_ptk_info *tpi, bool log_ecn_error)
428{
8f84985f 429 struct pcpu_sw_netstats *tstats;
c5441932
PS
430 const struct iphdr *iph = ip_hdr(skb);
431 int err;
432
c5441932
PS
433#ifdef CONFIG_NET_IPGRE_BROADCAST
434 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
435 tunnel->dev->stats.multicast++;
436 skb->pkt_type = PACKET_BROADCAST;
437 }
438#endif
439
440 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
441 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
442 tunnel->dev->stats.rx_crc_errors++;
443 tunnel->dev->stats.rx_errors++;
444 goto drop;
445 }
446
447 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
448 if (!(tpi->flags&TUNNEL_SEQ) ||
449 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
450 tunnel->dev->stats.rx_fifo_errors++;
451 tunnel->dev->stats.rx_errors++;
452 goto drop;
453 }
454 tunnel->i_seqno = ntohl(tpi->seq) + 1;
455 }
456
e96f2e7c
YC
457 skb_reset_network_header(skb);
458
c5441932
PS
459 err = IP_ECN_decapsulate(iph, skb);
460 if (unlikely(err)) {
461 if (log_ecn_error)
462 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
463 &iph->saddr, iph->tos);
464 if (err > 1) {
465 ++tunnel->dev->stats.rx_frame_errors;
466 ++tunnel->dev->stats.rx_errors;
467 goto drop;
468 }
469 }
470
471 tstats = this_cpu_ptr(tunnel->dev->tstats);
472 u64_stats_update_begin(&tstats->syncp);
473 tstats->rx_packets++;
474 tstats->rx_bytes += skb->len;
475 u64_stats_update_end(&tstats->syncp);
476
81b9eab5
AS
477 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
478
3d7b46cd
PS
479 if (tunnel->dev->type == ARPHRD_ETHER) {
480 skb->protocol = eth_type_trans(skb, tunnel->dev);
481 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
482 } else {
483 skb->dev = tunnel->dev;
484 }
64261f23 485
c5441932
PS
486 gro_cells_receive(&tunnel->gro_cells, skb);
487 return 0;
488
489drop:
490 kfree_skb(skb);
491 return 0;
492}
493EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
494
56328486
TH
495static int ip_encap_hlen(struct ip_tunnel_encap *e)
496{
497 switch (e->type) {
498 case TUNNEL_ENCAP_NONE:
499 return 0;
63487bab 500#if IS_ENABLED(CONFIG_NET_FOU)
56328486 501 case TUNNEL_ENCAP_FOU:
63487bab 502 return fou_encap_hlen(e);
bc1fc390 503 case TUNNEL_ENCAP_GUE:
63487bab
TH
504 return gue_encap_hlen(e);
505#endif
56328486
TH
506 default:
507 return -EINVAL;
508 }
509}
510
511int ip_tunnel_encap_setup(struct ip_tunnel *t,
512 struct ip_tunnel_encap *ipencap)
513{
514 int hlen;
515
516 memset(&t->encap, 0, sizeof(t->encap));
517
518 hlen = ip_encap_hlen(ipencap);
519 if (hlen < 0)
520 return hlen;
521
522 t->encap.type = ipencap->type;
523 t->encap.sport = ipencap->sport;
524 t->encap.dport = ipencap->dport;
525 t->encap.flags = ipencap->flags;
526
527 t->encap_hlen = hlen;
528 t->hlen = t->encap_hlen + t->tun_hlen;
529
530 return 0;
531}
532EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
533
56328486
TH
534int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
535 u8 *protocol, struct flowi4 *fl4)
536{
537 switch (t->encap.type) {
538 case TUNNEL_ENCAP_NONE:
539 return 0;
63487bab 540#if IS_ENABLED(CONFIG_NET_FOU)
56328486 541 case TUNNEL_ENCAP_FOU:
63487bab 542 return fou_build_header(skb, &t->encap, protocol, fl4);
bc1fc390 543 case TUNNEL_ENCAP_GUE:
63487bab
TH
544 return gue_build_header(skb, &t->encap, protocol, fl4);
545#endif
56328486
TH
546 default:
547 return -EINVAL;
548 }
549}
550EXPORT_SYMBOL(ip_tunnel_encap);
551
23a3647b
PS
552static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
553 struct rtable *rt, __be16 df)
554{
555 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 556 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
557 int mtu;
558
559 if (df)
560 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
561 - sizeof(struct iphdr) - tunnel->hlen;
562 else
563 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
564
565 if (skb_dst(skb))
566 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
567
568 if (skb->protocol == htons(ETH_P_IP)) {
569 if (!skb_is_gso(skb) &&
570 (df & htons(IP_DF)) && mtu < pkt_size) {
571 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
572 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
573 return -E2BIG;
574 }
575 }
576#if IS_ENABLED(CONFIG_IPV6)
577 else if (skb->protocol == htons(ETH_P_IPV6)) {
578 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
579
580 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
581 mtu >= IPV6_MIN_MTU) {
582 if ((tunnel->parms.iph.daddr &&
583 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
584 rt6->rt6i_dst.plen == 128) {
585 rt6->rt6i_flags |= RTF_MODIFIED;
586 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
587 }
588 }
589
590 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
591 mtu < pkt_size) {
592 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
593 return -E2BIG;
594 }
595 }
596#endif
597 return 0;
598}
599
c5441932 600void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 601 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
602{
603 struct ip_tunnel *tunnel = netdev_priv(dev);
604 const struct iphdr *inner_iph;
c5441932
PS
605 struct flowi4 fl4;
606 u8 tos, ttl;
607 __be16 df;
b045d37b 608 struct rtable *rt; /* Route to the other host */
c5441932
PS
609 unsigned int max_headroom; /* The extra header space needed */
610 __be32 dst;
0e6fbc5b 611 int err;
22fb22ea 612 bool connected;
c5441932
PS
613
614 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 615 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
616
617 dst = tnl_params->daddr;
618 if (dst == 0) {
619 /* NBMA tunnel */
620
621 if (skb_dst(skb) == NULL) {
622 dev->stats.tx_fifo_errors++;
623 goto tx_error;
624 }
625
626 if (skb->protocol == htons(ETH_P_IP)) {
627 rt = skb_rtable(skb);
628 dst = rt_nexthop(rt, inner_iph->daddr);
629 }
630#if IS_ENABLED(CONFIG_IPV6)
631 else if (skb->protocol == htons(ETH_P_IPV6)) {
632 const struct in6_addr *addr6;
633 struct neighbour *neigh;
634 bool do_tx_error_icmp;
635 int addr_type;
636
637 neigh = dst_neigh_lookup(skb_dst(skb),
638 &ipv6_hdr(skb)->daddr);
639 if (neigh == NULL)
640 goto tx_error;
641
642 addr6 = (const struct in6_addr *)&neigh->primary_key;
643 addr_type = ipv6_addr_type(addr6);
644
645 if (addr_type == IPV6_ADDR_ANY) {
646 addr6 = &ipv6_hdr(skb)->daddr;
647 addr_type = ipv6_addr_type(addr6);
648 }
649
650 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
651 do_tx_error_icmp = true;
652 else {
653 do_tx_error_icmp = false;
654 dst = addr6->s6_addr32[3];
655 }
656 neigh_release(neigh);
657 if (do_tx_error_icmp)
658 goto tx_error_icmp;
659 }
660#endif
661 else
662 goto tx_error;
7d442fab
TH
663
664 connected = false;
c5441932
PS
665 }
666
667 tos = tnl_params->tos;
668 if (tos & 0x1) {
669 tos &= ~0x1;
7d442fab 670 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 671 tos = inner_iph->tos;
7d442fab
TH
672 connected = false;
673 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 674 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
675 connected = false;
676 }
c5441932
PS
677 }
678
7d442fab
TH
679 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
680 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
681
56328486
TH
682 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
683 goto tx_error;
684
95cb5745 685 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
7d442fab
TH
686
687 if (!rt) {
688 rt = ip_route_output_key(tunnel->net, &fl4);
689
690 if (IS_ERR(rt)) {
691 dev->stats.tx_carrier_errors++;
692 goto tx_error;
693 }
694 if (connected)
95cb5745 695 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932 696 }
7d442fab 697
0e6fbc5b 698 if (rt->dst.dev == dev) {
c5441932
PS
699 ip_rt_put(rt);
700 dev->stats.collisions++;
701 goto tx_error;
702 }
c5441932 703
23a3647b
PS
704 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
705 ip_rt_put(rt);
706 goto tx_error;
c5441932 707 }
c5441932
PS
708
709 if (tunnel->err_count > 0) {
710 if (time_before(jiffies,
711 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
712 tunnel->err_count--;
713
11c21a30 714 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
715 dst_link_failure(skb);
716 } else
717 tunnel->err_count = 0;
718 }
719
d4a71b15 720 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
721 ttl = tnl_params->ttl;
722 if (ttl == 0) {
723 if (skb->protocol == htons(ETH_P_IP))
724 ttl = inner_iph->ttl;
725#if IS_ENABLED(CONFIG_IPV6)
726 else if (skb->protocol == htons(ETH_P_IPV6))
727 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
728#endif
729 else
730 ttl = ip4_dst_hoplimit(&rt->dst);
731 }
732
23a3647b
PS
733 df = tnl_params->frag_off;
734 if (skb->protocol == htons(ETH_P_IP))
735 df |= (inner_iph->frag_off&htons(IP_DF));
736
0e6fbc5b 737 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 738 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
3e08f4a7 739 if (max_headroom > dev->needed_headroom)
c5441932 740 dev->needed_headroom = max_headroom;
3e08f4a7
SK
741
742 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 743 ip_rt_put(rt);
3e08f4a7 744 dev->stats.tx_dropped++;
3acfa1e7 745 kfree_skb(skb);
3e08f4a7 746 return;
c5441932
PS
747 }
748
aad88724 749 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 750 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 751 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 752
c5441932
PS
753 return;
754
755#if IS_ENABLED(CONFIG_IPV6)
756tx_error_icmp:
757 dst_link_failure(skb);
758#endif
759tx_error:
760 dev->stats.tx_errors++;
3acfa1e7 761 kfree_skb(skb);
c5441932
PS
762}
763EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
764
765static void ip_tunnel_update(struct ip_tunnel_net *itn,
766 struct ip_tunnel *t,
767 struct net_device *dev,
768 struct ip_tunnel_parm *p,
769 bool set_mtu)
770{
771 ip_tunnel_del(t);
772 t->parms.iph.saddr = p->iph.saddr;
773 t->parms.iph.daddr = p->iph.daddr;
774 t->parms.i_key = p->i_key;
775 t->parms.o_key = p->o_key;
776 if (dev->type != ARPHRD_ETHER) {
777 memcpy(dev->dev_addr, &p->iph.saddr, 4);
778 memcpy(dev->broadcast, &p->iph.daddr, 4);
779 }
780 ip_tunnel_add(itn, t);
781
782 t->parms.iph.ttl = p->iph.ttl;
783 t->parms.iph.tos = p->iph.tos;
784 t->parms.iph.frag_off = p->iph.frag_off;
785
786 if (t->parms.link != p->link) {
787 int mtu;
788
789 t->parms.link = p->link;
790 mtu = ip_tunnel_bind_dev(dev);
791 if (set_mtu)
792 dev->mtu = mtu;
793 }
cf71d2bc 794 ip_tunnel_dst_reset_all(t);
c5441932
PS
795 netdev_state_change(dev);
796}
797
798int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
799{
800 int err = 0;
8c923ce2
ND
801 struct ip_tunnel *t = netdev_priv(dev);
802 struct net *net = t->net;
803 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
804
805 BUG_ON(!itn->fb_tunnel_dev);
806 switch (cmd) {
807 case SIOCGETTUNNEL:
8c923ce2 808 if (dev == itn->fb_tunnel_dev) {
c5441932 809 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
8c923ce2
ND
810 if (t == NULL)
811 t = netdev_priv(dev);
812 }
c5441932
PS
813 memcpy(p, &t->parms, sizeof(*p));
814 break;
815
816 case SIOCADDTUNNEL:
817 case SIOCCHGTUNNEL:
818 err = -EPERM;
819 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
820 goto done;
821 if (p->iph.ttl)
822 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
823 if (!(p->i_flags & VTI_ISVTI)) {
824 if (!(p->i_flags & TUNNEL_KEY))
825 p->i_key = 0;
826 if (!(p->o_flags & TUNNEL_KEY))
827 p->o_key = 0;
828 }
c5441932
PS
829
830 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
831
d61746b2
SK
832 if (cmd == SIOCADDTUNNEL) {
833 if (!t) {
834 t = ip_tunnel_create(net, itn, p);
835 err = PTR_ERR_OR_ZERO(t);
836 break;
837 }
838
839 err = -EEXIST;
ee30ef4d 840 break;
6dd3c9ec 841 }
c5441932
PS
842 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
843 if (t != NULL) {
844 if (t->dev != dev) {
845 err = -EEXIST;
846 break;
847 }
848 } else {
849 unsigned int nflags = 0;
850
851 if (ipv4_is_multicast(p->iph.daddr))
852 nflags = IFF_BROADCAST;
853 else if (p->iph.daddr)
854 nflags = IFF_POINTOPOINT;
855
856 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
857 err = -EINVAL;
858 break;
859 }
860
861 t = netdev_priv(dev);
862 }
863 }
864
865 if (t) {
866 err = 0;
867 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
868 } else {
869 err = -ENOENT;
870 }
c5441932
PS
871 break;
872
873 case SIOCDELTUNNEL:
874 err = -EPERM;
875 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
876 goto done;
877
878 if (dev == itn->fb_tunnel_dev) {
879 err = -ENOENT;
880 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
881 if (t == NULL)
882 goto done;
883 err = -EPERM;
884 if (t == netdev_priv(itn->fb_tunnel_dev))
885 goto done;
886 dev = t->dev;
887 }
888 unregister_netdevice(dev);
889 err = 0;
890 break;
891
892 default:
893 err = -EINVAL;
894 }
895
896done:
897 return err;
898}
899EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
900
901int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
902{
903 struct ip_tunnel *tunnel = netdev_priv(dev);
904 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
905
906 if (new_mtu < 68 ||
907 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
908 return -EINVAL;
909 dev->mtu = new_mtu;
910 return 0;
911}
912EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
913
914static void ip_tunnel_dev_free(struct net_device *dev)
915{
916 struct ip_tunnel *tunnel = netdev_priv(dev);
917
918 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 919 free_percpu(tunnel->dst_cache);
c5441932
PS
920 free_percpu(dev->tstats);
921 free_netdev(dev);
922}
923
924void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
925{
c5441932
PS
926 struct ip_tunnel *tunnel = netdev_priv(dev);
927 struct ip_tunnel_net *itn;
928
6c742e71 929 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
930
931 if (itn->fb_tunnel_dev != dev) {
932 ip_tunnel_del(netdev_priv(dev));
933 unregister_netdevice_queue(dev, head);
934 }
935}
936EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
937
d3b6f614 938int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
939 struct rtnl_link_ops *ops, char *devname)
940{
941 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
942 struct ip_tunnel_parm parms;
6261d983 943 unsigned int i;
c5441932 944
6261d983 945 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
946 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
947
948 if (!ops) {
949 itn->fb_tunnel_dev = NULL;
950 return 0;
951 }
6261d983 952
c5441932
PS
953 memset(&parms, 0, sizeof(parms));
954 if (devname)
955 strlcpy(parms.name, devname, IFNAMSIZ);
956
957 rtnl_lock();
958 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
959 /* FB netdevice is special: we have one, and only one per netns.
960 * Allowing to move it to another netns is clearly unsafe.
961 */
67013282 962 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 963 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 964 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
965 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
966 }
b4de77ad 967 rtnl_unlock();
c5441932 968
27d79f3b 969 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
970}
971EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
972
6c742e71
ND
973static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
974 struct rtnl_link_ops *ops)
c5441932 975{
6c742e71
ND
976 struct net *net = dev_net(itn->fb_tunnel_dev);
977 struct net_device *dev, *aux;
c5441932
PS
978 int h;
979
6c742e71
ND
980 for_each_netdev_safe(net, dev, aux)
981 if (dev->rtnl_link_ops == ops)
982 unregister_netdevice_queue(dev, head);
983
c5441932
PS
984 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
985 struct ip_tunnel *t;
986 struct hlist_node *n;
987 struct hlist_head *thead = &itn->tunnels[h];
988
989 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
990 /* If dev is in the same netns, it has already
991 * been added to the list by the previous loop.
992 */
993 if (!net_eq(dev_net(t->dev), net))
994 unregister_netdevice_queue(t->dev, head);
c5441932 995 }
c5441932
PS
996}
997
6c742e71 998void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
999{
1000 LIST_HEAD(list);
1001
1002 rtnl_lock();
6c742e71 1003 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
1004 unregister_netdevice_many(&list);
1005 rtnl_unlock();
c5441932
PS
1006}
1007EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1008
1009int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1010 struct ip_tunnel_parm *p)
1011{
1012 struct ip_tunnel *nt;
1013 struct net *net = dev_net(dev);
1014 struct ip_tunnel_net *itn;
1015 int mtu;
1016 int err;
1017
1018 nt = netdev_priv(dev);
1019 itn = net_generic(net, nt->ip_tnl_net_id);
1020
1021 if (ip_tunnel_find(itn, p, dev->type))
1022 return -EEXIST;
1023
5e6700b3 1024 nt->net = net;
c5441932
PS
1025 nt->parms = *p;
1026 err = register_netdevice(dev);
1027 if (err)
1028 goto out;
1029
1030 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1031 eth_hw_addr_random(dev);
1032
1033 mtu = ip_tunnel_bind_dev(dev);
1034 if (!tb[IFLA_MTU])
1035 dev->mtu = mtu;
1036
1037 ip_tunnel_add(itn, nt);
1038
1039out:
1040 return err;
1041}
1042EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1043
1044int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1045 struct ip_tunnel_parm *p)
1046{
6c742e71 1047 struct ip_tunnel *t;
c5441932 1048 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1049 struct net *net = tunnel->net;
c5441932
PS
1050 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1051
1052 if (dev == itn->fb_tunnel_dev)
1053 return -EINVAL;
1054
c5441932
PS
1055 t = ip_tunnel_find(itn, p, dev->type);
1056
1057 if (t) {
1058 if (t->dev != dev)
1059 return -EEXIST;
1060 } else {
6c742e71 1061 t = tunnel;
c5441932
PS
1062
1063 if (dev->type != ARPHRD_ETHER) {
1064 unsigned int nflags = 0;
1065
1066 if (ipv4_is_multicast(p->iph.daddr))
1067 nflags = IFF_BROADCAST;
1068 else if (p->iph.daddr)
1069 nflags = IFF_POINTOPOINT;
1070
1071 if ((dev->flags ^ nflags) &
1072 (IFF_POINTOPOINT | IFF_BROADCAST))
1073 return -EINVAL;
1074 }
1075 }
1076
1077 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1078 return 0;
1079}
1080EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1081
1082int ip_tunnel_init(struct net_device *dev)
1083{
1084 struct ip_tunnel *tunnel = netdev_priv(dev);
1085 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1086 int err;
c5441932
PS
1087
1088 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1089 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1090 if (!dev->tstats)
1091 return -ENOMEM;
1092
9a4aa9af
TH
1093 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1094 if (!tunnel->dst_cache) {
1095 free_percpu(dev->tstats);
1096 return -ENOMEM;
1097 }
1098
c5441932
PS
1099 err = gro_cells_init(&tunnel->gro_cells, dev);
1100 if (err) {
9a4aa9af 1101 free_percpu(tunnel->dst_cache);
c5441932
PS
1102 free_percpu(dev->tstats);
1103 return err;
1104 }
1105
1106 tunnel->dev = dev;
6c742e71 1107 tunnel->net = dev_net(dev);
c5441932
PS
1108 strcpy(tunnel->parms.name, dev->name);
1109 iph->version = 4;
1110 iph->ihl = 5;
1111
1112 return 0;
1113}
1114EXPORT_SYMBOL_GPL(ip_tunnel_init);
1115
1116void ip_tunnel_uninit(struct net_device *dev)
1117{
c5441932 1118 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1119 struct net *net = tunnel->net;
c5441932
PS
1120 struct ip_tunnel_net *itn;
1121
1122 itn = net_generic(net, tunnel->ip_tnl_net_id);
1123 /* fb_tunnel_dev will be unregisted in net-exit call. */
1124 if (itn->fb_tunnel_dev != dev)
1125 ip_tunnel_del(netdev_priv(dev));
7d442fab 1126
cf71d2bc 1127 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1128}
1129EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1130
1131/* Do least required initialization, rest of init is done in tunnel_init call */
1132void ip_tunnel_setup(struct net_device *dev, int net_id)
1133{
1134 struct ip_tunnel *tunnel = netdev_priv(dev);
1135 tunnel->ip_tnl_net_id = net_id;
1136}
1137EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1138
1139MODULE_LICENSE("GPL");
This page took 0.179434 seconds and 5 git commands to generate.