net: Move fou_build_header into fou.c and refactor
net/ipv4/ip_tunnel.c
/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_NET_FOU)
#include <net/fou.h>
#endif

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
        return hash_32((__force u32)key ^ (__force u32)remote,
                       IP_TNL_HASH_BITS);
}

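/* Per-cpu dst cache update. The bare xchg() below lets a writer replace
 * the cached entry without taking a lock; readers (tunnel_rtable_get())
 * grab a reference with atomic_inc_not_zero(), so an entry that is being
 * torn down concurrently is simply treated as a cache miss. A sketch of
 * the reader side, for illustration only:
 *
 *      dst = rcu_dereference(idst->dst);
 *      if (dst && !atomic_inc_not_zero(&dst->__refcnt))
 *              dst = NULL;     // lost the race; fall back to a route lookup
 */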
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
                             struct dst_entry *dst, __be32 saddr)
{
        struct dst_entry *old_dst;

        dst_clone(dst);
        old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
        dst_release(old_dst);
        idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
                                    struct dst_entry *dst, __be32 saddr)
{
        __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
        tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
        int i;

        for_each_possible_cpu(i)
                __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
                                        u32 cookie, __be32 *saddr)
{
        struct ip_tunnel_dst *idst;
        struct dst_entry *dst;

        rcu_read_lock();
        idst = raw_cpu_ptr(t->dst_cache);
        dst = rcu_dereference(idst->dst);
        if (dst && !atomic_inc_not_zero(&dst->__refcnt))
                dst = NULL;
        if (dst) {
                if (!dst->obsolete || dst->ops->check(dst, cookie)) {
                        *saddr = idst->saddr;
                } else {
                        tunnel_dst_reset(t);
                        dst_release(dst);
                        dst = NULL;
                }
        }
        rcu_read_unlock();
        return (struct rtable *)dst;
}

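/* Keyed/keyless matching is strict in both directions. The four cases,
 * spelled out (tunnel configuration vs. incoming packet):
 *
 *      tunnel has TUNNEL_KEY, packet has TUNNEL_KEY -> match iff keys equal
 *      tunnel has TUNNEL_KEY, packet is keyless     -> no match
 *      tunnel keyless,        packet has TUNNEL_KEY -> no match
 *      tunnel keyless,        packet is keyless     -> match
 */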
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
                                __be16 flags, __be32 key)
{
        if (p->i_flags & TUNNEL_KEY) {
                if (flags & TUNNEL_KEY)
                        return key == p->i_key;
                else
                        /* key expected, none present */
                        return false;
        } else
                return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched to a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
                                   int link, __be16 flags,
                                   __be32 remote, __be32 local,
                                   __be32 key)
{
        unsigned int hash;
        struct ip_tunnel *t, *cand = NULL;
        struct hlist_head *head;

        hash = ip_tunnel_hash(key, remote);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (remote != t->parms.iph.daddr ||
                    t->parms.iph.saddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hash = ip_tunnel_hash(key, 0);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
                    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
                        continue;

                if (!(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        if (flags & TUNNEL_NO_KEY)
                goto skip_key_lookup;

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (t->parms.i_key != key ||
                    t->parms.iph.saddr != 0 ||
                    t->parms.iph.daddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

skip_key_lookup:
        if (cand)
                return cand;

        if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
                return netdev_priv(itn->fb_tunnel_dev);

        return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
                                    struct ip_tunnel_parm *parms)
{
        unsigned int h;
        __be32 remote;
        __be32 i_key = parms->i_key;

        if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
                remote = parms->iph.daddr;
        else
                remote = 0;

        if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
                i_key = 0;

        h = ip_tunnel_hash(i_key, remote);
        return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        struct hlist_head *head = ip_bucket(itn, &t->parms);

        hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
        hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
                                        struct ip_tunnel_parm *parms,
                                        int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        __be16 flags = parms->i_flags;
        int link = parms->link;
        struct ip_tunnel *t = NULL;
        struct hlist_head *head = ip_bucket(itn, parms);

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    link == t->parms.link &&
                    type == t->dev->type &&
                    ip_tunnel_key_match(&t->parms, flags, key))
                        break;
        }
        return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
                                             const struct rtnl_link_ops *ops,
                                             struct ip_tunnel_parm *parms)
{
        int err;
        struct ip_tunnel *tunnel;
        struct net_device *dev;
        char name[IFNAMSIZ];

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else {
                if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
                        err = -E2BIG;
                        goto failed;
                }
                strlcpy(name, ops->kind, IFNAMSIZ);
                strncat(name, "%d", 2);
        }

        ASSERT_RTNL();
        dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
        if (!dev) {
                err = -ENOMEM;
                goto failed;
        }
        dev_net_set(dev, net);

        dev->rtnl_link_ops = ops;

        tunnel = netdev_priv(dev);
        tunnel->parms = *parms;
        tunnel->net = net;

        err = register_netdevice(dev);
        if (err)
                goto failed_free;

        return dev;

failed_free:
        free_netdev(dev);
failed:
        return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
                                    int proto,
                                    __be32 daddr, __be32 saddr,
                                    __be32 key, __u8 tos, int oif)
{
        memset(fl4, 0, sizeof(*fl4));
        fl4->flowi4_oif = oif;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->flowi4_tos = tos;
        fl4->flowi4_proto = proto;
        fl4->fl4_gre_key = key;
}

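/* Worked example of the MTU guess below, assuming a bare GRE tunnel over
 * a plain Ethernet underlay (illustrative numbers, not taken from this
 * file): with tdev->mtu = 1500, dev->hard_header_len = 0 and
 * t_hlen = tunnel->hlen + sizeof(struct iphdr) = 4 + 20, the device MTU
 * comes out as 1500 - (0 + 24) = 1476.
 */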
static int ip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */
        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
                                 iph->saddr, tunnel->parms.o_key,
                                 RT_TOS(iph->tos), tunnel->parms.link);
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
                        ip_rt_put(rt);
                }
                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = tdev->mtu;
        }
        dev->iflink = tunnel->parms.link;

        dev->needed_headroom = t_hlen + hlen;
        mtu -= (dev->hard_header_len + t_hlen);

        if (mtu < 68)
                mtu = 68;

        return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
                                          struct ip_tunnel_net *itn,
                                          struct ip_tunnel_parm *parms)
{
        struct ip_tunnel *nt;
        struct net_device *dev;

        BUG_ON(!itn->fb_tunnel_dev);
        dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
        if (IS_ERR(dev))
                return ERR_CAST(dev);

        dev->mtu = ip_tunnel_bind_dev(dev);

        nt = netdev_priv(dev);
        ip_tunnel_add(itn, nt);
        return nt;
}

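/* Receive path, in order: optional multicast accounting, checksum-flag
 * policy (the packet must carry TUNNEL_CSUM iff the tunnel expects it),
 * optional in-order sequence enforcement for TUNNEL_SEQ, ECN
 * decapsulation, per-cpu stats update, then hand-off to GRO cells.
 */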
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
        struct pcpu_sw_netstats *tstats;
        const struct iphdr *iph = ip_hdr(skb);
        int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(iph->daddr)) {
                tunnel->dev->stats.multicast++;
                skb->pkt_type = PACKET_BROADCAST;
        }
#endif

        if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
            ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
                tunnel->dev->stats.rx_crc_errors++;
                tunnel->dev->stats.rx_errors++;
                goto drop;
        }

        if (tunnel->parms.i_flags&TUNNEL_SEQ) {
                if (!(tpi->flags&TUNNEL_SEQ) ||
                    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
                        tunnel->dev->stats.rx_fifo_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }

        skb_reset_network_header(skb);

        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
                if (log_ecn_error)
                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                             &iph->saddr, iph->tos);
                if (err > 1) {
                        ++tunnel->dev->stats.rx_frame_errors;
                        ++tunnel->dev->stats.rx_errors;
                        goto drop;
                }
        }

        tstats = this_cpu_ptr(tunnel->dev->tstats);
        u64_stats_update_begin(&tstats->syncp);
        tstats->rx_packets++;
        tstats->rx_bytes += skb->len;
        u64_stats_update_end(&tstats->syncp);

        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

        if (tunnel->dev->type == ARPHRD_ETHER) {
                skb->protocol = eth_type_trans(skb, tunnel->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        } else {
                skb->dev = tunnel->dev;
        }

        gro_cells_receive(&tunnel->gro_cells, skb);
        return 0;

drop:
        kfree_skb(skb);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

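/* Per the commit this file belongs to, the FOU/GUE header-size and
 * header-build helpers now live in net/ipv4/fou.c; the switches below
 * only dispatch on the configured encapsulation type. FOU wraps the
 * tunnel payload in a UDP header, GUE in a UDP header plus its own
 * header; the exact lengths are fou_encap_hlen()'s and
 * gue_encap_hlen()'s business.
 */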
static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
        switch (e->type) {
        case TUNNEL_ENCAP_NONE:
                return 0;
#if IS_ENABLED(CONFIG_NET_FOU)
        case TUNNEL_ENCAP_FOU:
                return fou_encap_hlen(e);
        case TUNNEL_ENCAP_GUE:
                return gue_encap_hlen(e);
#endif
        default:
                return -EINVAL;
        }
}

int ip_tunnel_encap_setup(struct ip_tunnel *t,
                          struct ip_tunnel_encap *ipencap)
{
        int hlen;

        memset(&t->encap, 0, sizeof(t->encap));

        hlen = ip_encap_hlen(ipencap);
        if (hlen < 0)
                return hlen;

        t->encap.type = ipencap->type;
        t->encap.sport = ipencap->sport;
        t->encap.dport = ipencap->dport;
        t->encap.flags = ipencap->flags;

        t->encap_hlen = hlen;
        t->hlen = t->encap_hlen + t->tun_hlen;

        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

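/* Minimal caller-side sketch (hypothetical values, not from this file):
 * a tunnel driver parsing netlink encap attributes might fill the
 * request as
 *
 *      struct ip_tunnel_encap ipencap = {
 *              .type  = TUNNEL_ENCAP_FOU,
 *              .sport = 0,             // 0 = let the stack pick a source port
 *              .dport = htons(5555),
 *      };
 *
 * and then call ip_tunnel_encap_setup(t, &ipencap), which caches the
 * parameters and folds the encap header length into t->hlen.
 */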
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
                    u8 *protocol, struct flowi4 *fl4)
{
        switch (t->encap.type) {
        case TUNNEL_ENCAP_NONE:
                return 0;
#if IS_ENABLED(CONFIG_NET_FOU)
        case TUNNEL_ENCAP_FOU:
                return fou_build_header(skb, &t->encap, protocol, fl4);
        case TUNNEL_ENCAP_GUE:
                return gue_build_header(skb, &t->encap, protocol, fl4);
#endif
        default:
                return -EINVAL;
        }
}
EXPORT_SYMBOL(ip_tunnel_encap);

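/* PMTU bookkeeping for the encapsulated flow. With DF set, the usable
 * payload is the route MTU minus the link header, the outer IP header
 * and the tunnel header; oversized non-GSO inner packets are bounced
 * back with ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG) so the sender can
 * shrink its path MTU instead of us fragmenting the outer packet.
 */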
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                           struct rtable *rt, __be16 df)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
        int mtu;

        if (df)
                mtu = dst_mtu(&rt->dst) - dev->hard_header_len
                                        - sizeof(struct iphdr) - tunnel->hlen;
        else
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                if (!skb_is_gso(skb) &&
                    (df & htons(IP_DF)) && mtu < pkt_size) {
                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
                    mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                    mtu < pkt_size) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -E2BIG;
                }
        }
#endif
        return 0;
}

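/* Transmit pipeline, roughly: resolve the outer destination (NBMA
 * tunnels take it from the inner packet), inherit TOS/TTL from the inner
 * header when configured to, run the optional UDP encapsulation
 * (ip_tunnel_encap() above, which may rewrite *protocol and the flow),
 * route the outer packet with the per-cpu dst cache as a fast path for
 * connected tunnels, enforce PMTU, then push the outer IP header via
 * iptunnel_xmit().
 */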
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                    const struct iphdr *tnl_params, u8 protocol)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *inner_iph;
        struct flowi4 fl4;
        u8     tos, ttl;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        unsigned int max_headroom;      /* The extra header space needed */
        __be32 dst;
        int err;
        bool connected;

        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        connected = (tunnel->parms.iph.daddr != 0);

        dst = tnl_params->daddr;
        if (dst == 0) {
                /* NBMA tunnel */

                if (skb_dst(skb) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        dst = rt_nexthop(rt, inner_iph->daddr);
                }
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        struct neighbour *neigh;
                        bool do_tx_error_icmp;
                        int addr_type;

                        neigh = dst_neigh_lookup(skb_dst(skb),
                                                 &ipv6_hdr(skb)->daddr);
                        if (neigh == NULL)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                do_tx_error_icmp = true;
                        else {
                                do_tx_error_icmp = false;
                                dst = addr6->s6_addr32[3];
                        }
                        neigh_release(neigh);
                        if (do_tx_error_icmp)
                                goto tx_error_icmp;
                }
#endif
                else
                        goto tx_error;

                connected = false;
        }

        tos = tnl_params->tos;
        if (tos & 0x1) {
                tos &= ~0x1;
                if (skb->protocol == htons(ETH_P_IP)) {
                        tos = inner_iph->tos;
                        connected = false;
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
                        connected = false;
                }
        }

        init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
                         tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
                goto tx_error;

        rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (connected)
                        tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
        }

        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        df = tnl_params->frag_off;
        if (skb->protocol == htons(ETH_P_IP))
                df |= (inner_iph->frag_off&htons(IP_DF));

        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
        if (max_headroom > dev->needed_headroom)
                dev->needed_headroom = max_headroom;

        if (skb_cow_head(skb, dev->needed_headroom)) {
                ip_rt_put(rt);
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return;
        }

        err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
                            tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
        iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

        return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
        dst_link_failure(skb);
#endif
tx_error:
        dev->stats.tx_errors++;
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
                             struct ip_tunnel *t,
                             struct net_device *dev,
                             struct ip_tunnel_parm *p,
                             bool set_mtu)
{
        ip_tunnel_del(t);
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        t->parms.i_key = p->i_key;
        t->parms.o_key = p->o_key;
        if (dev->type != ARPHRD_ETHER) {
                memcpy(dev->dev_addr, &p->iph.saddr, 4);
                memcpy(dev->broadcast, &p->iph.daddr, 4);
        }
        ip_tunnel_add(itn, t);

        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;

        if (t->parms.link != p->link) {
                int mtu;

                t->parms.link = p->link;
                mtu = ip_tunnel_bind_dev(dev);
                if (set_mtu)
                        dev->mtu = mtu;
        }
        ip_tunnel_dst_reset_all(t);
        netdev_state_change(dev);
}

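/* Legacy ioctl interface: SIOCGETTUNNEL reads parameters back,
 * SIOCADDTUNNEL/SIOCCHGTUNNEL create or update a tunnel (CAP_NET_ADMIN
 * required), SIOCDELTUNNEL removes one. On the fallback device, get and
 * delete act on the tunnel matching the passed parameters rather than
 * on the fallback device itself.
 */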
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
        int err = 0;
        struct ip_tunnel *t = netdev_priv(dev);
        struct net *net = t->net;
        struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

        BUG_ON(!itn->fb_tunnel_dev);
        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == itn->fb_tunnel_dev) {
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (t == NULL)
                                t = netdev_priv(dev);
                }
                memcpy(p, &t->parms, sizeof(*p));
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;
                if (p->iph.ttl)
                        p->iph.frag_off |= htons(IP_DF);
                if (!(p->i_flags & VTI_ISVTI)) {
                        if (!(p->i_flags & TUNNEL_KEY))
                                p->i_key = 0;
                        if (!(p->o_flags & TUNNEL_KEY))
                                p->o_key = 0;
                }

                t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

                if (cmd == SIOCADDTUNNEL) {
                        if (!t) {
                                t = ip_tunnel_create(net, itn, p);
                                err = PTR_ERR_OR_ZERO(t);
                                break;
                        }

                        err = -EEXIST;
                        break;
                }
                if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned int nflags = 0;

                                if (ipv4_is_multicast(p->iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p->iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }

                                t = netdev_priv(dev);
                        }
                }

                if (t) {
                        err = 0;
                        ip_tunnel_update(itn, t, dev, p, true);
                } else {
                        err = -ENOENT;
                }
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                if (dev == itn->fb_tunnel_dev) {
                        err = -ENOENT;
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (t == NULL)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(itn->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

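/* MTU bounds below: 68 is the minimum MTU IPv4 guarantees (RFC 791), and
 * 0xFFF8 is the largest 16-bit IP total length rounded down to a
 * multiple of 8; both are adjusted for the link and tunnel headers this
 * device has to prepend.
 */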
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        if (new_mtu < 68 ||
            new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        gro_cells_destroy(&tunnel->gro_cells);
        free_percpu(tunnel->dst_cache);
        free_percpu(dev->tstats);
        free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_net *itn;

        itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

        if (itn->fb_tunnel_dev != dev) {
                ip_tunnel_del(netdev_priv(dev));
                unregister_netdevice_queue(dev, head);
        }
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
                       struct rtnl_link_ops *ops, char *devname)
{
        struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
        struct ip_tunnel_parm parms;
        unsigned int i;

        for (i = 0; i < IP_TNL_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&itn->tunnels[i]);

        if (!ops) {
                itn->fb_tunnel_dev = NULL;
                return 0;
        }

        memset(&parms, 0, sizeof(parms));
        if (devname)
                strlcpy(parms.name, devname, IFNAMSIZ);

        rtnl_lock();
        itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing it to be moved to another netns is clearly unsafe.
         */
        if (!IS_ERR(itn->fb_tunnel_dev)) {
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
                itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
                ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
        }
        rtnl_unlock();

        return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
                              struct rtnl_link_ops *ops)
{
        struct net *net = dev_net(itn->fb_tunnel_dev);
        struct net_device *dev, *aux;
        int h;

        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == ops)
                        unregister_netdevice_queue(dev, head);

        for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
                struct ip_tunnel *t;
                struct hlist_node *n;
                struct hlist_head *thead = &itn->tunnels[h];

                hlist_for_each_entry_safe(t, n, thead, hash_node)
                        /* If dev is in the same netns, it has already
                         * been added to the list by the previous loop.
                         */
                        if (!net_eq(dev_net(t->dev), net))
                                unregister_netdevice_queue(t->dev, head);
        }
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
        LIST_HEAD(list);

        rtnl_lock();
        ip_tunnel_destroy(itn, &list, ops);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
                      struct ip_tunnel_parm *p)
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ip_tunnel_net *itn;
        int mtu;
        int err;

        nt = netdev_priv(dev);
        itn = net_generic(net, nt->ip_tnl_net_id);

        if (ip_tunnel_find(itn, p, dev->type))
                return -EEXIST;

        nt->net = net;
        nt->parms = *p;
        err = register_netdevice(dev);
        if (err)
                goto out;

        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                eth_hw_addr_random(dev);

        mtu = ip_tunnel_bind_dev(dev);
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;

        ip_tunnel_add(itn, nt);

out:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
                         struct ip_tunnel_parm *p)
{
        struct ip_tunnel *t;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

        if (dev == itn->fb_tunnel_dev)
                return -EINVAL;

        t = ip_tunnel_find(itn, p, dev->type);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = tunnel;

                if (dev->type != ARPHRD_ETHER) {
                        unsigned int nflags = 0;

                        if (ipv4_is_multicast(p->iph.daddr))
                                nflags = IFF_BROADCAST;
                        else if (p->iph.daddr)
                                nflags = IFF_POINTOPOINT;

                        if ((dev->flags ^ nflags) &
                            (IFF_POINTOPOINT | IFF_BROADCAST))
                                return -EINVAL;
                }
        }

        ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        int err;

        dev->destructor = ip_tunnel_dev_free;
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;

        tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
        if (!tunnel->dst_cache) {
                free_percpu(dev->tstats);
                return -ENOMEM;
        }

        err = gro_cells_init(&tunnel->gro_cells, dev);
        if (err) {
                free_percpu(tunnel->dst_cache);
                free_percpu(dev->tstats);
                return err;
        }

        tunnel->dev = dev;
        tunnel->net = dev_net(dev);
        strcpy(tunnel->parms.name, dev->name);
        iph->version = 4;
        iph->ihl = 5;

        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn;

        itn = net_generic(net, tunnel->ip_tnl_net_id);
        /* fb_tunnel_dev will be unregistered in the net-exit call. */
        if (itn->fb_tunnel_dev != dev)
                ip_tunnel_del(netdev_priv(dev));

        ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");