mm: convert printk(KERN_<LEVEL> to pr_<level>
[deliverable/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57 #include <net/udp.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69 }
70
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst, __be32 saddr)
73 {
74 struct dst_entry *old_dst;
75
76 dst_clone(dst);
77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
78 dst_release(old_dst);
79 idst->saddr = saddr;
80 }
81
82 static noinline void tunnel_dst_set(struct ip_tunnel *t,
83 struct dst_entry *dst, __be32 saddr)
84 {
85 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
86 }
87
88 static void tunnel_dst_reset(struct ip_tunnel *t)
89 {
90 tunnel_dst_set(t, NULL, 0);
91 }
92
93 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
94 {
95 int i;
96
97 for_each_possible_cpu(i)
98 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
99 }
100 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
102 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
103 u32 cookie, __be32 *saddr)
104 {
105 struct ip_tunnel_dst *idst;
106 struct dst_entry *dst;
107
108 rcu_read_lock();
109 idst = raw_cpu_ptr(t->dst_cache);
110 dst = rcu_dereference(idst->dst);
111 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
112 dst = NULL;
113 if (dst) {
114 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
115 *saddr = idst->saddr;
116 } else {
117 tunnel_dst_reset(t);
118 dst_release(dst);
119 dst = NULL;
120 }
121 }
122 rcu_read_unlock();
123 return (struct rtable *)dst;
124 }
125
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key)
128 {
129 if (p->i_flags & TUNNEL_KEY) {
130 if (flags & TUNNEL_KEY)
131 return key == p->i_key;
132 else
133 /* key expected, none present */
134 return false;
135 } else
136 return !(flags & TUNNEL_KEY);
137 }
138
139 /* Fallback tunnel: no source, no destination, no key, no options
140
141 Tunnel hash table:
142 We require exact key match i.e. if a key is present in packet
143 it will match only tunnel with the same key; if it is not present,
144 it will match only keyless tunnel.
145
146 All keysless packets, if not matched configured keyless tunnels
147 will match fallback tunnel.
148 Given src, dst and key, find appropriate for input tunnel.
149 */
150 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
151 int link, __be16 flags,
152 __be32 remote, __be32 local,
153 __be32 key)
154 {
155 unsigned int hash;
156 struct ip_tunnel *t, *cand = NULL;
157 struct hlist_head *head;
158
159 hash = ip_tunnel_hash(key, remote);
160 head = &itn->tunnels[hash];
161
162 hlist_for_each_entry_rcu(t, head, hash_node) {
163 if (local != t->parms.iph.saddr ||
164 remote != t->parms.iph.daddr ||
165 !(t->dev->flags & IFF_UP))
166 continue;
167
168 if (!ip_tunnel_key_match(&t->parms, flags, key))
169 continue;
170
171 if (t->parms.link == link)
172 return t;
173 else
174 cand = t;
175 }
176
177 hlist_for_each_entry_rcu(t, head, hash_node) {
178 if (remote != t->parms.iph.daddr ||
179 t->parms.iph.saddr != 0 ||
180 !(t->dev->flags & IFF_UP))
181 continue;
182
183 if (!ip_tunnel_key_match(&t->parms, flags, key))
184 continue;
185
186 if (t->parms.link == link)
187 return t;
188 else if (!cand)
189 cand = t;
190 }
191
192 hash = ip_tunnel_hash(key, 0);
193 head = &itn->tunnels[hash];
194
195 hlist_for_each_entry_rcu(t, head, hash_node) {
196 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
197 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
198 continue;
199
200 if (!(t->dev->flags & IFF_UP))
201 continue;
202
203 if (!ip_tunnel_key_match(&t->parms, flags, key))
204 continue;
205
206 if (t->parms.link == link)
207 return t;
208 else if (!cand)
209 cand = t;
210 }
211
212 if (flags & TUNNEL_NO_KEY)
213 goto skip_key_lookup;
214
215 hlist_for_each_entry_rcu(t, head, hash_node) {
216 if (t->parms.i_key != key ||
217 t->parms.iph.saddr != 0 ||
218 t->parms.iph.daddr != 0 ||
219 !(t->dev->flags & IFF_UP))
220 continue;
221
222 if (t->parms.link == link)
223 return t;
224 else if (!cand)
225 cand = t;
226 }
227
228 skip_key_lookup:
229 if (cand)
230 return cand;
231
232 t = rcu_dereference(itn->collect_md_tun);
233 if (t)
234 return t;
235
236 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
237 return netdev_priv(itn->fb_tunnel_dev);
238
239 return NULL;
240 }
241 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
242
243 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
244 struct ip_tunnel_parm *parms)
245 {
246 unsigned int h;
247 __be32 remote;
248 __be32 i_key = parms->i_key;
249
250 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
251 remote = parms->iph.daddr;
252 else
253 remote = 0;
254
255 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
256 i_key = 0;
257
258 h = ip_tunnel_hash(i_key, remote);
259 return &itn->tunnels[h];
260 }
261
262 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
263 {
264 struct hlist_head *head = ip_bucket(itn, &t->parms);
265
266 if (t->collect_md)
267 rcu_assign_pointer(itn->collect_md_tun, t);
268 hlist_add_head_rcu(&t->hash_node, head);
269 }
270
271 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
272 {
273 if (t->collect_md)
274 rcu_assign_pointer(itn->collect_md_tun, NULL);
275 hlist_del_init_rcu(&t->hash_node);
276 }
277
278 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
279 struct ip_tunnel_parm *parms,
280 int type)
281 {
282 __be32 remote = parms->iph.daddr;
283 __be32 local = parms->iph.saddr;
284 __be32 key = parms->i_key;
285 __be16 flags = parms->i_flags;
286 int link = parms->link;
287 struct ip_tunnel *t = NULL;
288 struct hlist_head *head = ip_bucket(itn, parms);
289
290 hlist_for_each_entry_rcu(t, head, hash_node) {
291 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr &&
293 link == t->parms.link &&
294 type == t->dev->type &&
295 ip_tunnel_key_match(&t->parms, flags, key))
296 break;
297 }
298 return t;
299 }
300
301 static struct net_device *__ip_tunnel_create(struct net *net,
302 const struct rtnl_link_ops *ops,
303 struct ip_tunnel_parm *parms)
304 {
305 int err;
306 struct ip_tunnel *tunnel;
307 struct net_device *dev;
308 char name[IFNAMSIZ];
309
310 if (parms->name[0])
311 strlcpy(name, parms->name, IFNAMSIZ);
312 else {
313 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
314 err = -E2BIG;
315 goto failed;
316 }
317 strlcpy(name, ops->kind, IFNAMSIZ);
318 strncat(name, "%d", 2);
319 }
320
321 ASSERT_RTNL();
322 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
323 if (!dev) {
324 err = -ENOMEM;
325 goto failed;
326 }
327 dev_net_set(dev, net);
328
329 dev->rtnl_link_ops = ops;
330
331 tunnel = netdev_priv(dev);
332 tunnel->parms = *parms;
333 tunnel->net = net;
334
335 err = register_netdevice(dev);
336 if (err)
337 goto failed_free;
338
339 return dev;
340
341 failed_free:
342 free_netdev(dev);
343 failed:
344 return ERR_PTR(err);
345 }
346
347 static inline void init_tunnel_flow(struct flowi4 *fl4,
348 int proto,
349 __be32 daddr, __be32 saddr,
350 __be32 key, __u8 tos, int oif)
351 {
352 memset(fl4, 0, sizeof(*fl4));
353 fl4->flowi4_oif = oif;
354 fl4->daddr = daddr;
355 fl4->saddr = saddr;
356 fl4->flowi4_tos = tos;
357 fl4->flowi4_proto = proto;
358 fl4->fl4_gre_key = key;
359 }
360
361 static int ip_tunnel_bind_dev(struct net_device *dev)
362 {
363 struct net_device *tdev = NULL;
364 struct ip_tunnel *tunnel = netdev_priv(dev);
365 const struct iphdr *iph;
366 int hlen = LL_MAX_HEADER;
367 int mtu = ETH_DATA_LEN;
368 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
369
370 iph = &tunnel->parms.iph;
371
372 /* Guess output device to choose reasonable mtu and needed_headroom */
373 if (iph->daddr) {
374 struct flowi4 fl4;
375 struct rtable *rt;
376
377 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
378 iph->saddr, tunnel->parms.o_key,
379 RT_TOS(iph->tos), tunnel->parms.link);
380 rt = ip_route_output_key(tunnel->net, &fl4);
381
382 if (!IS_ERR(rt)) {
383 tdev = rt->dst.dev;
384 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
385 ip_rt_put(rt);
386 }
387 if (dev->type != ARPHRD_ETHER)
388 dev->flags |= IFF_POINTOPOINT;
389 }
390
391 if (!tdev && tunnel->parms.link)
392 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
393
394 if (tdev) {
395 hlen = tdev->hard_header_len + tdev->needed_headroom;
396 mtu = tdev->mtu;
397 }
398
399 dev->needed_headroom = t_hlen + hlen;
400 mtu -= (dev->hard_header_len + t_hlen);
401
402 if (mtu < 68)
403 mtu = 68;
404
405 return mtu;
406 }
407
408 static struct ip_tunnel *ip_tunnel_create(struct net *net,
409 struct ip_tunnel_net *itn,
410 struct ip_tunnel_parm *parms)
411 {
412 struct ip_tunnel *nt;
413 struct net_device *dev;
414
415 BUG_ON(!itn->fb_tunnel_dev);
416 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
417 if (IS_ERR(dev))
418 return ERR_CAST(dev);
419
420 dev->mtu = ip_tunnel_bind_dev(dev);
421
422 nt = netdev_priv(dev);
423 ip_tunnel_add(itn, nt);
424 return nt;
425 }
426
427 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
428 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
429 bool log_ecn_error)
430 {
431 struct pcpu_sw_netstats *tstats;
432 const struct iphdr *iph = ip_hdr(skb);
433 int err;
434
435 #ifdef CONFIG_NET_IPGRE_BROADCAST
436 if (ipv4_is_multicast(iph->daddr)) {
437 tunnel->dev->stats.multicast++;
438 skb->pkt_type = PACKET_BROADCAST;
439 }
440 #endif
441
442 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
443 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
444 tunnel->dev->stats.rx_crc_errors++;
445 tunnel->dev->stats.rx_errors++;
446 goto drop;
447 }
448
449 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
450 if (!(tpi->flags&TUNNEL_SEQ) ||
451 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
452 tunnel->dev->stats.rx_fifo_errors++;
453 tunnel->dev->stats.rx_errors++;
454 goto drop;
455 }
456 tunnel->i_seqno = ntohl(tpi->seq) + 1;
457 }
458
459 skb_reset_network_header(skb);
460
461 err = IP_ECN_decapsulate(iph, skb);
462 if (unlikely(err)) {
463 if (log_ecn_error)
464 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
465 &iph->saddr, iph->tos);
466 if (err > 1) {
467 ++tunnel->dev->stats.rx_frame_errors;
468 ++tunnel->dev->stats.rx_errors;
469 goto drop;
470 }
471 }
472
473 tstats = this_cpu_ptr(tunnel->dev->tstats);
474 u64_stats_update_begin(&tstats->syncp);
475 tstats->rx_packets++;
476 tstats->rx_bytes += skb->len;
477 u64_stats_update_end(&tstats->syncp);
478
479 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
480
481 if (tunnel->dev->type == ARPHRD_ETHER) {
482 skb->protocol = eth_type_trans(skb, tunnel->dev);
483 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
484 } else {
485 skb->dev = tunnel->dev;
486 }
487
488 if (tun_dst)
489 skb_dst_set(skb, (struct dst_entry *)tun_dst);
490
491 gro_cells_receive(&tunnel->gro_cells, skb);
492 return 0;
493
494 drop:
495 kfree_skb(skb);
496 return 0;
497 }
498 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
499
500 static int ip_encap_hlen(struct ip_tunnel_encap *e)
501 {
502 const struct ip_tunnel_encap_ops *ops;
503 int hlen = -EINVAL;
504
505 if (e->type == TUNNEL_ENCAP_NONE)
506 return 0;
507
508 if (e->type >= MAX_IPTUN_ENCAP_OPS)
509 return -EINVAL;
510
511 rcu_read_lock();
512 ops = rcu_dereference(iptun_encaps[e->type]);
513 if (likely(ops && ops->encap_hlen))
514 hlen = ops->encap_hlen(e);
515 rcu_read_unlock();
516
517 return hlen;
518 }
519
520 const struct ip_tunnel_encap_ops __rcu *
521 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
522
523 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
524 unsigned int num)
525 {
526 if (num >= MAX_IPTUN_ENCAP_OPS)
527 return -ERANGE;
528
529 return !cmpxchg((const struct ip_tunnel_encap_ops **)
530 &iptun_encaps[num],
531 NULL, ops) ? 0 : -1;
532 }
533 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
534
535 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
536 unsigned int num)
537 {
538 int ret;
539
540 if (num >= MAX_IPTUN_ENCAP_OPS)
541 return -ERANGE;
542
543 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
544 &iptun_encaps[num],
545 ops, NULL) == ops) ? 0 : -1;
546
547 synchronize_net();
548
549 return ret;
550 }
551 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
552
553 int ip_tunnel_encap_setup(struct ip_tunnel *t,
554 struct ip_tunnel_encap *ipencap)
555 {
556 int hlen;
557
558 memset(&t->encap, 0, sizeof(t->encap));
559
560 hlen = ip_encap_hlen(ipencap);
561 if (hlen < 0)
562 return hlen;
563
564 t->encap.type = ipencap->type;
565 t->encap.sport = ipencap->sport;
566 t->encap.dport = ipencap->dport;
567 t->encap.flags = ipencap->flags;
568
569 t->encap_hlen = hlen;
570 t->hlen = t->encap_hlen + t->tun_hlen;
571
572 return 0;
573 }
574 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
575
576 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
577 u8 *protocol, struct flowi4 *fl4)
578 {
579 const struct ip_tunnel_encap_ops *ops;
580 int ret = -EINVAL;
581
582 if (t->encap.type == TUNNEL_ENCAP_NONE)
583 return 0;
584
585 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
586 return -EINVAL;
587
588 rcu_read_lock();
589 ops = rcu_dereference(iptun_encaps[t->encap.type]);
590 if (likely(ops && ops->build_header))
591 ret = ops->build_header(skb, &t->encap, protocol, fl4);
592 rcu_read_unlock();
593
594 return ret;
595 }
596 EXPORT_SYMBOL(ip_tunnel_encap);
597
598 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
599 struct rtable *rt, __be16 df,
600 const struct iphdr *inner_iph)
601 {
602 struct ip_tunnel *tunnel = netdev_priv(dev);
603 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
604 int mtu;
605
606 if (df)
607 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
608 - sizeof(struct iphdr) - tunnel->hlen;
609 else
610 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
611
612 if (skb_dst(skb))
613 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
614
615 if (skb->protocol == htons(ETH_P_IP)) {
616 if (!skb_is_gso(skb) &&
617 (inner_iph->frag_off & htons(IP_DF)) &&
618 mtu < pkt_size) {
619 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
620 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
621 return -E2BIG;
622 }
623 }
624 #if IS_ENABLED(CONFIG_IPV6)
625 else if (skb->protocol == htons(ETH_P_IPV6)) {
626 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
627
628 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
629 mtu >= IPV6_MIN_MTU) {
630 if ((tunnel->parms.iph.daddr &&
631 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
632 rt6->rt6i_dst.plen == 128) {
633 rt6->rt6i_flags |= RTF_MODIFIED;
634 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
635 }
636 }
637
638 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
639 mtu < pkt_size) {
640 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
641 return -E2BIG;
642 }
643 }
644 #endif
645 return 0;
646 }
647
648 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
649 const struct iphdr *tnl_params, u8 protocol)
650 {
651 struct ip_tunnel *tunnel = netdev_priv(dev);
652 const struct iphdr *inner_iph;
653 struct flowi4 fl4;
654 u8 tos, ttl;
655 __be16 df;
656 struct rtable *rt; /* Route to the other host */
657 unsigned int max_headroom; /* The extra header space needed */
658 __be32 dst;
659 bool connected;
660
661 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
662 connected = (tunnel->parms.iph.daddr != 0);
663
664 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
665
666 dst = tnl_params->daddr;
667 if (dst == 0) {
668 /* NBMA tunnel */
669
670 if (!skb_dst(skb)) {
671 dev->stats.tx_fifo_errors++;
672 goto tx_error;
673 }
674
675 if (skb->protocol == htons(ETH_P_IP)) {
676 rt = skb_rtable(skb);
677 dst = rt_nexthop(rt, inner_iph->daddr);
678 }
679 #if IS_ENABLED(CONFIG_IPV6)
680 else if (skb->protocol == htons(ETH_P_IPV6)) {
681 const struct in6_addr *addr6;
682 struct neighbour *neigh;
683 bool do_tx_error_icmp;
684 int addr_type;
685
686 neigh = dst_neigh_lookup(skb_dst(skb),
687 &ipv6_hdr(skb)->daddr);
688 if (!neigh)
689 goto tx_error;
690
691 addr6 = (const struct in6_addr *)&neigh->primary_key;
692 addr_type = ipv6_addr_type(addr6);
693
694 if (addr_type == IPV6_ADDR_ANY) {
695 addr6 = &ipv6_hdr(skb)->daddr;
696 addr_type = ipv6_addr_type(addr6);
697 }
698
699 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
700 do_tx_error_icmp = true;
701 else {
702 do_tx_error_icmp = false;
703 dst = addr6->s6_addr32[3];
704 }
705 neigh_release(neigh);
706 if (do_tx_error_icmp)
707 goto tx_error_icmp;
708 }
709 #endif
710 else
711 goto tx_error;
712
713 connected = false;
714 }
715
716 tos = tnl_params->tos;
717 if (tos & 0x1) {
718 tos &= ~0x1;
719 if (skb->protocol == htons(ETH_P_IP)) {
720 tos = inner_iph->tos;
721 connected = false;
722 } else if (skb->protocol == htons(ETH_P_IPV6)) {
723 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
724 connected = false;
725 }
726 }
727
728 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
729 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
730
731 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
732 goto tx_error;
733
734 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
735
736 if (!rt) {
737 rt = ip_route_output_key(tunnel->net, &fl4);
738
739 if (IS_ERR(rt)) {
740 dev->stats.tx_carrier_errors++;
741 goto tx_error;
742 }
743 if (connected)
744 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
745 }
746
747 if (rt->dst.dev == dev) {
748 ip_rt_put(rt);
749 dev->stats.collisions++;
750 goto tx_error;
751 }
752
753 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
754 ip_rt_put(rt);
755 goto tx_error;
756 }
757
758 if (tunnel->err_count > 0) {
759 if (time_before(jiffies,
760 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
761 tunnel->err_count--;
762
763 dst_link_failure(skb);
764 } else
765 tunnel->err_count = 0;
766 }
767
768 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
769 ttl = tnl_params->ttl;
770 if (ttl == 0) {
771 if (skb->protocol == htons(ETH_P_IP))
772 ttl = inner_iph->ttl;
773 #if IS_ENABLED(CONFIG_IPV6)
774 else if (skb->protocol == htons(ETH_P_IPV6))
775 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
776 #endif
777 else
778 ttl = ip4_dst_hoplimit(&rt->dst);
779 }
780
781 df = tnl_params->frag_off;
782 if (skb->protocol == htons(ETH_P_IP))
783 df |= (inner_iph->frag_off&htons(IP_DF));
784
785 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
786 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
787 if (max_headroom > dev->needed_headroom)
788 dev->needed_headroom = max_headroom;
789
790 if (skb_cow_head(skb, dev->needed_headroom)) {
791 ip_rt_put(rt);
792 dev->stats.tx_dropped++;
793 kfree_skb(skb);
794 return;
795 }
796
797 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
798 df, !net_eq(tunnel->net, dev_net(dev)));
799 return;
800
801 #if IS_ENABLED(CONFIG_IPV6)
802 tx_error_icmp:
803 dst_link_failure(skb);
804 #endif
805 tx_error:
806 dev->stats.tx_errors++;
807 kfree_skb(skb);
808 }
809 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
810
811 static void ip_tunnel_update(struct ip_tunnel_net *itn,
812 struct ip_tunnel *t,
813 struct net_device *dev,
814 struct ip_tunnel_parm *p,
815 bool set_mtu)
816 {
817 ip_tunnel_del(itn, t);
818 t->parms.iph.saddr = p->iph.saddr;
819 t->parms.iph.daddr = p->iph.daddr;
820 t->parms.i_key = p->i_key;
821 t->parms.o_key = p->o_key;
822 if (dev->type != ARPHRD_ETHER) {
823 memcpy(dev->dev_addr, &p->iph.saddr, 4);
824 memcpy(dev->broadcast, &p->iph.daddr, 4);
825 }
826 ip_tunnel_add(itn, t);
827
828 t->parms.iph.ttl = p->iph.ttl;
829 t->parms.iph.tos = p->iph.tos;
830 t->parms.iph.frag_off = p->iph.frag_off;
831
832 if (t->parms.link != p->link) {
833 int mtu;
834
835 t->parms.link = p->link;
836 mtu = ip_tunnel_bind_dev(dev);
837 if (set_mtu)
838 dev->mtu = mtu;
839 }
840 ip_tunnel_dst_reset_all(t);
841 netdev_state_change(dev);
842 }
843
844 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
845 {
846 int err = 0;
847 struct ip_tunnel *t = netdev_priv(dev);
848 struct net *net = t->net;
849 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
850
851 BUG_ON(!itn->fb_tunnel_dev);
852 switch (cmd) {
853 case SIOCGETTUNNEL:
854 if (dev == itn->fb_tunnel_dev) {
855 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
856 if (!t)
857 t = netdev_priv(dev);
858 }
859 memcpy(p, &t->parms, sizeof(*p));
860 break;
861
862 case SIOCADDTUNNEL:
863 case SIOCCHGTUNNEL:
864 err = -EPERM;
865 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
866 goto done;
867 if (p->iph.ttl)
868 p->iph.frag_off |= htons(IP_DF);
869 if (!(p->i_flags & VTI_ISVTI)) {
870 if (!(p->i_flags & TUNNEL_KEY))
871 p->i_key = 0;
872 if (!(p->o_flags & TUNNEL_KEY))
873 p->o_key = 0;
874 }
875
876 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
877
878 if (cmd == SIOCADDTUNNEL) {
879 if (!t) {
880 t = ip_tunnel_create(net, itn, p);
881 err = PTR_ERR_OR_ZERO(t);
882 break;
883 }
884
885 err = -EEXIST;
886 break;
887 }
888 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
889 if (t) {
890 if (t->dev != dev) {
891 err = -EEXIST;
892 break;
893 }
894 } else {
895 unsigned int nflags = 0;
896
897 if (ipv4_is_multicast(p->iph.daddr))
898 nflags = IFF_BROADCAST;
899 else if (p->iph.daddr)
900 nflags = IFF_POINTOPOINT;
901
902 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
903 err = -EINVAL;
904 break;
905 }
906
907 t = netdev_priv(dev);
908 }
909 }
910
911 if (t) {
912 err = 0;
913 ip_tunnel_update(itn, t, dev, p, true);
914 } else {
915 err = -ENOENT;
916 }
917 break;
918
919 case SIOCDELTUNNEL:
920 err = -EPERM;
921 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
922 goto done;
923
924 if (dev == itn->fb_tunnel_dev) {
925 err = -ENOENT;
926 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
927 if (!t)
928 goto done;
929 err = -EPERM;
930 if (t == netdev_priv(itn->fb_tunnel_dev))
931 goto done;
932 dev = t->dev;
933 }
934 unregister_netdevice(dev);
935 err = 0;
936 break;
937
938 default:
939 err = -EINVAL;
940 }
941
942 done:
943 return err;
944 }
945 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
946
947 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
948 {
949 struct ip_tunnel *tunnel = netdev_priv(dev);
950 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
951 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
952
953 if (new_mtu < 68)
954 return -EINVAL;
955
956 if (new_mtu > max_mtu) {
957 if (strict)
958 return -EINVAL;
959
960 new_mtu = max_mtu;
961 }
962
963 dev->mtu = new_mtu;
964 return 0;
965 }
966 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
967
968 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
969 {
970 return __ip_tunnel_change_mtu(dev, new_mtu, true);
971 }
972 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
973
974 static void ip_tunnel_dev_free(struct net_device *dev)
975 {
976 struct ip_tunnel *tunnel = netdev_priv(dev);
977
978 gro_cells_destroy(&tunnel->gro_cells);
979 free_percpu(tunnel->dst_cache);
980 free_percpu(dev->tstats);
981 free_netdev(dev);
982 }
983
984 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
985 {
986 struct ip_tunnel *tunnel = netdev_priv(dev);
987 struct ip_tunnel_net *itn;
988
989 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
990
991 if (itn->fb_tunnel_dev != dev) {
992 ip_tunnel_del(itn, netdev_priv(dev));
993 unregister_netdevice_queue(dev, head);
994 }
995 }
996 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
997
998 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
999 {
1000 struct ip_tunnel *tunnel = netdev_priv(dev);
1001
1002 return tunnel->net;
1003 }
1004 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1005
1006 int ip_tunnel_get_iflink(const struct net_device *dev)
1007 {
1008 struct ip_tunnel *tunnel = netdev_priv(dev);
1009
1010 return tunnel->parms.link;
1011 }
1012 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1013
1014 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
1015 struct rtnl_link_ops *ops, char *devname)
1016 {
1017 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1018 struct ip_tunnel_parm parms;
1019 unsigned int i;
1020
1021 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1022 INIT_HLIST_HEAD(&itn->tunnels[i]);
1023
1024 if (!ops) {
1025 itn->fb_tunnel_dev = NULL;
1026 return 0;
1027 }
1028
1029 memset(&parms, 0, sizeof(parms));
1030 if (devname)
1031 strlcpy(parms.name, devname, IFNAMSIZ);
1032
1033 rtnl_lock();
1034 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1035 /* FB netdevice is special: we have one, and only one per netns.
1036 * Allowing to move it to another netns is clearly unsafe.
1037 */
1038 if (!IS_ERR(itn->fb_tunnel_dev)) {
1039 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1040 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1041 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1042 }
1043 rtnl_unlock();
1044
1045 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1046 }
1047 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1048
1049 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1050 struct rtnl_link_ops *ops)
1051 {
1052 struct net *net = dev_net(itn->fb_tunnel_dev);
1053 struct net_device *dev, *aux;
1054 int h;
1055
1056 for_each_netdev_safe(net, dev, aux)
1057 if (dev->rtnl_link_ops == ops)
1058 unregister_netdevice_queue(dev, head);
1059
1060 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1061 struct ip_tunnel *t;
1062 struct hlist_node *n;
1063 struct hlist_head *thead = &itn->tunnels[h];
1064
1065 hlist_for_each_entry_safe(t, n, thead, hash_node)
1066 /* If dev is in the same netns, it has already
1067 * been added to the list by the previous loop.
1068 */
1069 if (!net_eq(dev_net(t->dev), net))
1070 unregister_netdevice_queue(t->dev, head);
1071 }
1072 }
1073
1074 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1075 {
1076 LIST_HEAD(list);
1077
1078 rtnl_lock();
1079 ip_tunnel_destroy(itn, &list, ops);
1080 unregister_netdevice_many(&list);
1081 rtnl_unlock();
1082 }
1083 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1084
1085 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1086 struct ip_tunnel_parm *p)
1087 {
1088 struct ip_tunnel *nt;
1089 struct net *net = dev_net(dev);
1090 struct ip_tunnel_net *itn;
1091 int mtu;
1092 int err;
1093
1094 nt = netdev_priv(dev);
1095 itn = net_generic(net, nt->ip_tnl_net_id);
1096
1097 if (nt->collect_md) {
1098 if (rtnl_dereference(itn->collect_md_tun))
1099 return -EEXIST;
1100 } else {
1101 if (ip_tunnel_find(itn, p, dev->type))
1102 return -EEXIST;
1103 }
1104
1105 nt->net = net;
1106 nt->parms = *p;
1107 err = register_netdevice(dev);
1108 if (err)
1109 goto out;
1110
1111 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1112 eth_hw_addr_random(dev);
1113
1114 mtu = ip_tunnel_bind_dev(dev);
1115 if (!tb[IFLA_MTU])
1116 dev->mtu = mtu;
1117
1118 ip_tunnel_add(itn, nt);
1119 out:
1120 return err;
1121 }
1122 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1123
1124 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1125 struct ip_tunnel_parm *p)
1126 {
1127 struct ip_tunnel *t;
1128 struct ip_tunnel *tunnel = netdev_priv(dev);
1129 struct net *net = tunnel->net;
1130 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1131
1132 if (dev == itn->fb_tunnel_dev)
1133 return -EINVAL;
1134
1135 t = ip_tunnel_find(itn, p, dev->type);
1136
1137 if (t) {
1138 if (t->dev != dev)
1139 return -EEXIST;
1140 } else {
1141 t = tunnel;
1142
1143 if (dev->type != ARPHRD_ETHER) {
1144 unsigned int nflags = 0;
1145
1146 if (ipv4_is_multicast(p->iph.daddr))
1147 nflags = IFF_BROADCAST;
1148 else if (p->iph.daddr)
1149 nflags = IFF_POINTOPOINT;
1150
1151 if ((dev->flags ^ nflags) &
1152 (IFF_POINTOPOINT | IFF_BROADCAST))
1153 return -EINVAL;
1154 }
1155 }
1156
1157 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1158 return 0;
1159 }
1160 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1161
1162 int ip_tunnel_init(struct net_device *dev)
1163 {
1164 struct ip_tunnel *tunnel = netdev_priv(dev);
1165 struct iphdr *iph = &tunnel->parms.iph;
1166 int err;
1167
1168 dev->destructor = ip_tunnel_dev_free;
1169 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1170 if (!dev->tstats)
1171 return -ENOMEM;
1172
1173 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1174 if (!tunnel->dst_cache) {
1175 free_percpu(dev->tstats);
1176 return -ENOMEM;
1177 }
1178
1179 err = gro_cells_init(&tunnel->gro_cells, dev);
1180 if (err) {
1181 free_percpu(tunnel->dst_cache);
1182 free_percpu(dev->tstats);
1183 return err;
1184 }
1185
1186 tunnel->dev = dev;
1187 tunnel->net = dev_net(dev);
1188 strcpy(tunnel->parms.name, dev->name);
1189 iph->version = 4;
1190 iph->ihl = 5;
1191
1192 if (tunnel->collect_md) {
1193 dev->features |= NETIF_F_NETNS_LOCAL;
1194 netif_keep_dst(dev);
1195 }
1196 return 0;
1197 }
1198 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1199
1200 void ip_tunnel_uninit(struct net_device *dev)
1201 {
1202 struct ip_tunnel *tunnel = netdev_priv(dev);
1203 struct net *net = tunnel->net;
1204 struct ip_tunnel_net *itn;
1205
1206 itn = net_generic(net, tunnel->ip_tnl_net_id);
1207 /* fb_tunnel_dev will be unregisted in net-exit call. */
1208 if (itn->fb_tunnel_dev != dev)
1209 ip_tunnel_del(itn, netdev_priv(dev));
1210
1211 ip_tunnel_dst_reset_all(tunnel);
1212 }
1213 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1214
1215 /* Do least required initialization, rest of init is done in tunnel_init call */
1216 void ip_tunnel_setup(struct net_device *dev, int net_id)
1217 {
1218 struct ip_tunnel *tunnel = netdev_priv(dev);
1219 tunnel->ip_tnl_net_id = net_id;
1220 }
1221 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1222
1223 MODULE_LICENSE("GPL");
This page took 0.085816 seconds and 5 git commands to generate.