ip_tunnel: fix i_key matching in ip_tunnel_find
net/ipv4/ip_tunnel.c
/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

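/* Each tunnel keeps a per-CPU cache of its output route. A dst that
 * carries DST_NOCACHE is not meant to be kept around, so NULL is
 * stored instead; the previous entry is swapped out atomically and
 * released.
 */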
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
}

static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}
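
/* Illustration of the matching rules implemented above:
 *
 *	tunnel i_flags		packet flags		result
 *	TUNNEL_KEY set		TUNNEL_KEY set		match iff key == p->i_key
 *	TUNNEL_KEY set		TUNNEL_KEY clear	no match (key expected)
 *	TUNNEL_KEY clear	TUNNEL_KEY set		no match (unexpected key)
 *	TUNNEL_KEY clear	TUNNEL_KEY clear	match (both keyless)
 */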

/* Fallback tunnel: no source, no destination, no key, no options.

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
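
/* A minimal sketch of a caller (illustrative, not part of this file):
 * a protocol receive path such as GRE resolves the tunnel for an
 * incoming packet under rcu_read_lock() roughly like this:
 *
 *	const struct iphdr *iph = ip_hdr(skb);
 *	struct ip_tunnel *tunnel;
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
 */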

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

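/* Unlike ip_tunnel_lookup() above, which serves the packet receive
 * path, ip_tunnel_find() does an exact configuration match: addresses,
 * link, device type and key must all agree. Key comparison goes
 * through ip_tunnel_key_match(), so a keyless lookup does not match a
 * keyed tunnel and vice versa; this is the i_key matching behaviour
 * the change in this commit is about.
 */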
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt, *fbt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	fbt = netdev_priv(itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL)) {
			t = ip_tunnel_create(net, itn, p);
			if (IS_ERR(t)) {
				err = PTR_ERR(t);
				break;
			}
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
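
/* A minimal userspace sketch (hypothetical, for illustration only) of
 * driving SIOCGETTUNNEL above; the parameters block travels through
 * ifr_ifru.ifru_data, which is how iproute2 talks to this ioctl:
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <linux/if_tunnel.h>
 *
 *	static int get_tunnel_parms(const char *name, struct ip_tunnel_parm *p)
 *	{
 *		struct ifreq ifr;
 *		int fd, err;
 *
 *		fd = socket(AF_INET, SOCK_DGRAM, 0);
 *		if (fd < 0)
 *			return -1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
 *		ifr.ifr_ifru.ifru_data = (void *)p;
 *		err = ioctl(fd, SIOCGETTUNNEL, &ifr);
 *		close(fd);
 *		return err;
 *	}
 */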

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
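
/* Worked example (illustrative numbers, assuming a GRE tunnel with no
 * option flags): tunnel->hlen is 4, so t_hlen = 4 + 20 = 24 and, with
 * hard_header_len == 0, new_mtu must fall within [68, 0xFFF8 - 24].
 * The actual hlen depends on the negotiated tunnel flags.
 */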

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");