net/ipv4/ip_gre.c
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
32
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
46
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52
53 /*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation;
63 it is an infeasible task. The most general solution would be
64 to keep an skb->encapsulation counter (a sort of local ttl)
65 and silently drop the packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skbs, even if no tunneling is used.
68
69 Current solution: the t->recursion lock breaks dead loops. It looks
70 like the dev->tbusy flag, but I preferred a new variable because
71 the semantics are different. One day, when hard_start_xmit
72 becomes multithreaded, we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill the network. The IP hop limit plays the role of "t->recursion" here,
78 if we copy it from the packet being encapsulated to the upper header.
79 It is a very good solution, but it introduces two problems:
80
81 - Routing protocols that use packets with ttl=1 (OSPF, RIPv2)
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from the tunnel,
84 so that this problem would be solved and traceroute output
85 would be even more informative. This idea appeared to be wrong:
86 only Linux complies with rfc1812 now (yes, guys, Linux is the only
87 true router now :-)); all other routers (at least those in my neighbourhood)
88 return only 8 bytes of payload. That is the end of it.
89
90 Hence, if we want OSPF to work or traceroute to say something reasonable,
91 we should search for another solution.
92
93 One of them is to parse the packet, trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially
95 taking fragmentation into account. In short, it is not a solution at all.
96
97 Current solution: the solution was UNEXPECTEDLY SIMPLE.
98 We force the DF flag on tunnels with a preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches that exceed the pmtu are pruned) and the tunnel mtu
102 quickly degrades to a value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop
104 which does not force DF, even when the encapsulated packets have DF set.
105 But it is not our problem! Nobody can accuse us; we did
106 all that we could. Even if it was your gated that injected
107 the fatal route into the network, even if it was you who configured
108 the fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident how to make them modular.
115 sit is an integral part of IPv6; ipip and gre are naturally modular.
116 We could extract the common parts (hash table, ioctl etc.)
117 into a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
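/*
 * Illustrative example of the "preconfigured hop limit" rule above: a
 * tunnel created with, say, "ip tunnel add t0 mode gre remote 203.0.113.1
 * ttl 64" has a nonzero ttl, so the ioctl and netlink paths below force
 * IP_DF on the outer header (see "if (p.iph.ttl) p.iph.frag_off |=
 * htons(IP_DF)"), letting PMTU discovery prune looping branches.
 */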
121
122 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
123 static int ipgre_tunnel_init(struct net_device *dev);
124 static void ipgre_tunnel_setup(struct net_device *dev);
125 static int ipgre_tunnel_bind_dev(struct net_device *dev);
126
127 /* Fallback tunnel: no source, no destination, no key, no options */
128
129 #define HASH_SIZE 16
130
131 static int ipgre_net_id;
132 struct ipgre_net {
133 struct ip_tunnel *tunnels[4][HASH_SIZE];
134
135 struct net_device *fb_tunnel_dev;
136 };
137
138 /* Tunnel hash table */
139
140 /*
141 4 hash tables:
142
143 3: (remote,local)
144 2: (remote,*)
145 1: (*,local)
146 0: (*,*)
147
148 We require an exact key match, i.e. if a key is present in the packet
149 it will match only a tunnel with the same key; if it is not present,
150 it will match only a keyless tunnel.
151
152 All keyless packets, if not matched by a configured keyless tunnel,
153 will match the fallback tunnel.
154 */
155
156 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
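/*
 * Example: HASH(0) = (0 ^ 0) & 0xF = 0, so a zero key always hashes to
 * bucket 0; that is why ipgre_fb_tunnel_init() below can install the
 * keyless fallback device directly at tunnels_wc[0].
 */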
157
158 #define tunnels_r_l tunnels[3]
159 #define tunnels_r tunnels[2]
160 #define tunnels_l tunnels[1]
161 #define tunnels_wc tunnels[0]
162
163 static DEFINE_RWLOCK(ipgre_lock);
164
165 /* Given src, dst and key, find the appropriate tunnel for input. */
166
167 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto)
170 {
171 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key);
173 struct ip_tunnel *t;
174 struct ip_tunnel *t2 = NULL;
175 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
176 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177 ARPHRD_ETHER : ARPHRD_IPGRE;
178
179 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
180 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
181 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
182 if (t->dev->type == dev_type)
183 return t;
184 if (t->dev->type == ARPHRD_IPGRE && !t2)
185 t2 = t;
186 }
187 }
188 }
189
190 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
191 if (remote == t->parms.iph.daddr) {
192 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
193 if (t->dev->type == dev_type)
194 return t;
195 if (t->dev->type == ARPHRD_IPGRE && !t2)
196 t2 = t;
197 }
198 }
199 }
200
201 for (t = ign->tunnels_l[h1]; t; t = t->next) {
202 if (local == t->parms.iph.saddr ||
203 (local == t->parms.iph.daddr &&
204 ipv4_is_multicast(local))) {
205 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
206 if (t->dev->type == dev_type)
207 return t;
208 if (t->dev->type == ARPHRD_IPGRE && !t2)
209 t2 = t;
210 }
211 }
212 }
213
214 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
215 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
216 if (t->dev->type == dev_type)
217 return t;
218 if (t->dev->type == ARPHRD_IPGRE && !t2)
219 t2 = t;
220 }
221 }
222
223 if (t2)
224 return t2;
225
226 if (ign->fb_tunnel_dev->flags&IFF_UP)
227 return netdev_priv(ign->fb_tunnel_dev);
228 return NULL;
229 }
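/*
 * Precedence implemented above, for reference: an exact (remote,local)
 * match beats (remote,*), then (*,local), then the wildcard table;
 * within each class a tunnel of the exact dev_type wins immediately, an
 * ARPHRD_IPGRE tunnel is remembered as the fallback t2, and the fb
 * device catches whatever remains.
 */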
230
231 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
232 struct ip_tunnel_parm *parms)
233 {
234 __be32 remote = parms->iph.daddr;
235 __be32 local = parms->iph.saddr;
236 __be32 key = parms->i_key;
237 unsigned h = HASH(key);
238 int prio = 0;
239
240 if (local)
241 prio |= 1;
242 if (remote && !ipv4_is_multicast(remote)) {
243 prio |= 2;
244 h ^= HASH(remote);
245 }
246
247 return &ign->tunnels[prio][h];
248 }
249
250 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
251 struct ip_tunnel *t)
252 {
253 return __ipgre_bucket(ign, &t->parms);
254 }
255
256 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
257 {
258 struct ip_tunnel **tp = ipgre_bucket(ign, t);
259
260 t->next = *tp;
261 write_lock_bh(&ipgre_lock);
262 *tp = t;
263 write_unlock_bh(&ipgre_lock);
264 }
265
266 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
267 {
268 struct ip_tunnel **tp;
269
270 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
271 if (t == *tp) {
272 write_lock_bh(&ipgre_lock);
273 *tp = t->next;
274 write_unlock_bh(&ipgre_lock);
275 break;
276 }
277 }
278 }
279
280 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
281 struct ip_tunnel_parm *parms,
282 int type)
283 {
284 __be32 remote = parms->iph.daddr;
285 __be32 local = parms->iph.saddr;
286 __be32 key = parms->i_key;
287 struct ip_tunnel *t, **tp;
288 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
289
290 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
291 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr &&
293 key == t->parms.i_key &&
294 type == t->dev->type)
295 break;
296
297 return t;
298 }
299
300 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
301 struct ip_tunnel_parm *parms, int create)
302 {
303 struct ip_tunnel *t, *nt;
304 struct net_device *dev;
305 char name[IFNAMSIZ];
306 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
307
308 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
309 if (t || !create)
310 return t;
311
312 if (parms->name[0])
313 strlcpy(name, parms->name, IFNAMSIZ);
314 else
315 sprintf(name, "gre%%d");
316
317 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
318 if (!dev)
319 return NULL;
320
321 dev_net_set(dev, net);
322
323 if (strchr(name, '%')) {
324 if (dev_alloc_name(dev, name) < 0)
325 goto failed_free;
326 }
327
328 nt = netdev_priv(dev);
329 nt->parms = *parms;
330 dev->rtnl_link_ops = &ipgre_link_ops;
331
332 dev->mtu = ipgre_tunnel_bind_dev(dev);
333
334 if (register_netdevice(dev) < 0)
335 goto failed_free;
336
337 dev_hold(dev);
338 ipgre_tunnel_link(ign, nt);
339 return nt;
340
341 failed_free:
342 free_netdev(dev);
343 return NULL;
344 }
345
346 static void ipgre_tunnel_uninit(struct net_device *dev)
347 {
348 struct net *net = dev_net(dev);
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 ipgre_tunnel_unlink(ign, netdev_priv(dev));
352 dev_put(dev);
353 }
354
355
356 static void ipgre_err(struct sk_buff *skb, u32 info)
357 {
358
359 /* All the routers (except for Linux) return only
360 8 bytes of packet payload. It means that precise relaying of
361 ICMP in the real Internet is absolutely infeasible.
362
363 Moreover, Cisco "wise men" put the GRE key in the third word
364 of the GRE header. That makes it impossible to maintain even soft state
365 for keyed GRE tunnels with checksums enabled. Tell them "thank you".
366
367 Well, I wonder: rfc1812 was written by a Cisco employee,
368 so what the hell makes these idiots break standards established
369 by themselves???
370 */
371
372 struct iphdr *iph = (struct iphdr *)skb->data;
373 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
374 int grehlen = (iph->ihl<<2) + 4;
375 const int type = icmp_hdr(skb)->type;
376 const int code = icmp_hdr(skb)->code;
377 struct ip_tunnel *t;
378 __be16 flags;
379
380 flags = p[0];
381 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
382 if (flags&(GRE_VERSION|GRE_ROUTING))
383 return;
384 if (flags&GRE_KEY) {
385 grehlen += 4;
386 if (flags&GRE_CSUM)
387 grehlen += 4;
388 }
389 }
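/*
 * GRE header layout behind the arithmetic above (rfc1701/rfc2890):
 * flags(2) proto(2) [checksum+reserved(4) if GRE_CSUM] [key(4) if
 * GRE_KEY] [seq(4) if GRE_SEQ].  grehlen is grown only as far as the
 * end of the key, because the lookup below fetches the key as the last
 * 32-bit word inside grehlen, counted from the inner IP header.
 */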
390
391 /* If only 8 bytes were returned, a keyed message will be dropped here */
392 if (skb_headlen(skb) < grehlen)
393 return;
394
395 switch (type) {
396 default:
397 case ICMP_PARAMETERPROB:
398 return;
399
400 case ICMP_DEST_UNREACH:
401 switch (code) {
402 case ICMP_SR_FAILED:
403 case ICMP_PORT_UNREACH:
404 /* Impossible event. */
405 return;
406 case ICMP_FRAG_NEEDED:
407 /* Soft state for pmtu is maintained by IP core. */
408 return;
409 default:
410 /* All others are translated to HOST_UNREACH.
411 rfc2003 contains "deep thoughts" about NET_UNREACH,
412 I believe they are just ether pollution. --ANK
413 */
414 break;
415 }
416 break;
417 case ICMP_TIME_EXCEEDED:
418 if (code != ICMP_EXC_TTL)
419 return;
420 break;
421 }
422
423 read_lock(&ipgre_lock);
424 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
425 flags & GRE_KEY ?
426 *(((__be32 *)iph) + (grehlen / 4) - 1) : 0,
427 p[1]);
428 if (t == NULL || t->parms.iph.daddr == 0 ||
429 ipv4_is_multicast(t->parms.iph.daddr))
430 goto out;
431
432 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
433 goto out;
434
435 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
436 t->err_count++;
437 else
438 t->err_count = 1;
439 t->err_time = jiffies;
440 out:
441 read_unlock(&ipgre_lock);
442 return;
443 }
444
445 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
446 {
447 if (INET_ECN_is_ce(iph->tos)) {
448 if (skb->protocol == htons(ETH_P_IP)) {
449 IP_ECN_set_ce(ip_hdr(skb));
450 } else if (skb->protocol == htons(ETH_P_IPV6)) {
451 IP6_ECN_set_ce(ipv6_hdr(skb));
452 }
453 }
454 }
455
456 static inline u8
457 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
458 {
459 u8 inner = 0;
460 if (skb->protocol == htons(ETH_P_IP))
461 inner = old_iph->tos;
462 else if (skb->protocol == htons(ETH_P_IPV6))
463 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
464 return INET_ECN_encapsulate(tos, inner);
465 }
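/*
 * Taken together, the two helpers above propagate ECN across the tunnel
 * boundary: on decapsulation an outer CE mark is copied onto the inner
 * header, and on encapsulation INET_ECN_encapsulate() derives the outer
 * ECN bits from the inner TOS / traffic-class field.
 */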
466
467 static int ipgre_rcv(struct sk_buff *skb)
468 {
469 struct iphdr *iph;
470 u8 *h;
471 __be16 flags;
472 __sum16 csum = 0;
473 __be32 key = 0;
474 u32 seqno = 0;
475 struct ip_tunnel *tunnel;
476 int offset = 4;
477 __be16 gre_proto;
478 unsigned int len;
479
480 if (!pskb_may_pull(skb, 16))
481 goto drop_nolock;
482
483 iph = ip_hdr(skb);
484 h = skb->data;
485 flags = *(__be16*)h;
486
487 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
488 /* - Version must be 0.
489 - We do not support routing headers.
490 */
491 if (flags&(GRE_VERSION|GRE_ROUTING))
492 goto drop_nolock;
493
494 if (flags&GRE_CSUM) {
495 switch (skb->ip_summed) {
496 case CHECKSUM_COMPLETE:
497 csum = csum_fold(skb->csum);
498 if (!csum)
499 break;
500 /* fall through */
501 case CHECKSUM_NONE:
502 skb->csum = 0;
503 csum = __skb_checksum_complete(skb);
504 skb->ip_summed = CHECKSUM_COMPLETE;
505 }
506 offset += 4;
507 }
508 if (flags&GRE_KEY) {
509 key = *(__be32*)(h + offset);
510 offset += 4;
511 }
512 if (flags&GRE_SEQ) {
513 seqno = ntohl(*(__be32*)(h + offset));
514 offset += 4;
515 }
516 }
517
518 gre_proto = *(__be16 *)(h + 2);
519
520 read_lock(&ipgre_lock);
521 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
522 iph->saddr, iph->daddr, key,
523 gre_proto))) {
524 struct net_device_stats *stats = &tunnel->dev->stats;
525
526 secpath_reset(skb);
527
528 skb->protocol = gre_proto;
529 /* WCCP version 1 and 2 protocol decoding.
530 * - Change protocol to IP
531 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
532 */
533 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
534 skb->protocol = htons(ETH_P_IP);
535 if ((*(h + offset) & 0xF0) != 0x40)
536 offset += 4;
537 }
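/*
 * The 0x40 test above checks the IP version nibble: if the byte that
 * follows does not begin an IPv4 header (version 4), a WCCPv2 redirect
 * header is assumed and its extra 4 bytes are skipped.
 */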
538
539 skb->mac_header = skb->network_header;
540 __pskb_pull(skb, offset);
541 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
542 skb->pkt_type = PACKET_HOST;
543 #ifdef CONFIG_NET_IPGRE_BROADCAST
544 if (ipv4_is_multicast(iph->daddr)) {
545 /* Looped back packet, drop it! */
546 if (skb->rtable->fl.iif == 0)
547 goto drop;
548 stats->multicast++;
549 skb->pkt_type = PACKET_BROADCAST;
550 }
551 #endif
552
553 if (((flags&GRE_CSUM) && csum) ||
554 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
555 stats->rx_crc_errors++;
556 stats->rx_errors++;
557 goto drop;
558 }
559 if (tunnel->parms.i_flags&GRE_SEQ) {
560 if (!(flags&GRE_SEQ) ||
561 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
562 stats->rx_fifo_errors++;
563 stats->rx_errors++;
564 goto drop;
565 }
566 tunnel->i_seqno = seqno + 1;
567 }
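/*
 * The (s32) cast above gives wrap-safe serial-number comparison: a
 * received seqno is treated as stale only if it lies behind i_seqno
 * modulo 2^32, so the check keeps working across the 32-bit wrap.
 */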
568
569 len = skb->len;
570
571 /* Warning: All skb pointers will be invalidated! */
572 if (tunnel->dev->type == ARPHRD_ETHER) {
573 if (!pskb_may_pull(skb, ETH_HLEN)) {
574 stats->rx_length_errors++;
575 stats->rx_errors++;
576 goto drop;
577 }
578
579 iph = ip_hdr(skb);
580 skb->protocol = eth_type_trans(skb, tunnel->dev);
581 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
582 }
583
584 stats->rx_packets++;
585 stats->rx_bytes += len;
586 skb->dev = tunnel->dev;
587 dst_release(skb->dst);
588 skb->dst = NULL;
589 nf_reset(skb);
590
591 skb_reset_network_header(skb);
592 ipgre_ecn_decapsulate(iph, skb);
593
594 netif_rx(skb);
595 read_unlock(&ipgre_lock);
596 return(0);
597 }
598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
599
600 drop:
601 read_unlock(&ipgre_lock);
602 drop_nolock:
603 kfree_skb(skb);
604 return(0);
605 }
606
607 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
608 {
609 struct ip_tunnel *tunnel = netdev_priv(dev);
610 struct net_device_stats *stats = &tunnel->dev->stats;
611 struct iphdr *old_iph = ip_hdr(skb);
612 struct iphdr *tiph;
613 u8 tos;
614 __be16 df;
615 struct rtable *rt; /* Route to the other host */
616 struct net_device *tdev; /* Device to other host */
617 struct iphdr *iph; /* Our new IP header */
618 unsigned int max_headroom; /* The extra header space needed */
619 int gre_hlen;
620 __be32 dst;
621 int mtu;
622
623 if (tunnel->recursion++) {
624 stats->collisions++;
625 goto tx_error;
626 }
627
628 if (dev->type == ARPHRD_ETHER)
629 IPCB(skb)->flags = 0;
630
631 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
632 gre_hlen = 0;
633 tiph = (struct iphdr *)skb->data;
634 } else {
635 gre_hlen = tunnel->hlen;
636 tiph = &tunnel->parms.iph;
637 }
638
639 if ((dst = tiph->daddr) == 0) {
640 /* NBMA tunnel */
641
642 if (skb->dst == NULL) {
643 stats->tx_fifo_errors++;
644 goto tx_error;
645 }
646
647 if (skb->protocol == htons(ETH_P_IP)) {
648 rt = skb->rtable;
649 if ((dst = rt->rt_gateway) == 0)
650 goto tx_error_icmp;
651 }
652 #ifdef CONFIG_IPV6
653 else if (skb->protocol == htons(ETH_P_IPV6)) {
654 struct in6_addr *addr6;
655 int addr_type;
656 struct neighbour *neigh = skb->dst->neighbour;
657
658 if (neigh == NULL)
659 goto tx_error;
660
661 addr6 = (struct in6_addr *)&neigh->primary_key;
662 addr_type = ipv6_addr_type(addr6);
663
664 if (addr_type == IPV6_ADDR_ANY) {
665 addr6 = &ipv6_hdr(skb)->daddr;
666 addr_type = ipv6_addr_type(addr6);
667 }
668
669 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
670 goto tx_error_icmp;
671
672 dst = addr6->s6_addr32[3];
673 }
674 #endif
675 else
676 goto tx_error;
677 }
678
679 tos = tiph->tos;
680 if (tos&1) {
681 if (skb->protocol == htons(ETH_P_IP))
682 tos = old_iph->tos;
683 tos &= ~1;
684 }
685
686 {
687 struct flowi fl = { .oif = tunnel->parms.link,
688 .nl_u = { .ip4_u =
689 { .daddr = dst,
690 .saddr = tiph->saddr,
691 .tos = RT_TOS(tos) } },
692 .proto = IPPROTO_GRE };
693 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
694 stats->tx_carrier_errors++;
695 goto tx_error;
696 }
697 }
698 tdev = rt->u.dst.dev;
699
700 if (tdev == dev) {
701 ip_rt_put(rt);
702 stats->collisions++;
703 goto tx_error;
704 }
705
706 df = tiph->frag_off;
707 if (df)
708 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
709 else
710 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
711
712 if (skb->dst)
713 skb->dst->ops->update_pmtu(skb->dst, mtu);
714
715 if (skb->protocol == htons(ETH_P_IP)) {
716 df |= (old_iph->frag_off&htons(IP_DF));
717
718 if ((old_iph->frag_off&htons(IP_DF)) &&
719 mtu < ntohs(old_iph->tot_len)) {
720 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
721 ip_rt_put(rt);
722 goto tx_error;
723 }
724 }
725 #ifdef CONFIG_IPV6
726 else if (skb->protocol == htons(ETH_P_IPV6)) {
727 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
728
729 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
730 if ((tunnel->parms.iph.daddr &&
731 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
732 rt6->rt6i_dst.plen == 128) {
733 rt6->rt6i_flags |= RTF_MODIFIED;
734 skb->dst->metrics[RTAX_MTU-1] = mtu;
735 }
736 }
737
738 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
739 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
740 ip_rt_put(rt);
741 goto tx_error;
742 }
743 }
744 #endif
745
746 if (tunnel->err_count > 0) {
747 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
748 tunnel->err_count--;
749
750 dst_link_failure(skb);
751 } else
752 tunnel->err_count = 0;
753 }
754
755 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
756
757 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
758 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
759 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
760 if (!new_skb) {
761 ip_rt_put(rt);
762 stats->tx_dropped++;
763 dev_kfree_skb(skb);
764 tunnel->recursion--;
765 return 0;
766 }
767 if (skb->sk)
768 skb_set_owner_w(new_skb, skb->sk);
769 dev_kfree_skb(skb);
770 skb = new_skb;
771 old_iph = ip_hdr(skb);
772 }
773
774 skb_reset_transport_header(skb);
775 skb_push(skb, gre_hlen);
776 skb_reset_network_header(skb);
777 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
778 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
779 IPSKB_REROUTED);
780 dst_release(skb->dst);
781 skb->dst = &rt->u.dst;
782
783 /*
784 * Push down and install the outer IP header.
785 */
786
787 iph = ip_hdr(skb);
788 iph->version = 4;
789 iph->ihl = sizeof(struct iphdr) >> 2;
790 iph->frag_off = df;
791 iph->protocol = IPPROTO_GRE;
792 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
793 iph->daddr = rt->rt_dst;
794 iph->saddr = rt->rt_src;
795
796 if ((iph->ttl = tiph->ttl) == 0) {
797 if (skb->protocol == htons(ETH_P_IP))
798 iph->ttl = old_iph->ttl;
799 #ifdef CONFIG_IPV6
800 else if (skb->protocol == htons(ETH_P_IPV6))
801 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
802 #endif
803 else
804 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
805 }
806
807 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
808 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
809 htons(ETH_P_TEB) : skb->protocol;
810
811 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
812 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
813
814 if (tunnel->parms.o_flags&GRE_SEQ) {
815 ++tunnel->o_seqno;
816 *ptr = htonl(tunnel->o_seqno);
817 ptr--;
818 }
819 if (tunnel->parms.o_flags&GRE_KEY) {
820 *ptr = tunnel->parms.o_key;
821 ptr--;
822 }
823 if (tunnel->parms.o_flags&GRE_CSUM) {
824 *ptr = 0;
825 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
826 }
827 }
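/*
 * The option words above are written back to front: ptr starts at the
 * last 32-bit slot of the GRE header (the sequence number, when
 * present), steps down to the key, and ends at the checksum word, whose
 * sum covers everything that follows the outer IP header.
 */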
828
829 nf_reset(skb);
830
831 IPTUNNEL_XMIT();
832 tunnel->recursion--;
833 return 0;
834
835 tx_error_icmp:
836 dst_link_failure(skb);
837
838 tx_error:
839 stats->tx_errors++;
840 dev_kfree_skb(skb);
841 tunnel->recursion--;
842 return 0;
843 }
844
845 static int ipgre_tunnel_bind_dev(struct net_device *dev)
846 {
847 struct net_device *tdev = NULL;
848 struct ip_tunnel *tunnel;
849 struct iphdr *iph;
850 int hlen = LL_MAX_HEADER;
851 int mtu = ETH_DATA_LEN;
852 int addend = sizeof(struct iphdr) + 4;
853
854 tunnel = netdev_priv(dev);
855 iph = &tunnel->parms.iph;
856
857 /* Guess output device to choose reasonable mtu and needed_headroom */
858
859 if (iph->daddr) {
860 struct flowi fl = { .oif = tunnel->parms.link,
861 .nl_u = { .ip4_u =
862 { .daddr = iph->daddr,
863 .saddr = iph->saddr,
864 .tos = RT_TOS(iph->tos) } },
865 .proto = IPPROTO_GRE };
866 struct rtable *rt;
867 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
868 tdev = rt->u.dst.dev;
869 ip_rt_put(rt);
870 }
871
872 if (dev->type != ARPHRD_ETHER)
873 dev->flags |= IFF_POINTOPOINT;
874 }
875
876 if (!tdev && tunnel->parms.link)
877 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
878
879 if (tdev) {
880 hlen = tdev->hard_header_len + tdev->needed_headroom;
881 mtu = tdev->mtu;
882 }
883 dev->iflink = tunnel->parms.link;
884
885 /* Precalculate GRE options length */
886 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
887 if (tunnel->parms.o_flags&GRE_CSUM)
888 addend += 4;
889 if (tunnel->parms.o_flags&GRE_KEY)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_SEQ)
892 addend += 4;
893 }
894 dev->needed_headroom = addend + hlen;
895 mtu -= dev->hard_header_len + addend;
896
897 if (mtu < 68)
898 mtu = 68;
899
900 tunnel->hlen = addend;
901
902 return mtu;
903 }
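/*
 * Worked example (illustrative, Ethernet underlay with mtu 1500): a
 * keyed, checksummed tunnel has addend = 20 (outer IP) + 4 (flags and
 * proto) + 4 (csum) + 4 (key) = 32, so a plain ARPHRD_IPGRE device
 * (hard_header_len 0) gets mtu 1500 - 32 = 1468, clamped below at the
 * 68-byte IPv4 minimum.
 */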
904
905 static int
906 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
907 {
908 int err = 0;
909 struct ip_tunnel_parm p;
910 struct ip_tunnel *t;
911 struct net *net = dev_net(dev);
912 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
913
914 switch (cmd) {
915 case SIOCGETTUNNEL:
916 t = NULL;
917 if (dev == ign->fb_tunnel_dev) {
918 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
919 err = -EFAULT;
920 break;
921 }
922 t = ipgre_tunnel_locate(net, &p, 0);
923 }
924 if (t == NULL)
925 t = netdev_priv(dev);
926 memcpy(&p, &t->parms, sizeof(p));
927 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
928 err = -EFAULT;
929 break;
930
931 case SIOCADDTUNNEL:
932 case SIOCCHGTUNNEL:
933 err = -EPERM;
934 if (!capable(CAP_NET_ADMIN))
935 goto done;
936
937 err = -EFAULT;
938 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
939 goto done;
940
941 err = -EINVAL;
942 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
943 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
944 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
945 goto done;
946 if (p.iph.ttl)
947 p.iph.frag_off |= htons(IP_DF);
948
949 if (!(p.i_flags&GRE_KEY))
950 p.i_key = 0;
951 if (!(p.o_flags&GRE_KEY))
952 p.o_key = 0;
953
954 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
955
956 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
957 if (t != NULL) {
958 if (t->dev != dev) {
959 err = -EEXIST;
960 break;
961 }
962 } else {
963 unsigned nflags = 0;
964
965 t = netdev_priv(dev);
966
967 if (ipv4_is_multicast(p.iph.daddr))
968 nflags = IFF_BROADCAST;
969 else if (p.iph.daddr)
970 nflags = IFF_POINTOPOINT;
971
972 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
973 err = -EINVAL;
974 break;
975 }
976 ipgre_tunnel_unlink(ign, t);
977 t->parms.iph.saddr = p.iph.saddr;
978 t->parms.iph.daddr = p.iph.daddr;
979 t->parms.i_key = p.i_key;
980 t->parms.o_key = p.o_key;
981 memcpy(dev->dev_addr, &p.iph.saddr, 4);
982 memcpy(dev->broadcast, &p.iph.daddr, 4);
983 ipgre_tunnel_link(ign, t);
984 netdev_state_change(dev);
985 }
986 }
987
988 if (t) {
989 err = 0;
990 if (cmd == SIOCCHGTUNNEL) {
991 t->parms.iph.ttl = p.iph.ttl;
992 t->parms.iph.tos = p.iph.tos;
993 t->parms.iph.frag_off = p.iph.frag_off;
994 if (t->parms.link != p.link) {
995 t->parms.link = p.link;
996 dev->mtu = ipgre_tunnel_bind_dev(dev);
997 netdev_state_change(dev);
998 }
999 }
1000 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1001 err = -EFAULT;
1002 } else
1003 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1004 break;
1005
1006 case SIOCDELTUNNEL:
1007 err = -EPERM;
1008 if (!capable(CAP_NET_ADMIN))
1009 goto done;
1010
1011 if (dev == ign->fb_tunnel_dev) {
1012 err = -EFAULT;
1013 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1014 goto done;
1015 err = -ENOENT;
1016 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1017 goto done;
1018 err = -EPERM;
1019 if (t == netdev_priv(ign->fb_tunnel_dev))
1020 goto done;
1021 dev = t->dev;
1022 }
1023 unregister_netdevice(dev);
1024 err = 0;
1025 break;
1026
1027 default:
1028 err = -EINVAL;
1029 }
1030
1031 done:
1032 return err;
1033 }
1034
1035 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1036 {
1037 struct ip_tunnel *tunnel = netdev_priv(dev);
1038 if (new_mtu < 68 ||
1039 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1040 return -EINVAL;
1041 dev->mtu = new_mtu;
1042 return 0;
1043 }
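/*
 * 0xFFF8 is the maximum IPv4 packet size rounded down to a multiple of
 * 8; the new mtu must also leave room for the link header and the
 * tunnel's own encapsulation (tunnel->hlen).
 */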
1044
1045 /* Nice toy. Unfortunately, useless in real life :-)
1046 It allows one to construct a virtual multiprotocol broadcast "LAN"
1047 over the Internet, provided multicast routing is set up.
1048
1049
1050 I have no idea whether this bicycle was invented before me,
1051 so I had to set ARPHRD_IPGRE to a random value.
1052 I have the impression that Cisco could make something similar,
1053 but this feature is apparently missing in IOS<=11.2(8).
1054
1055 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1056 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1057
1058 ping -t 255 224.66.66.66
1059
1060 If nobody answers, mbone does not work.
1061
1062 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1063 ip addr add 10.66.66.<somewhat>/24 dev Universe
1064 ifconfig Universe up
1065 ifconfig Universe add fe80::<Your_real_addr>/10
1066 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1067 ftp 10.66.66.66
1068 ...
1069 ftp fec0:6666:6666::193.233.7.65
1070 ...
1071
1072 */
1073
1074 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1075 unsigned short type,
1076 const void *daddr, const void *saddr, unsigned len)
1077 {
1078 struct ip_tunnel *t = netdev_priv(dev);
1079 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1080 __be16 *p = (__be16*)(iph+1);
1081
1082 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1083 p[0] = t->parms.o_flags;
1084 p[1] = htons(type);
1085
1086 /*
1087 * Set the source hardware address.
1088 */
1089
1090 if (saddr)
1091 memcpy(&iph->saddr, saddr, 4);
1092
1093 if (daddr) {
1094 memcpy(&iph->daddr, daddr, 4);
1095 return t->hlen;
1096 }
1097 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1098 return t->hlen;
1099
1100 return -t->hlen;
1101 }
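/*
 * Returning -t->hlen rather than t->hlen follows the header_ops
 * convention for a partially built header (compare eth_header()): the
 * outer daddr is still unknown in the multicast/NBMA case and is filled
 * in later, once it has been resolved.
 */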
1102
1103 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1104 {
1105 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1106 memcpy(haddr, &iph->saddr, 4);
1107 return 4;
1108 }
1109
1110 static const struct header_ops ipgre_header_ops = {
1111 .create = ipgre_header,
1112 .parse = ipgre_header_parse,
1113 };
1114
1115 #ifdef CONFIG_NET_IPGRE_BROADCAST
1116 static int ipgre_open(struct net_device *dev)
1117 {
1118 struct ip_tunnel *t = netdev_priv(dev);
1119
1120 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1121 struct flowi fl = { .oif = t->parms.link,
1122 .nl_u = { .ip4_u =
1123 { .daddr = t->parms.iph.daddr,
1124 .saddr = t->parms.iph.saddr,
1125 .tos = RT_TOS(t->parms.iph.tos) } },
1126 .proto = IPPROTO_GRE };
1127 struct rtable *rt;
1128 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1129 return -EADDRNOTAVAIL;
1130 dev = rt->u.dst.dev;
1131 ip_rt_put(rt);
1132 if (__in_dev_get_rtnl(dev) == NULL)
1133 return -EADDRNOTAVAIL;
1134 t->mlink = dev->ifindex;
1135 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1136 }
1137 return 0;
1138 }
1139
1140 static int ipgre_close(struct net_device *dev)
1141 {
1142 struct ip_tunnel *t = netdev_priv(dev);
1143
1144 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1145 struct in_device *in_dev;
1146 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1147 if (in_dev) {
1148 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1149 in_dev_put(in_dev);
1150 }
1151 }
1152 return 0;
1153 }
1154
1155 #endif
1156
1157 static const struct net_device_ops ipgre_netdev_ops = {
1158 .ndo_init = ipgre_tunnel_init,
1159 .ndo_uninit = ipgre_tunnel_uninit,
1160 #ifdef CONFIG_NET_IPGRE_BROADCAST
1161 .ndo_open = ipgre_open,
1162 .ndo_stop = ipgre_close,
1163 #endif
1164 .ndo_start_xmit = ipgre_tunnel_xmit,
1165 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1166 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1167 };
1168
1169 static void ipgre_tunnel_setup(struct net_device *dev)
1170 {
1171 dev->netdev_ops = &ipgre_netdev_ops;
1172 dev->destructor = free_netdev;
1173
1174 dev->type = ARPHRD_IPGRE;
1175 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1176 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1177 dev->flags = IFF_NOARP;
1178 dev->iflink = 0;
1179 dev->addr_len = 4;
1180 dev->features |= NETIF_F_NETNS_LOCAL;
1181 }
1182
1183 static int ipgre_tunnel_init(struct net_device *dev)
1184 {
1185 struct ip_tunnel *tunnel;
1186 struct iphdr *iph;
1187
1188 tunnel = netdev_priv(dev);
1189 iph = &tunnel->parms.iph;
1190
1191 tunnel->dev = dev;
1192 strcpy(tunnel->parms.name, dev->name);
1193
1194 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1195 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1196
1197 if (iph->daddr) {
1198 #ifdef CONFIG_NET_IPGRE_BROADCAST
1199 if (ipv4_is_multicast(iph->daddr)) {
1200 if (!iph->saddr)
1201 return -EINVAL;
1202 dev->flags = IFF_BROADCAST;
1203 dev->header_ops = &ipgre_header_ops;
1204 }
1205 #endif
1206 } else
1207 dev->header_ops = &ipgre_header_ops;
1208
1209 return 0;
1210 }
1211
1212 static void ipgre_fb_tunnel_init(struct net_device *dev)
1213 {
1214 struct ip_tunnel *tunnel = netdev_priv(dev);
1215 struct iphdr *iph = &tunnel->parms.iph;
1216 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1217
1218 tunnel->dev = dev;
1219 strcpy(tunnel->parms.name, dev->name);
1220
1221 iph->version = 4;
1222 iph->protocol = IPPROTO_GRE;
1223 iph->ihl = 5;
1224 tunnel->hlen = sizeof(struct iphdr) + 4;
1225
1226 dev_hold(dev);
1227 ign->tunnels_wc[0] = tunnel;
1228 }
1229
1230
1231 static struct net_protocol ipgre_protocol = {
1232 .handler = ipgre_rcv,
1233 .err_handler = ipgre_err,
1234 .netns_ok = 1,
1235 };
1236
1237 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1238 {
1239 int prio;
1240
1241 for (prio = 0; prio < 4; prio++) {
1242 int h;
1243 for (h = 0; h < HASH_SIZE; h++) {
1244 struct ip_tunnel *t;
1245 while ((t = ign->tunnels[prio][h]) != NULL)
1246 unregister_netdevice(t->dev);
1247 }
1248 }
1249 }
1250
1251 static int ipgre_init_net(struct net *net)
1252 {
1253 int err;
1254 struct ipgre_net *ign;
1255
1256 err = -ENOMEM;
1257 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1258 if (ign == NULL)
1259 goto err_alloc;
1260
1261 err = net_assign_generic(net, ipgre_net_id, ign);
1262 if (err < 0)
1263 goto err_assign;
1264
1265 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1266 ipgre_tunnel_setup);
1267 if (!ign->fb_tunnel_dev) {
1268 err = -ENOMEM;
1269 goto err_alloc_dev;
1270 }
1271 dev_net_set(ign->fb_tunnel_dev, net);
1272
1273 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1274 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1275
1276 if ((err = register_netdev(ign->fb_tunnel_dev)))
1277 goto err_reg_dev;
1278
1279 return 0;
1280
1281 err_reg_dev:
1282 free_netdev(ign->fb_tunnel_dev);
1283 err_alloc_dev:
1284 /* nothing */
1285 err_assign:
1286 kfree(ign);
1287 err_alloc:
1288 return err;
1289 }
1290
1291 static void ipgre_exit_net(struct net *net)
1292 {
1293 struct ipgre_net *ign;
1294
1295 ign = net_generic(net, ipgre_net_id);
1296 rtnl_lock();
1297 ipgre_destroy_tunnels(ign);
1298 rtnl_unlock();
1299 kfree(ign);
1300 }
1301
1302 static struct pernet_operations ipgre_net_ops = {
1303 .init = ipgre_init_net,
1304 .exit = ipgre_exit_net,
1305 };
1306
1307 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1308 {
1309 __be16 flags;
1310
1311 if (!data)
1312 return 0;
1313
1314 flags = 0;
1315 if (data[IFLA_GRE_IFLAGS])
1316 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1317 if (data[IFLA_GRE_OFLAGS])
1318 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1319 if (flags & (GRE_VERSION|GRE_ROUTING))
1320 return -EINVAL;
1321
1322 return 0;
1323 }
1324
1325 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1326 {
1327 __be32 daddr;
1328
1329 if (tb[IFLA_ADDRESS]) {
1330 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1331 return -EINVAL;
1332 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1333 return -EADDRNOTAVAIL;
1334 }
1335
1336 if (!data)
1337 goto out;
1338
1339 if (data[IFLA_GRE_REMOTE]) {
1340 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1341 if (!daddr)
1342 return -EINVAL;
1343 }
1344
1345 out:
1346 return ipgre_tunnel_validate(tb, data);
1347 }
1348
1349 static void ipgre_netlink_parms(struct nlattr *data[],
1350 struct ip_tunnel_parm *parms)
1351 {
1352 memset(parms, 0, sizeof(*parms));
1353
1354 parms->iph.protocol = IPPROTO_GRE;
1355
1356 if (!data)
1357 return;
1358
1359 if (data[IFLA_GRE_LINK])
1360 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1361
1362 if (data[IFLA_GRE_IFLAGS])
1363 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1364
1365 if (data[IFLA_GRE_OFLAGS])
1366 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367
1368 if (data[IFLA_GRE_IKEY])
1369 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1370
1371 if (data[IFLA_GRE_OKEY])
1372 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1373
1374 if (data[IFLA_GRE_LOCAL])
1375 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1376
1377 if (data[IFLA_GRE_REMOTE])
1378 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1379
1380 if (data[IFLA_GRE_TTL])
1381 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1382
1383 if (data[IFLA_GRE_TOS])
1384 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1385
1386 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1387 parms->iph.frag_off = htons(IP_DF);
1388 }
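/*
 * For reference, these attributes map onto iproute2 link options; e.g.
 * (illustrative) "ip link add gre1 type gre remote 198.51.100.1 local
 * 198.51.100.2 ttl 64 key 42" fills IFLA_GRE_REMOTE, IFLA_GRE_LOCAL,
 * IFLA_GRE_TTL and both IFLA_GRE_IKEY and IFLA_GRE_OKEY, while
 * "nopmtudisc" clears the IP_DF default set just above.
 */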
1389
1390 static int ipgre_tap_init(struct net_device *dev)
1391 {
1392 struct ip_tunnel *tunnel;
1393
1394 tunnel = netdev_priv(dev);
1395
1396 tunnel->dev = dev;
1397 strcpy(tunnel->parms.name, dev->name);
1398
1399 ipgre_tunnel_bind_dev(dev);
1400
1401 return 0;
1402 }
1403
1404 static const struct net_device_ops ipgre_tap_netdev_ops = {
1405 .ndo_init = ipgre_tap_init,
1406 .ndo_uninit = ipgre_tunnel_uninit,
1407 .ndo_start_xmit = ipgre_tunnel_xmit,
1408 .ndo_set_mac_address = eth_mac_addr,
1409 .ndo_validate_addr = eth_validate_addr,
1410 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1411 };
1412
1413 static void ipgre_tap_setup(struct net_device *dev)
1414 {
1415
1416 ether_setup(dev);
1417
1418 dev->netdev_ops = &ipgre_tap_netdev_ops;
1419 dev->destructor = free_netdev;
1420
1421 dev->iflink = 0;
1422 dev->features |= NETIF_F_NETNS_LOCAL;
1423 }
1424
1425 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1426 struct nlattr *data[])
1427 {
1428 struct ip_tunnel *nt;
1429 struct net *net = dev_net(dev);
1430 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1431 int mtu;
1432 int err;
1433
1434 nt = netdev_priv(dev);
1435 ipgre_netlink_parms(data, &nt->parms);
1436
1437 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1438 return -EEXIST;
1439
1440 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1441 random_ether_addr(dev->dev_addr);
1442
1443 mtu = ipgre_tunnel_bind_dev(dev);
1444 if (!tb[IFLA_MTU])
1445 dev->mtu = mtu;
1446
1447 err = register_netdevice(dev);
1448 if (err)
1449 goto out;
1450
1451 dev_hold(dev);
1452 ipgre_tunnel_link(ign, nt);
1453
1454 out:
1455 return err;
1456 }
1457
1458 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1459 struct nlattr *data[])
1460 {
1461 struct ip_tunnel *t, *nt;
1462 struct net *net = dev_net(dev);
1463 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1464 struct ip_tunnel_parm p;
1465 int mtu;
1466
1467 if (dev == ign->fb_tunnel_dev)
1468 return -EINVAL;
1469
1470 nt = netdev_priv(dev);
1471 ipgre_netlink_parms(data, &p);
1472
1473 t = ipgre_tunnel_locate(net, &p, 0);
1474
1475 if (t) {
1476 if (t->dev != dev)
1477 return -EEXIST;
1478 } else {
1479 unsigned nflags = 0;
1480
1481 t = nt;
1482
1483 if (ipv4_is_multicast(p.iph.daddr))
1484 nflags = IFF_BROADCAST;
1485 else if (p.iph.daddr)
1486 nflags = IFF_POINTOPOINT;
1487
1488 if ((dev->flags ^ nflags) &
1489 (IFF_POINTOPOINT | IFF_BROADCAST))
1490 return -EINVAL;
1491
1492 ipgre_tunnel_unlink(ign, t);
1493 t->parms.iph.saddr = p.iph.saddr;
1494 t->parms.iph.daddr = p.iph.daddr;
1495 t->parms.i_key = p.i_key;
1496 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1497 memcpy(dev->broadcast, &p.iph.daddr, 4);
1498 ipgre_tunnel_link(ign, t);
1499 netdev_state_change(dev);
1500 }
1501
1502 t->parms.o_key = p.o_key;
1503 t->parms.iph.ttl = p.iph.ttl;
1504 t->parms.iph.tos = p.iph.tos;
1505 t->parms.iph.frag_off = p.iph.frag_off;
1506
1507 if (t->parms.link != p.link) {
1508 t->parms.link = p.link;
1509 mtu = ipgre_tunnel_bind_dev(dev);
1510 if (!tb[IFLA_MTU])
1511 dev->mtu = mtu;
1512 netdev_state_change(dev);
1513 }
1514
1515 return 0;
1516 }
1517
1518 static size_t ipgre_get_size(const struct net_device *dev)
1519 {
1520 return
1521 /* IFLA_GRE_LINK */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_IFLAGS */
1524 nla_total_size(2) +
1525 /* IFLA_GRE_OFLAGS */
1526 nla_total_size(2) +
1527 /* IFLA_GRE_IKEY */
1528 nla_total_size(4) +
1529 /* IFLA_GRE_OKEY */
1530 nla_total_size(4) +
1531 /* IFLA_GRE_LOCAL */
1532 nla_total_size(4) +
1533 /* IFLA_GRE_REMOTE */
1534 nla_total_size(4) +
1535 /* IFLA_GRE_TTL */
1536 nla_total_size(1) +
1537 /* IFLA_GRE_TOS */
1538 nla_total_size(1) +
1539 /* IFLA_GRE_PMTUDISC */
1540 nla_total_size(1) +
1541 0;
1542 }
1543
1544 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1545 {
1546 struct ip_tunnel *t = netdev_priv(dev);
1547 struct ip_tunnel_parm *p = &t->parms;
1548
1549 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1550 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1551 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1552 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1553 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1554 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1555 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1556 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1557 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1558 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1559
1560 return 0;
1561
1562 nla_put_failure:
1563 return -EMSGSIZE;
1564 }
1565
1566 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1567 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1568 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1569 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1570 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1571 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1572 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1573 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1574 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1575 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1576 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1577 };
1578
1579 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1580 .kind = "gre",
1581 .maxtype = IFLA_GRE_MAX,
1582 .policy = ipgre_policy,
1583 .priv_size = sizeof(struct ip_tunnel),
1584 .setup = ipgre_tunnel_setup,
1585 .validate = ipgre_tunnel_validate,
1586 .newlink = ipgre_newlink,
1587 .changelink = ipgre_changelink,
1588 .get_size = ipgre_get_size,
1589 .fill_info = ipgre_fill_info,
1590 };
1591
1592 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1593 .kind = "gretap",
1594 .maxtype = IFLA_GRE_MAX,
1595 .policy = ipgre_policy,
1596 .priv_size = sizeof(struct ip_tunnel),
1597 .setup = ipgre_tap_setup,
1598 .validate = ipgre_tap_validate,
1599 .newlink = ipgre_newlink,
1600 .changelink = ipgre_changelink,
1601 .get_size = ipgre_get_size,
1602 .fill_info = ipgre_fill_info,
1603 };
1604
1605 /*
1606 * And now the modules code and kernel interface.
1607 */
1608
1609 static int __init ipgre_init(void)
1610 {
1611 int err;
1612
1613 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1614
1615 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1616 printk(KERN_INFO "ipgre init: can't add protocol\n");
1617 return -EAGAIN;
1618 }
1619
1620 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1621 if (err < 0)
1622 goto gen_device_failed;
1623
1624 err = rtnl_link_register(&ipgre_link_ops);
1625 if (err < 0)
1626 goto rtnl_link_failed;
1627
1628 err = rtnl_link_register(&ipgre_tap_ops);
1629 if (err < 0)
1630 goto tap_ops_failed;
1631
1632 out:
1633 return err;
1634
1635 tap_ops_failed:
1636 rtnl_link_unregister(&ipgre_link_ops);
1637 rtnl_link_failed:
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639 gen_device_failed:
1640 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1641 goto out;
1642 }
1643
1644 static void __exit ipgre_fini(void)
1645 {
1646 rtnl_link_unregister(&ipgre_tap_ops);
1647 rtnl_link_unregister(&ipgre_link_ops);
1648 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1649 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1650 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1651 }
1652
1653 module_init(ipgre_init);
1654 module_exit(ipgre_fini);
1655 MODULE_LICENSE("GPL");
1656 MODULE_ALIAS_RTNL_LINK("gre");
1657 MODULE_ALIAS_RTNL_LINK("gretap");