Merge branch 'fix/hda' into for-linus
[deliverable/linux.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4 16#include <linux/kernel.h>
5a0e3ad6 17#include <linux/slab.h>
1da177e4
LT
18#include <asm/uaccess.h>
19#include <linux/skbuff.h>
20#include <linux/netdevice.h>
21#include <linux/in.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/if_arp.h>
25#include <linux/mroute.h>
26#include <linux/init.h>
27#include <linux/in6.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h>
e1a80002 31#include <linux/etherdevice.h>
46f25dff 32#include <linux/if_ether.h>
1da177e4
LT
33
34#include <net/sock.h>
35#include <net/ip.h>
36#include <net/icmp.h>
37#include <net/protocol.h>
38#include <net/ipip.h>
39#include <net/arp.h>
40#include <net/checksum.h>
41#include <net/dsfield.h>
42#include <net/inet_ecn.h>
43#include <net/xfrm.h>
59a4c759
PE
44#include <net/net_namespace.h>
45#include <net/netns/generic.h>
c19e654d 46#include <net/rtnetlink.h>
1da177e4
LT
47
48#ifdef CONFIG_IPV6
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#endif
53
54/*
55 Problems & solutions
56 --------------------
57
58 1. The most important issue is detecting local dead loops.
59 They would cause complete host lockup in transmit, which
60 would be "resolved" by stack overflow or, if queueing is enabled,
61 with infinite looping in net_bh.
62
63 We cannot track such dead loops during route installation,
64 it is infeasible task. The most general solutions would be
65 to keep skb->encapsulation counter (sort of local ttl),
66 and silently drop packet when it expires. It is the best
67 solution, but it supposes maintaining new variable in ALL
68 skb, even if no tunneling is used.
69
a43912ab 70 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
c19e654d 120static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
121static int ipgre_tunnel_init(struct net_device *dev);
122static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 123static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
124
125/* Fallback tunnel: no source, no destination, no key, no options */
126
eb8ce741
PE
127#define HASH_SIZE 16
128
f99189b1 129static int ipgre_net_id __read_mostly;
59a4c759 130struct ipgre_net {
eb8ce741
PE
131 struct ip_tunnel *tunnels[4][HASH_SIZE];
132
7daa0004 133 struct net_device *fb_tunnel_dev;
59a4c759
PE
134};
135
1da177e4
LT
136/* Tunnel hash table */
137
138/*
139 4 hash tables:
140
141 3: (remote,local)
142 2: (remote,*)
143 1: (*,local)
144 0: (*,*)
145
146 We require exact key match i.e. if a key is present in packet
147 it will match only tunnel with the same key; if it is not present,
148 it will match only keyless tunnel.
149
150 All keyless packets, if not matched by configured keyless tunnels
151 will match fallback tunnel.
152 */
153
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesized so that a compound expression is hashed
 * as a whole; note that it is still evaluated twice, so it must not have
 * side effects.  The mask is derived from HASH_SIZE (which must be a
 * power of two) so that the index can never exceed the table size.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1da177e4 155
eb8ce741
PE
/* Shorthands for the four hash tables, named after the fields they match. */
#define tunnels_wc	tunnels[0]	/* (*, *) */
#define tunnels_l	tunnels[1]	/* (*, local) */
#define tunnels_r	tunnels[2]	/* (remote, *) */
#define tunnels_r_l	tunnels[3]	/* (remote, local) */

/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

/*
 * Walk one hash chain with rcu_dereference().  The macro uses a variable
 * named 't' from the enclosing scope as its cursor.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4
LT
167
168/* Given src, dst and key, find appropriate for input tunnel. */
169
749c10f9 170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
171 __be32 remote, __be32 local,
172 __be32 key, __be16 gre_proto)
1da177e4 173{
749c10f9
TT
174 struct net *net = dev_net(dev);
175 int link = dev->ifindex;
1da177e4
LT
176 unsigned h0 = HASH(remote);
177 unsigned h1 = HASH(key);
afcf1242 178 struct ip_tunnel *t, *cand = NULL;
7daa0004 179 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
181 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 182 int score, cand_score = 4;
1da177e4 183
8d5b2c08 184 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
185 if (local != t->parms.iph.saddr ||
186 remote != t->parms.iph.daddr ||
187 key != t->parms.i_key ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (t->dev->type != ARPHRD_IPGRE &&
192 t->dev->type != dev_type)
193 continue;
194
afcf1242 195 score = 0;
749c10f9 196 if (t->parms.link != link)
afcf1242 197 score |= 1;
749c10f9 198 if (t->dev->type != dev_type)
afcf1242
TT
199 score |= 2;
200 if (score == 0)
749c10f9 201 return t;
afcf1242
TT
202
203 if (score < cand_score) {
204 cand = t;
205 cand_score = score;
206 }
1da177e4 207 }
e1a80002 208
8d5b2c08 209 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
210 if (remote != t->parms.iph.daddr ||
211 key != t->parms.i_key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->dev->type != ARPHRD_IPGRE &&
216 t->dev->type != dev_type)
217 continue;
218
afcf1242 219 score = 0;
749c10f9 220 if (t->parms.link != link)
afcf1242 221 score |= 1;
749c10f9 222 if (t->dev->type != dev_type)
afcf1242
TT
223 score |= 2;
224 if (score == 0)
749c10f9 225 return t;
afcf1242
TT
226
227 if (score < cand_score) {
228 cand = t;
229 cand_score = score;
230 }
1da177e4 231 }
e1a80002 232
8d5b2c08 233 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
234 if ((local != t->parms.iph.saddr &&
235 (local != t->parms.iph.daddr ||
236 !ipv4_is_multicast(local))) ||
237 key != t->parms.i_key ||
238 !(t->dev->flags & IFF_UP))
239 continue;
240
241 if (t->dev->type != ARPHRD_IPGRE &&
242 t->dev->type != dev_type)
243 continue;
244
afcf1242 245 score = 0;
749c10f9 246 if (t->parms.link != link)
afcf1242 247 score |= 1;
749c10f9 248 if (t->dev->type != dev_type)
afcf1242
TT
249 score |= 2;
250 if (score == 0)
749c10f9 251 return t;
afcf1242
TT
252
253 if (score < cand_score) {
254 cand = t;
255 cand_score = score;
256 }
1da177e4 257 }
e1a80002 258
8d5b2c08 259 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
260 if (t->parms.i_key != key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
266 continue;
267
afcf1242 268 score = 0;
749c10f9 269 if (t->parms.link != link)
afcf1242 270 score |= 1;
749c10f9 271 if (t->dev->type != dev_type)
afcf1242
TT
272 score |= 2;
273 if (score == 0)
749c10f9 274 return t;
afcf1242
TT
275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
1da177e4
LT
280 }
281
afcf1242
TT
282 if (cand != NULL)
283 return cand;
e1a80002 284
8d5b2c08
ED
285 dev = ign->fb_tunnel_dev;
286 if (dev->flags & IFF_UP)
287 return netdev_priv(dev);
749c10f9 288
1da177e4
LT
289 return NULL;
290}
291
f57e7d5a
PE
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms)
1da177e4 294{
5056a1ef
YH
295 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key;
1da177e4
LT
298 unsigned h = HASH(key);
299 int prio = 0;
300
301 if (local)
302 prio |= 1;
f97c1e0c 303 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
304 prio |= 2;
305 h ^= HASH(remote);
306 }
307
eb8ce741 308 return &ign->tunnels[prio][h];
1da177e4
LT
309}
310
f57e7d5a
PE
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t)
5056a1ef 313{
f57e7d5a 314 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
315}
316
f57e7d5a 317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 318{
f57e7d5a 319 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4 320
8d5b2c08 321 spin_lock_bh(&ipgre_lock);
1da177e4 322 t->next = *tp;
8d5b2c08
ED
323 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
325}
326
f57e7d5a 327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
328{
329 struct ip_tunnel **tp;
330
f57e7d5a 331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4 332 if (t == *tp) {
8d5b2c08 333 spin_lock_bh(&ipgre_lock);
1da177e4 334 *tp = t->next;
8d5b2c08 335 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
336 break;
337 }
338 }
339}
340
e1a80002
HX
341static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
342 struct ip_tunnel_parm *parms,
343 int type)
1da177e4 344{
d5a0a1e3
AV
345 __be32 remote = parms->iph.daddr;
346 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key;
749c10f9 348 int link = parms->link;
e1a80002
HX
349 struct ip_tunnel *t, **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
353 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key &&
749c10f9 356 link == t->parms.link &&
e1a80002
HX
357 type == t->dev->type)
358 break;
359
360 return t;
361}
362
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create)
365{
366 struct ip_tunnel *t, *nt;
1da177e4 367 struct net_device *dev;
1da177e4 368 char name[IFNAMSIZ];
f57e7d5a 369 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 370
e1a80002
HX
371 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
372 if (t || !create)
373 return t;
1da177e4
LT
374
375 if (parms->name[0])
376 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
377 else
378 sprintf(name, "gre%%d");
1da177e4
LT
379
380 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
381 if (!dev)
382 return NULL;
383
0b67eceb
PE
384 dev_net_set(dev, net);
385
b37d428b
PE
386 if (strchr(name, '%')) {
387 if (dev_alloc_name(dev, name) < 0)
388 goto failed_free;
389 }
390
2941a486 391 nt = netdev_priv(dev);
1da177e4 392 nt->parms = *parms;
c19e654d 393 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 394
42aa9162
HX
395 dev->mtu = ipgre_tunnel_bind_dev(dev);
396
b37d428b
PE
397 if (register_netdevice(dev) < 0)
398 goto failed_free;
1da177e4 399
1da177e4 400 dev_hold(dev);
f57e7d5a 401 ipgre_tunnel_link(ign, nt);
1da177e4
LT
402 return nt;
403
b37d428b
PE
404failed_free:
405 free_netdev(dev);
1da177e4
LT
406 return NULL;
407}
408
409static void ipgre_tunnel_uninit(struct net_device *dev)
410{
f57e7d5a
PE
411 struct net *net = dev_net(dev);
412 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
413
414 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
415 dev_put(dev);
416}
417
418
419static void ipgre_err(struct sk_buff *skb, u32 info)
420{
1da177e4 421
071f92d0 422/* All the routers (except for Linux) return only
1da177e4
LT
423 8 bytes of packet payload. It means, that precise relaying of
424 ICMP in the real Internet is absolutely infeasible.
425
426 Moreover, Cisco "wise men" put GRE key to the third word
427 in GRE header. It makes impossible maintaining even soft state for keyed
428 GRE tunnels with enabled checksum. Tell them "thank you".
429
430 Well, I wonder, rfc1812 was written by Cisco employee,
431 what the hell these idiots break standrads established
432 by themself???
433 */
434
6ed2533e 435 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 436 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 437 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
438 const int type = icmp_hdr(skb)->type;
439 const int code = icmp_hdr(skb)->code;
1da177e4 440 struct ip_tunnel *t;
d5a0a1e3 441 __be16 flags;
1da177e4
LT
442
443 flags = p[0];
444 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
445 if (flags&(GRE_VERSION|GRE_ROUTING))
446 return;
447 if (flags&GRE_KEY) {
448 grehlen += 4;
449 if (flags&GRE_CSUM)
450 grehlen += 4;
451 }
452 }
453
454 /* If only 8 bytes returned, keyed message will be dropped here */
455 if (skb_headlen(skb) < grehlen)
456 return;
457
458 switch (type) {
459 default:
460 case ICMP_PARAMETERPROB:
461 return;
462
463 case ICMP_DEST_UNREACH:
464 switch (code) {
465 case ICMP_SR_FAILED:
466 case ICMP_PORT_UNREACH:
467 /* Impossible event. */
468 return;
469 case ICMP_FRAG_NEEDED:
470 /* Soft state for pmtu is maintained by IP core. */
471 return;
472 default:
473 /* All others are translated to HOST_UNREACH.
474 rfc2003 contains "deep thoughts" about NET_UNREACH,
475 I believe they are just ether pollution. --ANK
476 */
477 break;
478 }
479 break;
480 case ICMP_TIME_EXCEEDED:
481 if (code != ICMP_EXC_TTL)
482 return;
483 break;
484 }
485
8d5b2c08 486 rcu_read_lock();
749c10f9 487 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
488 flags & GRE_KEY ?
489 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
490 p[1]);
f97c1e0c
JP
491 if (t == NULL || t->parms.iph.daddr == 0 ||
492 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
493 goto out;
494
495 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
496 goto out;
497
da6185d8 498 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
499 t->err_count++;
500 else
501 t->err_count = 1;
502 t->err_time = jiffies;
503out:
8d5b2c08 504 rcu_read_unlock();
1da177e4 505 return;
1da177e4
LT
506}
507
508static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
509{
510 if (INET_ECN_is_ce(iph->tos)) {
511 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 512 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 513 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 514 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
515 }
516 }
517}
518
519static inline u8
520ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
521{
522 u8 inner = 0;
523 if (skb->protocol == htons(ETH_P_IP))
524 inner = old_iph->tos;
525 else if (skb->protocol == htons(ETH_P_IPV6))
526 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
527 return INET_ECN_encapsulate(tos, inner);
528}
529
530static int ipgre_rcv(struct sk_buff *skb)
531{
532 struct iphdr *iph;
533 u8 *h;
d5a0a1e3 534 __be16 flags;
d3bc23e7 535 __sum16 csum = 0;
d5a0a1e3 536 __be32 key = 0;
1da177e4
LT
537 u32 seqno = 0;
538 struct ip_tunnel *tunnel;
539 int offset = 4;
e1a80002 540 __be16 gre_proto;
64194c31 541 unsigned int len;
1da177e4
LT
542
543 if (!pskb_may_pull(skb, 16))
544 goto drop_nolock;
545
eddc9ec5 546 iph = ip_hdr(skb);
1da177e4 547 h = skb->data;
d5a0a1e3 548 flags = *(__be16*)h;
1da177e4
LT
549
550 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
551 /* - Version must be 0.
552 - We do not support routing headers.
553 */
554 if (flags&(GRE_VERSION|GRE_ROUTING))
555 goto drop_nolock;
556
557 if (flags&GRE_CSUM) {
fb286bb2 558 switch (skb->ip_summed) {
84fa7933 559 case CHECKSUM_COMPLETE:
d3bc23e7 560 csum = csum_fold(skb->csum);
fb286bb2
HX
561 if (!csum)
562 break;
563 /* fall through */
564 case CHECKSUM_NONE:
565 skb->csum = 0;
566 csum = __skb_checksum_complete(skb);
84fa7933 567 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
568 }
569 offset += 4;
570 }
571 if (flags&GRE_KEY) {
d5a0a1e3 572 key = *(__be32*)(h + offset);
1da177e4
LT
573 offset += 4;
574 }
575 if (flags&GRE_SEQ) {
d5a0a1e3 576 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
577 offset += 4;
578 }
579 }
580
e1a80002
HX
581 gre_proto = *(__be16 *)(h + 2);
582
8d5b2c08 583 rcu_read_lock();
749c10f9 584 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
585 iph->saddr, iph->daddr, key,
586 gre_proto))) {
addd68eb
PE
587 struct net_device_stats *stats = &tunnel->dev->stats;
588
1da177e4
LT
589 secpath_reset(skb);
590
e1a80002 591 skb->protocol = gre_proto;
1da177e4
LT
592 /* WCCP version 1 and 2 protocol decoding.
593 * - Change protocol to IP
594 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
595 */
e1a80002 596 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 597 skb->protocol = htons(ETH_P_IP);
e905a9ed 598 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
599 offset += 4;
600 }
601
1d069167 602 skb->mac_header = skb->network_header;
4209fb60 603 __pskb_pull(skb, offset);
9c70220b 604 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
605 skb->pkt_type = PACKET_HOST;
606#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 607 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 608 /* Looped back packet, drop it! */
511c3f92 609 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 610 goto drop;
addd68eb 611 stats->multicast++;
1da177e4
LT
612 skb->pkt_type = PACKET_BROADCAST;
613 }
614#endif
615
616 if (((flags&GRE_CSUM) && csum) ||
617 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
618 stats->rx_crc_errors++;
619 stats->rx_errors++;
1da177e4
LT
620 goto drop;
621 }
622 if (tunnel->parms.i_flags&GRE_SEQ) {
623 if (!(flags&GRE_SEQ) ||
624 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
625 stats->rx_fifo_errors++;
626 stats->rx_errors++;
1da177e4
LT
627 goto drop;
628 }
629 tunnel->i_seqno = seqno + 1;
630 }
e1a80002 631
64194c31
HX
632 len = skb->len;
633
e1a80002
HX
634 /* Warning: All skb pointers will be invalidated! */
635 if (tunnel->dev->type == ARPHRD_ETHER) {
636 if (!pskb_may_pull(skb, ETH_HLEN)) {
637 stats->rx_length_errors++;
638 stats->rx_errors++;
639 goto drop;
640 }
641
642 iph = ip_hdr(skb);
643 skb->protocol = eth_type_trans(skb, tunnel->dev);
644 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
645 }
646
addd68eb 647 stats->rx_packets++;
64194c31 648 stats->rx_bytes += len;
1da177e4 649 skb->dev = tunnel->dev;
adf30907 650 skb_dst_drop(skb);
1da177e4 651 nf_reset(skb);
e1a80002
HX
652
653 skb_reset_network_header(skb);
1da177e4 654 ipgre_ecn_decapsulate(iph, skb);
e1a80002 655
1da177e4 656 netif_rx(skb);
8d5b2c08 657 rcu_read_unlock();
1da177e4
LT
658 return(0);
659 }
45af08be 660 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
661
662drop:
8d5b2c08 663 rcu_read_unlock();
1da177e4
LT
664drop_nolock:
665 kfree_skb(skb);
666 return(0);
667}
668
6fef4c0c 669static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 670{
2941a486 671 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
672 struct net_device_stats *stats = &dev->stats;
673 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 674 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
675 struct iphdr *tiph;
676 u8 tos;
d5a0a1e3 677 __be16 df;
1da177e4
LT
678 struct rtable *rt; /* Route to the other host */
679 struct net_device *tdev; /* Device to other host */
680 struct iphdr *iph; /* Our new IP header */
c2636b4d 681 unsigned int max_headroom; /* The extra header space needed */
1da177e4 682 int gre_hlen;
d5a0a1e3 683 __be32 dst;
1da177e4
LT
684 int mtu;
685
e1a80002
HX
686 if (dev->type == ARPHRD_ETHER)
687 IPCB(skb)->flags = 0;
688
689 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 690 gre_hlen = 0;
6ed2533e 691 tiph = (struct iphdr *)skb->data;
1da177e4
LT
692 } else {
693 gre_hlen = tunnel->hlen;
694 tiph = &tunnel->parms.iph;
695 }
696
697 if ((dst = tiph->daddr) == 0) {
698 /* NBMA tunnel */
699
adf30907 700 if (skb_dst(skb) == NULL) {
addd68eb 701 stats->tx_fifo_errors++;
1da177e4
LT
702 goto tx_error;
703 }
704
705 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 706 rt = skb_rtable(skb);
1da177e4
LT
707 if ((dst = rt->rt_gateway) == 0)
708 goto tx_error_icmp;
709 }
710#ifdef CONFIG_IPV6
711 else if (skb->protocol == htons(ETH_P_IPV6)) {
712 struct in6_addr *addr6;
713 int addr_type;
adf30907 714 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
715
716 if (neigh == NULL)
717 goto tx_error;
718
6ed2533e 719 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
720 addr_type = ipv6_addr_type(addr6);
721
722 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 723 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
724 addr_type = ipv6_addr_type(addr6);
725 }
726
727 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
728 goto tx_error_icmp;
729
730 dst = addr6->s6_addr32[3];
731 }
732#endif
733 else
734 goto tx_error;
735 }
736
737 tos = tiph->tos;
ee686ca9
AJ
738 if (tos == 1) {
739 tos = 0;
1da177e4
LT
740 if (skb->protocol == htons(ETH_P_IP))
741 tos = old_iph->tos;
1da177e4
LT
742 }
743
744 {
745 struct flowi fl = { .oif = tunnel->parms.link,
746 .nl_u = { .ip4_u =
747 { .daddr = dst,
748 .saddr = tiph->saddr,
749 .tos = RT_TOS(tos) } },
750 .proto = IPPROTO_GRE };
96635522 751 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 752 stats->tx_carrier_errors++;
1da177e4
LT
753 goto tx_error;
754 }
755 }
756 tdev = rt->u.dst.dev;
757
758 if (tdev == dev) {
759 ip_rt_put(rt);
addd68eb 760 stats->collisions++;
1da177e4
LT
761 goto tx_error;
762 }
763
764 df = tiph->frag_off;
765 if (df)
c95b819a 766 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 767 else
adf30907 768 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 769
adf30907
ED
770 if (skb_dst(skb))
771 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
772
773 if (skb->protocol == htons(ETH_P_IP)) {
774 df |= (old_iph->frag_off&htons(IP_DF));
775
776 if ((old_iph->frag_off&htons(IP_DF)) &&
777 mtu < ntohs(old_iph->tot_len)) {
778 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
779 ip_rt_put(rt);
780 goto tx_error;
781 }
782 }
783#ifdef CONFIG_IPV6
784 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 785 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 786
adf30907 787 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
788 if ((tunnel->parms.iph.daddr &&
789 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
790 rt6->rt6i_dst.plen == 128) {
791 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 792 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
793 }
794 }
795
796 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 797 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
798 ip_rt_put(rt);
799 goto tx_error;
800 }
801 }
802#endif
803
804 if (tunnel->err_count > 0) {
da6185d8
WY
805 if (time_before(jiffies,
806 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
807 tunnel->err_count--;
808
809 dst_link_failure(skb);
810 } else
811 tunnel->err_count = 0;
812 }
813
243aad83 814 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
1da177e4 815
cfbba49d
PM
816 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
817 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4 818 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
243aad83
TT
819 if (max_headroom > dev->needed_headroom)
820 dev->needed_headroom = max_headroom;
1da177e4
LT
821 if (!new_skb) {
822 ip_rt_put(rt);
0bfbedb1 823 txq->tx_dropped++;
1da177e4 824 dev_kfree_skb(skb);
6ed10654 825 return NETDEV_TX_OK;
1da177e4
LT
826 }
827 if (skb->sk)
828 skb_set_owner_w(new_skb, skb->sk);
829 dev_kfree_skb(skb);
830 skb = new_skb;
eddc9ec5 831 old_iph = ip_hdr(skb);
1da177e4
LT
832 }
833
64194c31 834 skb_reset_transport_header(skb);
e2d1bca7
ACM
835 skb_push(skb, gre_hlen);
836 skb_reset_network_header(skb);
1da177e4 837 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
838 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
839 IPSKB_REROUTED);
adf30907
ED
840 skb_dst_drop(skb);
841 skb_dst_set(skb, &rt->u.dst);
1da177e4
LT
842
843 /*
844 * Push down and install the IPIP header.
845 */
846
eddc9ec5 847 iph = ip_hdr(skb);
1da177e4
LT
848 iph->version = 4;
849 iph->ihl = sizeof(struct iphdr) >> 2;
850 iph->frag_off = df;
851 iph->protocol = IPPROTO_GRE;
852 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
853 iph->daddr = rt->rt_dst;
854 iph->saddr = rt->rt_src;
855
856 if ((iph->ttl = tiph->ttl) == 0) {
857 if (skb->protocol == htons(ETH_P_IP))
858 iph->ttl = old_iph->ttl;
859#ifdef CONFIG_IPV6
860 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 861 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
862#endif
863 else
864 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
865 }
866
e1a80002
HX
867 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
868 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
869 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
870
871 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 872 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
873
874 if (tunnel->parms.o_flags&GRE_SEQ) {
875 ++tunnel->o_seqno;
876 *ptr = htonl(tunnel->o_seqno);
877 ptr--;
878 }
879 if (tunnel->parms.o_flags&GRE_KEY) {
880 *ptr = tunnel->parms.o_key;
881 ptr--;
882 }
883 if (tunnel->parms.o_flags&GRE_CSUM) {
884 *ptr = 0;
5f92a738 885 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
886 }
887 }
888
889 nf_reset(skb);
890
891 IPTUNNEL_XMIT();
6ed10654 892 return NETDEV_TX_OK;
1da177e4
LT
893
894tx_error_icmp:
895 dst_link_failure(skb);
896
897tx_error:
898 stats->tx_errors++;
899 dev_kfree_skb(skb);
6ed10654 900 return NETDEV_TX_OK;
1da177e4
LT
901}
902
42aa9162 903static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
904{
905 struct net_device *tdev = NULL;
906 struct ip_tunnel *tunnel;
907 struct iphdr *iph;
908 int hlen = LL_MAX_HEADER;
909 int mtu = ETH_DATA_LEN;
910 int addend = sizeof(struct iphdr) + 4;
911
912 tunnel = netdev_priv(dev);
913 iph = &tunnel->parms.iph;
914
c95b819a 915 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
916
917 if (iph->daddr) {
918 struct flowi fl = { .oif = tunnel->parms.link,
919 .nl_u = { .ip4_u =
920 { .daddr = iph->daddr,
921 .saddr = iph->saddr,
922 .tos = RT_TOS(iph->tos) } },
923 .proto = IPPROTO_GRE };
924 struct rtable *rt;
96635522 925 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
926 tdev = rt->u.dst.dev;
927 ip_rt_put(rt);
928 }
e1a80002
HX
929
930 if (dev->type != ARPHRD_ETHER)
931 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
932 }
933
934 if (!tdev && tunnel->parms.link)
96635522 935 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
936
937 if (tdev) {
c95b819a 938 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
939 mtu = tdev->mtu;
940 }
941 dev->iflink = tunnel->parms.link;
942
943 /* Precalculate GRE options length */
944 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
945 if (tunnel->parms.o_flags&GRE_CSUM)
946 addend += 4;
947 if (tunnel->parms.o_flags&GRE_KEY)
948 addend += 4;
949 if (tunnel->parms.o_flags&GRE_SEQ)
950 addend += 4;
951 }
c95b819a 952 dev->needed_headroom = addend + hlen;
8cdb0456 953 mtu -= dev->hard_header_len + addend;
42aa9162
HX
954
955 if (mtu < 68)
956 mtu = 68;
957
ee34c1eb
MS
958 tunnel->hlen = addend;
959
42aa9162 960 return mtu;
ee34c1eb
MS
961}
962
1da177e4
LT
963static int
964ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
965{
966 int err = 0;
967 struct ip_tunnel_parm p;
968 struct ip_tunnel *t;
f57e7d5a
PE
969 struct net *net = dev_net(dev);
970 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
971
972 switch (cmd) {
973 case SIOCGETTUNNEL:
974 t = NULL;
7daa0004 975 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
976 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
977 err = -EFAULT;
978 break;
979 }
f57e7d5a 980 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
981 }
982 if (t == NULL)
2941a486 983 t = netdev_priv(dev);
1da177e4
LT
984 memcpy(&p, &t->parms, sizeof(p));
985 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
986 err = -EFAULT;
987 break;
988
989 case SIOCADDTUNNEL:
990 case SIOCCHGTUNNEL:
991 err = -EPERM;
992 if (!capable(CAP_NET_ADMIN))
993 goto done;
994
995 err = -EFAULT;
996 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
997 goto done;
998
999 err = -EINVAL;
1000 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1001 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1002 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1003 goto done;
1004 if (p.iph.ttl)
1005 p.iph.frag_off |= htons(IP_DF);
1006
1007 if (!(p.i_flags&GRE_KEY))
1008 p.i_key = 0;
1009 if (!(p.o_flags&GRE_KEY))
1010 p.o_key = 0;
1011
f57e7d5a 1012 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1013
7daa0004 1014 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1015 if (t != NULL) {
1016 if (t->dev != dev) {
1017 err = -EEXIST;
1018 break;
1019 }
1020 } else {
6ed2533e 1021 unsigned nflags = 0;
1da177e4 1022
2941a486 1023 t = netdev_priv(dev);
1da177e4 1024
f97c1e0c 1025 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1026 nflags = IFF_BROADCAST;
1027 else if (p.iph.daddr)
1028 nflags = IFF_POINTOPOINT;
1029
1030 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1031 err = -EINVAL;
1032 break;
1033 }
f57e7d5a 1034 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1035 t->parms.iph.saddr = p.iph.saddr;
1036 t->parms.iph.daddr = p.iph.daddr;
1037 t->parms.i_key = p.i_key;
1038 t->parms.o_key = p.o_key;
1039 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1040 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1041 ipgre_tunnel_link(ign, t);
1da177e4
LT
1042 netdev_state_change(dev);
1043 }
1044 }
1045
1046 if (t) {
1047 err = 0;
1048 if (cmd == SIOCCHGTUNNEL) {
1049 t->parms.iph.ttl = p.iph.ttl;
1050 t->parms.iph.tos = p.iph.tos;
1051 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1052 if (t->parms.link != p.link) {
1053 t->parms.link = p.link;
42aa9162 1054 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1055 netdev_state_change(dev);
1056 }
1da177e4
LT
1057 }
1058 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1059 err = -EFAULT;
1060 } else
1061 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1062 break;
1063
1064 case SIOCDELTUNNEL:
1065 err = -EPERM;
1066 if (!capable(CAP_NET_ADMIN))
1067 goto done;
1068
7daa0004 1069 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1070 err = -EFAULT;
1071 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1072 goto done;
1073 err = -ENOENT;
f57e7d5a 1074 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1075 goto done;
1076 err = -EPERM;
7daa0004 1077 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1078 goto done;
1079 dev = t->dev;
1080 }
22f8cde5
SH
1081 unregister_netdevice(dev);
1082 err = 0;
1da177e4
LT
1083 break;
1084
1085 default:
1086 err = -EINVAL;
1087 }
1088
1089done:
1090 return err;
1091}
1092
1da177e4
LT
1093static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1094{
2941a486 1095 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1096 if (new_mtu < 68 ||
1097 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1098 return -EINVAL;
1099 dev->mtu = new_mtu;
1100 return 0;
1101}
1102
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.
1106
1107
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1112
1da177e4
LT
1113 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1114 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1115
1116 ping -t 255 224.66.66.66
1117
1118 If nobody answers, mbone does not work.
1119
1120 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1121 ip addr add 10.66.66.<somewhat>/24 dev Universe
1122 ifconfig Universe up
1123 ifconfig Universe add fe80::<Your_real_addr>/10
1124 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1125 ftp 10.66.66.66
1126 ...
1127 ftp fec0:6666:6666::193.233.7.65
1128 ...
1129
1130 */
1131
3b04ddde
SH
1132static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1133 unsigned short type,
1134 const void *daddr, const void *saddr, unsigned len)
1da177e4 1135{
2941a486 1136 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1137 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1138 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1139
1140 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1141 p[0] = t->parms.o_flags;
1142 p[1] = htons(type);
1143
1144 /*
e905a9ed 1145 * Set the source hardware address.
1da177e4 1146 */
e905a9ed 1147
1da177e4
LT
1148 if (saddr)
1149 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1150 if (daddr)
1da177e4 1151 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1152 if (iph->daddr)
1da177e4 1153 return t->hlen;
e905a9ed 1154
1da177e4
LT
1155 return -t->hlen;
1156}
1157
6a5f44d7
TT
1158static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1159{
6ed2533e 1160 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1161 memcpy(haddr, &iph->saddr, 4);
1162 return 4;
1163}
1164
3b04ddde
SH
1165static const struct header_ops ipgre_header_ops = {
1166 .create = ipgre_header,
6a5f44d7 1167 .parse = ipgre_header_parse,
3b04ddde
SH
1168};
1169
6a5f44d7 1170#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1171static int ipgre_open(struct net_device *dev)
1172{
2941a486 1173 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1174
f97c1e0c 1175 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1176 struct flowi fl = { .oif = t->parms.link,
1177 .nl_u = { .ip4_u =
1178 { .daddr = t->parms.iph.daddr,
1179 .saddr = t->parms.iph.saddr,
1180 .tos = RT_TOS(t->parms.iph.tos) } },
1181 .proto = IPPROTO_GRE };
1182 struct rtable *rt;
96635522 1183 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1184 return -EADDRNOTAVAIL;
1185 dev = rt->u.dst.dev;
1186 ip_rt_put(rt);
e5ed6399 1187 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1188 return -EADDRNOTAVAIL;
1189 t->mlink = dev->ifindex;
e5ed6399 1190 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1191 }
1192 return 0;
1193}
1194
1195static int ipgre_close(struct net_device *dev)
1196{
2941a486 1197 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1198
f97c1e0c 1199 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1200 struct in_device *in_dev;
c346dca1 1201 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1202 if (in_dev) {
1203 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1204 in_dev_put(in_dev);
1205 }
1206 }
1207 return 0;
1208}
1209
1210#endif
1211
b8c26a33
SH
1212static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_init = ipgre_tunnel_init,
1214 .ndo_uninit = ipgre_tunnel_uninit,
1215#ifdef CONFIG_NET_IPGRE_BROADCAST
1216 .ndo_open = ipgre_open,
1217 .ndo_stop = ipgre_close,
1218#endif
1219 .ndo_start_xmit = ipgre_tunnel_xmit,
1220 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1221 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1222};
1223
1da177e4
LT
1224static void ipgre_tunnel_setup(struct net_device *dev)
1225{
b8c26a33 1226 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1227 dev->destructor = free_netdev;
1da177e4
LT
1228
1229 dev->type = ARPHRD_IPGRE;
c95b819a 1230 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1231 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1232 dev->flags = IFF_NOARP;
1233 dev->iflink = 0;
1234 dev->addr_len = 4;
0b67eceb 1235 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1236 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1237}
1238
1239static int ipgre_tunnel_init(struct net_device *dev)
1240{
1da177e4
LT
1241 struct ip_tunnel *tunnel;
1242 struct iphdr *iph;
1da177e4 1243
2941a486 1244 tunnel = netdev_priv(dev);
1da177e4
LT
1245 iph = &tunnel->parms.iph;
1246
1247 tunnel->dev = dev;
1248 strcpy(tunnel->parms.name, dev->name);
1249
1250 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1251 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1252
1da177e4 1253 if (iph->daddr) {
1da177e4 1254#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1255 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1256 if (!iph->saddr)
1257 return -EINVAL;
1258 dev->flags = IFF_BROADCAST;
3b04ddde 1259 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1260 }
1261#endif
ee34c1eb 1262 } else
6a5f44d7 1263 dev->header_ops = &ipgre_header_ops;
1da177e4 1264
1da177e4
LT
1265 return 0;
1266}
1267
b8c26a33 1268static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1269{
2941a486 1270 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1271 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1272 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1273
1274 tunnel->dev = dev;
1275 strcpy(tunnel->parms.name, dev->name);
1276
1277 iph->version = 4;
1278 iph->protocol = IPPROTO_GRE;
1279 iph->ihl = 5;
1280 tunnel->hlen = sizeof(struct iphdr) + 4;
1281
1282 dev_hold(dev);
eb8ce741 1283 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1284}
1285
1286
32613090 1287static const struct net_protocol ipgre_protocol = {
1da177e4
LT
1288 .handler = ipgre_rcv,
1289 .err_handler = ipgre_err,
f96c148f 1290 .netns_ok = 1,
1da177e4
LT
1291};
1292
eef6dd65 1293static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1294{
1295 int prio;
1296
1297 for (prio = 0; prio < 4; prio++) {
1298 int h;
1299 for (h = 0; h < HASH_SIZE; h++) {
eef6dd65
ED
1300 struct ip_tunnel *t = ign->tunnels[prio][h];
1301
1302 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head);
1304 t = t->next;
1305 }
eb8ce741
PE
1306 }
1307 }
1308}
1309
2c8c1e72 1310static int __net_init ipgre_init_net(struct net *net)
59a4c759 1311{
cfb8fbf2 1312 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1313 int err;
59a4c759 1314
7daa0004
PE
1315 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1316 ipgre_tunnel_setup);
1317 if (!ign->fb_tunnel_dev) {
1318 err = -ENOMEM;
1319 goto err_alloc_dev;
1320 }
be77e593 1321 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1322
b8c26a33 1323 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1324 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1325
1326 if ((err = register_netdev(ign->fb_tunnel_dev)))
1327 goto err_reg_dev;
1328
59a4c759
PE
1329 return 0;
1330
7daa0004
PE
1331err_reg_dev:
1332 free_netdev(ign->fb_tunnel_dev);
1333err_alloc_dev:
59a4c759
PE
1334 return err;
1335}
1336
2c8c1e72 1337static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1338{
1339 struct ipgre_net *ign;
eef6dd65 1340 LIST_HEAD(list);
59a4c759
PE
1341
1342 ign = net_generic(net, ipgre_net_id);
7daa0004 1343 rtnl_lock();
eef6dd65
ED
1344 ipgre_destroy_tunnels(ign, &list);
1345 unregister_netdevice_many(&list);
7daa0004 1346 rtnl_unlock();
59a4c759
PE
1347}
1348
1349static struct pernet_operations ipgre_net_ops = {
1350 .init = ipgre_init_net,
1351 .exit = ipgre_exit_net,
cfb8fbf2
EB
1352 .id = &ipgre_net_id,
1353 .size = sizeof(struct ipgre_net),
59a4c759 1354};
1da177e4 1355
c19e654d
HX
1356static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357{
1358 __be16 flags;
1359
1360 if (!data)
1361 return 0;
1362
1363 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL;
1370
1371 return 0;
1372}
1373
e1a80002
HX
1374static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375{
1376 __be32 daddr;
1377
1378 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL;
1383 }
1384
1385 if (!data)
1386 goto out;
1387
1388 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr)
1391 return -EINVAL;
1392 }
1393
1394out:
1395 return ipgre_tunnel_validate(tb, data);
1396}
1397
c19e654d
HX
1398static void ipgre_netlink_parms(struct nlattr *data[],
1399 struct ip_tunnel_parm *parms)
1400{
7bb82d92 1401 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1402
1403 parms->iph.protocol = IPPROTO_GRE;
1404
1405 if (!data)
1406 return;
1407
1408 if (data[IFLA_GRE_LINK])
1409 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410
1411 if (data[IFLA_GRE_IFLAGS])
1412 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413
1414 if (data[IFLA_GRE_OFLAGS])
1415 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416
1417 if (data[IFLA_GRE_IKEY])
1418 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419
1420 if (data[IFLA_GRE_OKEY])
1421 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422
1423 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1424 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1425
1426 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1427 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1428
1429 if (data[IFLA_GRE_TTL])
1430 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431
1432 if (data[IFLA_GRE_TOS])
1433 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434
1435 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436 parms->iph.frag_off = htons(IP_DF);
1437}
1438
e1a80002
HX
1439static int ipgre_tap_init(struct net_device *dev)
1440{
1441 struct ip_tunnel *tunnel;
1442
1443 tunnel = netdev_priv(dev);
1444
1445 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name);
1447
1448 ipgre_tunnel_bind_dev(dev);
1449
1450 return 0;
1451}
1452
b8c26a33
SH
1453static const struct net_device_ops ipgre_tap_netdev_ops = {
1454 .ndo_init = ipgre_tap_init,
1455 .ndo_uninit = ipgre_tunnel_uninit,
1456 .ndo_start_xmit = ipgre_tunnel_xmit,
1457 .ndo_set_mac_address = eth_mac_addr,
1458 .ndo_validate_addr = eth_validate_addr,
1459 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1460};
1461
e1a80002
HX
1462static void ipgre_tap_setup(struct net_device *dev)
1463{
1464
1465 ether_setup(dev);
1466
2e9526b3 1467 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1468 dev->destructor = free_netdev;
e1a80002
HX
1469
1470 dev->iflink = 0;
1471 dev->features |= NETIF_F_NETNS_LOCAL;
1472}
1473
81adee47 1474static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1475 struct nlattr *data[])
1476{
1477 struct ip_tunnel *nt;
1478 struct net *net = dev_net(dev);
1479 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480 int mtu;
1481 int err;
1482
1483 nt = netdev_priv(dev);
1484 ipgre_netlink_parms(data, &nt->parms);
1485
e1a80002 1486 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1487 return -EEXIST;
1488
e1a80002
HX
1489 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490 random_ether_addr(dev->dev_addr);
1491
c19e654d
HX
1492 mtu = ipgre_tunnel_bind_dev(dev);
1493 if (!tb[IFLA_MTU])
1494 dev->mtu = mtu;
1495
1496 err = register_netdevice(dev);
1497 if (err)
1498 goto out;
1499
1500 dev_hold(dev);
1501 ipgre_tunnel_link(ign, nt);
1502
1503out:
1504 return err;
1505}
1506
1507static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508 struct nlattr *data[])
1509{
1510 struct ip_tunnel *t, *nt;
1511 struct net *net = dev_net(dev);
1512 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513 struct ip_tunnel_parm p;
1514 int mtu;
1515
1516 if (dev == ign->fb_tunnel_dev)
1517 return -EINVAL;
1518
1519 nt = netdev_priv(dev);
1520 ipgre_netlink_parms(data, &p);
1521
1522 t = ipgre_tunnel_locate(net, &p, 0);
1523
1524 if (t) {
1525 if (t->dev != dev)
1526 return -EEXIST;
1527 } else {
c19e654d
HX
1528 t = nt;
1529
2e9526b3
HX
1530 if (dev->type != ARPHRD_ETHER) {
1531 unsigned nflags = 0;
c19e654d 1532
2e9526b3
HX
1533 if (ipv4_is_multicast(p.iph.daddr))
1534 nflags = IFF_BROADCAST;
1535 else if (p.iph.daddr)
1536 nflags = IFF_POINTOPOINT;
1537
1538 if ((dev->flags ^ nflags) &
1539 (IFF_POINTOPOINT | IFF_BROADCAST))
1540 return -EINVAL;
1541 }
c19e654d
HX
1542
1543 ipgre_tunnel_unlink(ign, t);
1544 t->parms.iph.saddr = p.iph.saddr;
1545 t->parms.iph.daddr = p.iph.daddr;
1546 t->parms.i_key = p.i_key;
2e9526b3
HX
1547 if (dev->type != ARPHRD_ETHER) {
1548 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549 memcpy(dev->broadcast, &p.iph.daddr, 4);
1550 }
c19e654d
HX
1551 ipgre_tunnel_link(ign, t);
1552 netdev_state_change(dev);
1553 }
1554
1555 t->parms.o_key = p.o_key;
1556 t->parms.iph.ttl = p.iph.ttl;
1557 t->parms.iph.tos = p.iph.tos;
1558 t->parms.iph.frag_off = p.iph.frag_off;
1559
1560 if (t->parms.link != p.link) {
1561 t->parms.link = p.link;
1562 mtu = ipgre_tunnel_bind_dev(dev);
1563 if (!tb[IFLA_MTU])
1564 dev->mtu = mtu;
1565 netdev_state_change(dev);
1566 }
1567
1568 return 0;
1569}
1570
/*
 * rtnl_link "get_size" callback: total attribute space for the ten
 * IFLA_GRE_* attributes emitted by ipgre_fill_info().  dev is not
 * examined; the result is the same for every device.
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	size_t size = 0;

	size += nla_total_size(4);	/* IFLA_GRE_LINK */
	size += nla_total_size(2);	/* IFLA_GRE_IFLAGS */
	size += nla_total_size(2);	/* IFLA_GRE_OFLAGS */
	size += nla_total_size(4);	/* IFLA_GRE_IKEY */
	size += nla_total_size(4);	/* IFLA_GRE_OKEY */
	size += nla_total_size(4);	/* IFLA_GRE_LOCAL */
	size += nla_total_size(4);	/* IFLA_GRE_REMOTE */
	size += nla_total_size(1);	/* IFLA_GRE_TTL */
	size += nla_total_size(1);	/* IFLA_GRE_TOS */
	size += nla_total_size(1);	/* IFLA_GRE_PMTUDISC */

	return size;
}
1596
1597static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1598{
1599 struct ip_tunnel *t = netdev_priv(dev);
1600 struct ip_tunnel_parm *p = &t->parms;
1601
1602 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1605 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1607 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1608 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1609 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1612
1613 return 0;
1614
1615nla_put_failure:
1616 return -EMSGSIZE;
1617}
1618
1619static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1621 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1623 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1624 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1625 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1626 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1627 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1628 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1629 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1630};
1631
1632static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633 .kind = "gre",
1634 .maxtype = IFLA_GRE_MAX,
1635 .policy = ipgre_policy,
1636 .priv_size = sizeof(struct ip_tunnel),
1637 .setup = ipgre_tunnel_setup,
1638 .validate = ipgre_tunnel_validate,
1639 .newlink = ipgre_newlink,
1640 .changelink = ipgre_changelink,
1641 .get_size = ipgre_get_size,
1642 .fill_info = ipgre_fill_info,
1643};
1644
e1a80002
HX
1645static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646 .kind = "gretap",
1647 .maxtype = IFLA_GRE_MAX,
1648 .policy = ipgre_policy,
1649 .priv_size = sizeof(struct ip_tunnel),
1650 .setup = ipgre_tap_setup,
1651 .validate = ipgre_tap_validate,
1652 .newlink = ipgre_newlink,
1653 .changelink = ipgre_changelink,
1654 .get_size = ipgre_get_size,
1655 .fill_info = ipgre_fill_info,
1656};
1657
1da177e4
LT
/*
 *	And now the module code and kernel interface.
 */
1661
1662static int __init ipgre_init(void)
1663{
1664 int err;
1665
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1667
cfb8fbf2 1668 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1669 if (err < 0)
c2892f02
AD
1670 return err;
1671
1672 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1673 if (err < 0) {
1674 printk(KERN_INFO "ipgre init: can't add protocol\n");
1675 goto add_proto_failed;
1676 }
7daa0004 1677
c19e654d
HX
1678 err = rtnl_link_register(&ipgre_link_ops);
1679 if (err < 0)
1680 goto rtnl_link_failed;
1681
e1a80002
HX
1682 err = rtnl_link_register(&ipgre_tap_ops);
1683 if (err < 0)
1684 goto tap_ops_failed;
1685
c19e654d 1686out:
1da177e4 1687 return err;
c19e654d 1688
e1a80002
HX
1689tap_ops_failed:
1690 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1691rtnl_link_failed:
c19e654d 1692 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
c2892f02
AD
1693add_proto_failed:
1694 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1695 goto out;
1da177e4
LT
1696}
1697
db44575f 1698static void __exit ipgre_fini(void)
1da177e4 1699{
e1a80002 1700 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1701 rtnl_link_unregister(&ipgre_link_ops);
1da177e4
LT
1702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703 printk(KERN_INFO "ipgre close: can't remove protocol\n");
c2892f02 1704 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1705}
1706
1707module_init(ipgre_init);
1708module_exit(ipgre_fini);
1709MODULE_LICENSE("GPL");
4d74f8ba
PM
1710MODULE_ALIAS_RTNL_LINK("gre");
1711MODULE_ALIAS_RTNL_LINK("gretap");
This page took 0.636895 seconds and 5 git commands to generate.