xfrm: remove extranous rcu_read_lock
[deliverable/linux.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
1da177e4 50
dfd56b8b 51#if IS_ENABLED(CONFIG_IPV6)
1da177e4
LT
52#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 69 and silently drop packet when it expires. It is a good
bff52857 70 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
71 skb, even if no tunneling is used.
72
6d0722a2
ED
73 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
76
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
       would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
97
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 103 rapidly degrades to value <68, where looping stops.
1da177e4
LT
104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov.
121 */
122
c19e654d 123static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
124static int ipgre_tunnel_init(struct net_device *dev);
125static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 126static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
127
128/* Fallback tunnel: no source, no destination, no key, no options */
129
eb8ce741
PE
130#define HASH_SIZE 16
131
f99189b1 132static int ipgre_net_id __read_mostly;
59a4c759 133struct ipgre_net {
1507850b 134 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
eb8ce741 135
7daa0004 136 struct net_device *fb_tunnel_dev;
59a4c759
PE
137};
138
1da177e4
LT
139/* Tunnel hash table */
140
/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
 */
156
/*
 * Fold a 32-bit address or key into a hash chain index in the range 0..15.
 *
 * The argument is fully parenthesized so that an expression argument
 * (e.g. HASH(a ^ b)) is hashed as a whole rather than being torn apart by
 * the cast and shift.  It is expanded twice, so it must not have side
 * effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1da177e4 158
eb8ce741
PE
/* Named views of the four hash tables inside struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Iterate over one hash chain beginning at @start using rcu_dereference().
 * The cursor is NOT a macro parameter: the enclosing scope must declare
 * a variable named "t" (a struct ip_tunnel pointer) for the loop to use.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4 169
e985aad7
ED
170/* often modified stats are per cpu, other are shared (netdev->stats) */
171struct pcpu_tstats {
87b6d218 172 u64 rx_packets;
173 u64 rx_bytes;
174 u64 tx_packets;
175 u64 tx_bytes;
176 struct u64_stats_sync syncp;
177};
e985aad7 178
87b6d218 179static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
180 struct rtnl_link_stats64 *tot)
e985aad7 181{
e985aad7
ED
182 int i;
183
184 for_each_possible_cpu(i) {
185 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
87b6d218 186 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
187 unsigned int start;
188
189 do {
190 start = u64_stats_fetch_begin_bh(&tstats->syncp);
191 rx_packets = tstats->rx_packets;
192 tx_packets = tstats->tx_packets;
193 rx_bytes = tstats->rx_bytes;
194 tx_bytes = tstats->tx_bytes;
195 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
196
197 tot->rx_packets += rx_packets;
198 tot->tx_packets += tx_packets;
199 tot->rx_bytes += rx_bytes;
200 tot->tx_bytes += tx_bytes;
e985aad7 201 }
87b6d218 202
203 tot->multicast = dev->stats.multicast;
204 tot->rx_crc_errors = dev->stats.rx_crc_errors;
205 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206 tot->rx_length_errors = dev->stats.rx_length_errors;
207 tot->rx_errors = dev->stats.rx_errors;
208 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210 tot->tx_dropped = dev->stats.tx_dropped;
211 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
212 tot->tx_errors = dev->stats.tx_errors;
213
214 return tot;
e985aad7
ED
215}
216
d2083287 217/* Does key in tunnel parameters match packet */
218static bool ipgre_key_match(const struct ip_tunnel_parm *p,
219 __u32 flags, __be32 key)
220{
221 if (p->i_flags & GRE_KEY) {
222 if (flags & GRE_KEY)
223 return key == p->i_key;
224 else
225 return false; /* key expected, none present */
226 } else
227 return !(flags & GRE_KEY);
228}
229
1da177e4
LT
230/* Given src, dst and key, find appropriate for input tunnel. */
231
5e73ea1a
DB
232static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
233 __be32 remote, __be32 local,
d2083287 234 __u32 flags, __be32 key,
235 __be16 gre_proto)
1da177e4 236{
749c10f9
TT
237 struct net *net = dev_net(dev);
238 int link = dev->ifindex;
1507850b
ED
239 unsigned int h0 = HASH(remote);
240 unsigned int h1 = HASH(key);
afcf1242 241 struct ip_tunnel *t, *cand = NULL;
7daa0004 242 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
243 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
244 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 245 int score, cand_score = 4;
1da177e4 246
8d5b2c08 247 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
248 if (local != t->parms.iph.saddr ||
249 remote != t->parms.iph.daddr ||
749c10f9
TT
250 !(t->dev->flags & IFF_UP))
251 continue;
252
d2083287 253 if (!ipgre_key_match(&t->parms, flags, key))
254 continue;
255
749c10f9
TT
256 if (t->dev->type != ARPHRD_IPGRE &&
257 t->dev->type != dev_type)
258 continue;
259
afcf1242 260 score = 0;
749c10f9 261 if (t->parms.link != link)
afcf1242 262 score |= 1;
749c10f9 263 if (t->dev->type != dev_type)
afcf1242
TT
264 score |= 2;
265 if (score == 0)
749c10f9 266 return t;
afcf1242
TT
267
268 if (score < cand_score) {
269 cand = t;
270 cand_score = score;
271 }
1da177e4 272 }
e1a80002 273
8d5b2c08 274 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9 275 if (remote != t->parms.iph.daddr ||
749c10f9
TT
276 !(t->dev->flags & IFF_UP))
277 continue;
278
d2083287 279 if (!ipgre_key_match(&t->parms, flags, key))
280 continue;
281
749c10f9
TT
282 if (t->dev->type != ARPHRD_IPGRE &&
283 t->dev->type != dev_type)
284 continue;
285
afcf1242 286 score = 0;
749c10f9 287 if (t->parms.link != link)
afcf1242 288 score |= 1;
749c10f9 289 if (t->dev->type != dev_type)
afcf1242
TT
290 score |= 2;
291 if (score == 0)
749c10f9 292 return t;
afcf1242
TT
293
294 if (score < cand_score) {
295 cand = t;
296 cand_score = score;
297 }
1da177e4 298 }
e1a80002 299
8d5b2c08 300 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
301 if ((local != t->parms.iph.saddr &&
302 (local != t->parms.iph.daddr ||
303 !ipv4_is_multicast(local))) ||
749c10f9
TT
304 !(t->dev->flags & IFF_UP))
305 continue;
306
d2083287 307 if (!ipgre_key_match(&t->parms, flags, key))
308 continue;
309
749c10f9
TT
310 if (t->dev->type != ARPHRD_IPGRE &&
311 t->dev->type != dev_type)
312 continue;
313
afcf1242 314 score = 0;
749c10f9 315 if (t->parms.link != link)
afcf1242 316 score |= 1;
749c10f9 317 if (t->dev->type != dev_type)
afcf1242
TT
318 score |= 2;
319 if (score == 0)
749c10f9 320 return t;
afcf1242
TT
321
322 if (score < cand_score) {
323 cand = t;
324 cand_score = score;
325 }
1da177e4 326 }
e1a80002 327
8d5b2c08 328 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
329 if (t->parms.i_key != key ||
330 !(t->dev->flags & IFF_UP))
331 continue;
332
333 if (t->dev->type != ARPHRD_IPGRE &&
334 t->dev->type != dev_type)
335 continue;
336
afcf1242 337 score = 0;
749c10f9 338 if (t->parms.link != link)
afcf1242 339 score |= 1;
749c10f9 340 if (t->dev->type != dev_type)
afcf1242
TT
341 score |= 2;
342 if (score == 0)
749c10f9 343 return t;
afcf1242
TT
344
345 if (score < cand_score) {
346 cand = t;
347 cand_score = score;
348 }
1da177e4
LT
349 }
350
afcf1242
TT
351 if (cand != NULL)
352 return cand;
e1a80002 353
8d5b2c08
ED
354 dev = ign->fb_tunnel_dev;
355 if (dev->flags & IFF_UP)
356 return netdev_priv(dev);
749c10f9 357
1da177e4
LT
358 return NULL;
359}
360
1507850b 361static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
f57e7d5a 362 struct ip_tunnel_parm *parms)
1da177e4 363{
5056a1ef
YH
364 __be32 remote = parms->iph.daddr;
365 __be32 local = parms->iph.saddr;
366 __be32 key = parms->i_key;
1507850b 367 unsigned int h = HASH(key);
1da177e4
LT
368 int prio = 0;
369
370 if (local)
371 prio |= 1;
f97c1e0c 372 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
373 prio |= 2;
374 h ^= HASH(remote);
375 }
376
eb8ce741 377 return &ign->tunnels[prio][h];
1da177e4
LT
378}
379
1507850b 380static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
f57e7d5a 381 struct ip_tunnel *t)
5056a1ef 382{
f57e7d5a 383 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
384}
385
f57e7d5a 386static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 387{
1507850b 388 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
1da177e4 389
1507850b 390 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
8d5b2c08 391 rcu_assign_pointer(*tp, t);
1da177e4
LT
392}
393
f57e7d5a 394static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 395{
1507850b
ED
396 struct ip_tunnel __rcu **tp;
397 struct ip_tunnel *iter;
398
399 for (tp = ipgre_bucket(ign, t);
400 (iter = rtnl_dereference(*tp)) != NULL;
401 tp = &iter->next) {
402 if (t == iter) {
403 rcu_assign_pointer(*tp, t->next);
1da177e4
LT
404 break;
405 }
406 }
407}
408
e1a80002
HX
409static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
410 struct ip_tunnel_parm *parms,
411 int type)
1da177e4 412{
d5a0a1e3
AV
413 __be32 remote = parms->iph.daddr;
414 __be32 local = parms->iph.saddr;
415 __be32 key = parms->i_key;
749c10f9 416 int link = parms->link;
1507850b
ED
417 struct ip_tunnel *t;
418 struct ip_tunnel __rcu **tp;
e1a80002
HX
419 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
420
1507850b
ED
421 for (tp = __ipgre_bucket(ign, parms);
422 (t = rtnl_dereference(*tp)) != NULL;
423 tp = &t->next)
e1a80002
HX
424 if (local == t->parms.iph.saddr &&
425 remote == t->parms.iph.daddr &&
426 key == t->parms.i_key &&
749c10f9 427 link == t->parms.link &&
e1a80002
HX
428 type == t->dev->type)
429 break;
430
431 return t;
432}
433
1507850b 434static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
e1a80002
HX
435 struct ip_tunnel_parm *parms, int create)
436{
437 struct ip_tunnel *t, *nt;
1da177e4 438 struct net_device *dev;
1da177e4 439 char name[IFNAMSIZ];
f57e7d5a 440 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 441
e1a80002
HX
442 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
443 if (t || !create)
444 return t;
1da177e4
LT
445
446 if (parms->name[0])
447 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6 448 else
407d6fcb 449 strcpy(name, "gre%d");
1da177e4
LT
450
451 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
452 if (!dev)
407d6fcb 453 return NULL;
1da177e4 454
0b67eceb
PE
455 dev_net_set(dev, net);
456
2941a486 457 nt = netdev_priv(dev);
1da177e4 458 nt->parms = *parms;
c19e654d 459 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 460
42aa9162
HX
461 dev->mtu = ipgre_tunnel_bind_dev(dev);
462
b37d428b
PE
463 if (register_netdevice(dev) < 0)
464 goto failed_free;
1da177e4 465
f2b3ee9e
WB
466 /* Can use a lockless transmit, unless we generate output sequences */
467 if (!(nt->parms.o_flags & GRE_SEQ))
468 dev->features |= NETIF_F_LLTX;
469
1da177e4 470 dev_hold(dev);
f57e7d5a 471 ipgre_tunnel_link(ign, nt);
1da177e4
LT
472 return nt;
473
b37d428b
PE
474failed_free:
475 free_netdev(dev);
1da177e4
LT
476 return NULL;
477}
478
479static void ipgre_tunnel_uninit(struct net_device *dev)
480{
f57e7d5a
PE
481 struct net *net = dev_net(dev);
482 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
483
484 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
485 dev_put(dev);
486}
487
488
489static void ipgre_err(struct sk_buff *skb, u32 info)
490{
1da177e4 491
071f92d0 492/* All the routers (except for Linux) return only
1da177e4
LT
493 8 bytes of packet payload. It means, that precise relaying of
494 ICMP in the real Internet is absolutely infeasible.
495
496 Moreover, Cisco "wise men" put GRE key to the third word
497 in GRE header. It makes impossible maintaining even soft state for keyed
498 GRE tunnels with enabled checksum. Tell them "thank you".
499
500 Well, I wonder, rfc1812 was written by Cisco employee,
bff52857 501 what the hell these idiots break standards established
502 by themselves???
1da177e4
LT
503 */
504
b71d1d42 505 const struct iphdr *iph = (const struct iphdr *)skb->data;
5e73ea1a 506 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
1da177e4 507 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
508 const int type = icmp_hdr(skb)->type;
509 const int code = icmp_hdr(skb)->code;
1da177e4 510 struct ip_tunnel *t;
d5a0a1e3 511 __be16 flags;
d2083287 512 __be32 key = 0;
1da177e4
LT
513
514 flags = p[0];
515 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
516 if (flags&(GRE_VERSION|GRE_ROUTING))
517 return;
518 if (flags&GRE_KEY) {
519 grehlen += 4;
520 if (flags&GRE_CSUM)
521 grehlen += 4;
522 }
523 }
524
525 /* If only 8 bytes returned, keyed message will be dropped here */
526 if (skb_headlen(skb) < grehlen)
527 return;
528
d2083287 529 if (flags & GRE_KEY)
530 key = *(((__be32 *)p) + (grehlen / 4) - 1);
531
1da177e4
LT
532 switch (type) {
533 default:
534 case ICMP_PARAMETERPROB:
535 return;
536
537 case ICMP_DEST_UNREACH:
538 switch (code) {
539 case ICMP_SR_FAILED:
540 case ICMP_PORT_UNREACH:
541 /* Impossible event. */
542 return;
1da177e4
LT
543 default:
544 /* All others are translated to HOST_UNREACH.
545 rfc2003 contains "deep thoughts" about NET_UNREACH,
546 I believe they are just ether pollution. --ANK
547 */
548 break;
549 }
550 break;
551 case ICMP_TIME_EXCEEDED:
552 if (code != ICMP_EXC_TTL)
553 return;
554 break;
55be7a9c
DM
555
556 case ICMP_REDIRECT:
557 break;
1da177e4
LT
558 }
559
749c10f9 560 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
d2083287 561 flags, key, p[1]);
562
36393395 563 if (t == NULL)
0c5794a6 564 return;
36393395
DM
565
566 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
567 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
568 t->parms.link, 0, IPPROTO_GRE, 0);
0c5794a6 569 return;
36393395 570 }
55be7a9c
DM
571 if (type == ICMP_REDIRECT) {
572 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
573 IPPROTO_GRE, 0);
0c5794a6 574 return;
55be7a9c 575 }
36393395 576 if (t->parms.iph.daddr == 0 ||
f97c1e0c 577 ipv4_is_multicast(t->parms.iph.daddr))
0c5794a6 578 return;
1da177e4
LT
579
580 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
0c5794a6 581 return;
1da177e4 582
da6185d8 583 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
584 t->err_count++;
585 else
586 t->err_count = 1;
587 t->err_time = jiffies;
1da177e4
LT
588}
589
b71d1d42 590static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
1da177e4
LT
591{
592 if (INET_ECN_is_ce(iph->tos)) {
593 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 594 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 595 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 596 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
597 }
598 }
599}
600
601static inline u8
b71d1d42 602ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
1da177e4
LT
603{
604 u8 inner = 0;
605 if (skb->protocol == htons(ETH_P_IP))
606 inner = old_iph->tos;
607 else if (skb->protocol == htons(ETH_P_IPV6))
b71d1d42 608 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
1da177e4
LT
609 return INET_ECN_encapsulate(tos, inner);
610}
611
612static int ipgre_rcv(struct sk_buff *skb)
613{
b71d1d42 614 const struct iphdr *iph;
1da177e4 615 u8 *h;
d5a0a1e3 616 __be16 flags;
d3bc23e7 617 __sum16 csum = 0;
d5a0a1e3 618 __be32 key = 0;
1da177e4
LT
619 u32 seqno = 0;
620 struct ip_tunnel *tunnel;
621 int offset = 4;
e1a80002 622 __be16 gre_proto;
1da177e4
LT
623
624 if (!pskb_may_pull(skb, 16))
0c5794a6 625 goto drop;
1da177e4 626
eddc9ec5 627 iph = ip_hdr(skb);
1da177e4 628 h = skb->data;
5e73ea1a 629 flags = *(__be16 *)h;
1da177e4
LT
630
631 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
632 /* - Version must be 0.
633 - We do not support routing headers.
634 */
635 if (flags&(GRE_VERSION|GRE_ROUTING))
0c5794a6 636 goto drop;
1da177e4
LT
637
638 if (flags&GRE_CSUM) {
fb286bb2 639 switch (skb->ip_summed) {
84fa7933 640 case CHECKSUM_COMPLETE:
d3bc23e7 641 csum = csum_fold(skb->csum);
fb286bb2
HX
642 if (!csum)
643 break;
644 /* fall through */
645 case CHECKSUM_NONE:
646 skb->csum = 0;
647 csum = __skb_checksum_complete(skb);
84fa7933 648 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
649 }
650 offset += 4;
651 }
652 if (flags&GRE_KEY) {
5e73ea1a 653 key = *(__be32 *)(h + offset);
1da177e4
LT
654 offset += 4;
655 }
656 if (flags&GRE_SEQ) {
5e73ea1a 657 seqno = ntohl(*(__be32 *)(h + offset));
1da177e4
LT
658 offset += 4;
659 }
660 }
661
e1a80002
HX
662 gre_proto = *(__be16 *)(h + 2);
663
d2083287 664 tunnel = ipgre_tunnel_lookup(skb->dev,
665 iph->saddr, iph->daddr, flags, key,
666 gre_proto);
667 if (tunnel) {
e985aad7 668 struct pcpu_tstats *tstats;
addd68eb 669
1da177e4
LT
670 secpath_reset(skb);
671
e1a80002 672 skb->protocol = gre_proto;
1da177e4
LT
673 /* WCCP version 1 and 2 protocol decoding.
674 * - Change protocol to IP
675 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
676 */
e1a80002 677 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 678 skb->protocol = htons(ETH_P_IP);
e905a9ed 679 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
680 offset += 4;
681 }
682
1d069167 683 skb->mac_header = skb->network_header;
4209fb60 684 __pskb_pull(skb, offset);
9c70220b 685 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
686 skb->pkt_type = PACKET_HOST;
687#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 688 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 689 /* Looped back packet, drop it! */
c7537967 690 if (rt_is_output_route(skb_rtable(skb)))
1da177e4 691 goto drop;
e985aad7 692 tunnel->dev->stats.multicast++;
1da177e4
LT
693 skb->pkt_type = PACKET_BROADCAST;
694 }
695#endif
696
697 if (((flags&GRE_CSUM) && csum) ||
698 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
e985aad7
ED
699 tunnel->dev->stats.rx_crc_errors++;
700 tunnel->dev->stats.rx_errors++;
1da177e4
LT
701 goto drop;
702 }
703 if (tunnel->parms.i_flags&GRE_SEQ) {
704 if (!(flags&GRE_SEQ) ||
705 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
e985aad7
ED
706 tunnel->dev->stats.rx_fifo_errors++;
707 tunnel->dev->stats.rx_errors++;
1da177e4
LT
708 goto drop;
709 }
710 tunnel->i_seqno = seqno + 1;
711 }
e1a80002
HX
712
713 /* Warning: All skb pointers will be invalidated! */
714 if (tunnel->dev->type == ARPHRD_ETHER) {
715 if (!pskb_may_pull(skb, ETH_HLEN)) {
e985aad7
ED
716 tunnel->dev->stats.rx_length_errors++;
717 tunnel->dev->stats.rx_errors++;
e1a80002
HX
718 goto drop;
719 }
720
721 iph = ip_hdr(skb);
722 skb->protocol = eth_type_trans(skb, tunnel->dev);
723 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
724 }
725
e985aad7 726 tstats = this_cpu_ptr(tunnel->dev->tstats);
87b6d218 727 u64_stats_update_begin(&tstats->syncp);
e985aad7
ED
728 tstats->rx_packets++;
729 tstats->rx_bytes += skb->len;
87b6d218 730 u64_stats_update_end(&tstats->syncp);
e985aad7
ED
731
732 __skb_tunnel_rx(skb, tunnel->dev);
e1a80002
HX
733
734 skb_reset_network_header(skb);
1da177e4 735 ipgre_ecn_decapsulate(iph, skb);
e1a80002 736
caf586e5 737 netif_rx(skb);
8990f468 738
8990f468 739 return 0;
1da177e4 740 }
45af08be 741 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
742
743drop:
1da177e4 744 kfree_skb(skb);
a02cec21 745 return 0;
1da177e4
LT
746}
747
6fef4c0c 748static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 749{
2941a486 750 struct ip_tunnel *tunnel = netdev_priv(dev);
e985aad7 751 struct pcpu_tstats *tstats;
b71d1d42
ED
752 const struct iphdr *old_iph = ip_hdr(skb);
753 const struct iphdr *tiph;
cbb1e85f 754 struct flowi4 fl4;
1da177e4 755 u8 tos;
d5a0a1e3 756 __be16 df;
1da177e4 757 struct rtable *rt; /* Route to the other host */
1507850b 758 struct net_device *tdev; /* Device to other host */
1da177e4 759 struct iphdr *iph; /* Our new IP header */
c2636b4d 760 unsigned int max_headroom; /* The extra header space needed */
1da177e4 761 int gre_hlen;
d5a0a1e3 762 __be32 dst;
1da177e4
LT
763 int mtu;
764
6b78f16e
ED
765 if (skb->ip_summed == CHECKSUM_PARTIAL &&
766 skb_checksum_help(skb))
767 goto tx_error;
768
e1a80002
HX
769 if (dev->type == ARPHRD_ETHER)
770 IPCB(skb)->flags = 0;
771
772 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 773 gre_hlen = 0;
b71d1d42 774 tiph = (const struct iphdr *)skb->data;
1da177e4
LT
775 } else {
776 gre_hlen = tunnel->hlen;
777 tiph = &tunnel->parms.iph;
778 }
779
780 if ((dst = tiph->daddr) == 0) {
781 /* NBMA tunnel */
782
adf30907 783 if (skb_dst(skb) == NULL) {
e985aad7 784 dev->stats.tx_fifo_errors++;
1da177e4
LT
785 goto tx_error;
786 }
787
61d57f87 788 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 789 rt = skb_rtable(skb);
f8126f1d 790 dst = rt_nexthop(rt, old_iph->daddr);
61d57f87 791 }
dfd56b8b 792#if IS_ENABLED(CONFIG_IPV6)
1da177e4 793 else if (skb->protocol == htons(ETH_P_IPV6)) {
b71d1d42 794 const struct in6_addr *addr6;
0ec88662
DM
795 struct neighbour *neigh;
796 bool do_tx_error_icmp;
1da177e4 797 int addr_type;
1da177e4 798
0ec88662 799 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
1da177e4
LT
800 if (neigh == NULL)
801 goto tx_error;
802
b71d1d42 803 addr6 = (const struct in6_addr *)&neigh->primary_key;
1da177e4
LT
804 addr_type = ipv6_addr_type(addr6);
805
806 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 807 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
808 addr_type = ipv6_addr_type(addr6);
809 }
810
811 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
0ec88662
DM
812 do_tx_error_icmp = true;
813 else {
814 do_tx_error_icmp = false;
815 dst = addr6->s6_addr32[3];
816 }
817 neigh_release(neigh);
818 if (do_tx_error_icmp)
1da177e4 819 goto tx_error_icmp;
1da177e4
LT
820 }
821#endif
822 else
823 goto tx_error;
824 }
825
826 tos = tiph->tos;
ee686ca9
AJ
827 if (tos == 1) {
828 tos = 0;
1da177e4
LT
829 if (skb->protocol == htons(ETH_P_IP))
830 tos = old_iph->tos;
dd4ba83d 831 else if (skb->protocol == htons(ETH_P_IPV6))
b71d1d42 832 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
1da177e4
LT
833 }
834
cbb1e85f 835 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
78fbfd8a
DM
836 tunnel->parms.o_key, RT_TOS(tos),
837 tunnel->parms.link);
838 if (IS_ERR(rt)) {
839 dev->stats.tx_carrier_errors++;
840 goto tx_error;
1da177e4 841 }
d8d1f30b 842 tdev = rt->dst.dev;
1da177e4
LT
843
844 if (tdev == dev) {
845 ip_rt_put(rt);
e985aad7 846 dev->stats.collisions++;
1da177e4
LT
847 goto tx_error;
848 }
849
850 df = tiph->frag_off;
851 if (df)
d8d1f30b 852 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 853 else
adf30907 854 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 855
adf30907 856 if (skb_dst(skb))
6700c270 857 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1da177e4
LT
858
859 if (skb->protocol == htons(ETH_P_IP)) {
860 df |= (old_iph->frag_off&htons(IP_DF));
861
862 if ((old_iph->frag_off&htons(IP_DF)) &&
863 mtu < ntohs(old_iph->tot_len)) {
864 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
865 ip_rt_put(rt);
866 goto tx_error;
867 }
868 }
dfd56b8b 869#if IS_ENABLED(CONFIG_IPV6)
1da177e4 870 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 871 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 872
adf30907 873 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
874 if ((tunnel->parms.iph.daddr &&
875 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
876 rt6->rt6i_dst.plen == 128) {
877 rt6->rt6i_flags |= RTF_MODIFIED;
defb3519 878 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
1da177e4
LT
879 }
880 }
881
882 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 883 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
884 ip_rt_put(rt);
885 goto tx_error;
886 }
887 }
888#endif
889
890 if (tunnel->err_count > 0) {
da6185d8
WY
891 if (time_before(jiffies,
892 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
893 tunnel->err_count--;
894
895 dst_link_failure(skb);
896 } else
897 tunnel->err_count = 0;
898 }
899
d8d1f30b 900 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
1da177e4 901
cfbba49d
PM
902 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
903 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4 904 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
805dc1d6
HX
905 if (max_headroom > dev->needed_headroom)
906 dev->needed_headroom = max_headroom;
1da177e4
LT
907 if (!new_skb) {
908 ip_rt_put(rt);
e985aad7 909 dev->stats.tx_dropped++;
1da177e4 910 dev_kfree_skb(skb);
6ed10654 911 return NETDEV_TX_OK;
1da177e4
LT
912 }
913 if (skb->sk)
914 skb_set_owner_w(new_skb, skb->sk);
915 dev_kfree_skb(skb);
916 skb = new_skb;
eddc9ec5 917 old_iph = ip_hdr(skb);
1da177e4
LT
918 }
919
64194c31 920 skb_reset_transport_header(skb);
e2d1bca7
ACM
921 skb_push(skb, gre_hlen);
922 skb_reset_network_header(skb);
1da177e4 923 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
924 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
925 IPSKB_REROUTED);
adf30907 926 skb_dst_drop(skb);
d8d1f30b 927 skb_dst_set(skb, &rt->dst);
1da177e4
LT
928
929 /*
930 * Push down and install the IPIP header.
931 */
932
eddc9ec5 933 iph = ip_hdr(skb);
1da177e4
LT
934 iph->version = 4;
935 iph->ihl = sizeof(struct iphdr) >> 2;
936 iph->frag_off = df;
937 iph->protocol = IPPROTO_GRE;
938 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
cbb1e85f
DM
939 iph->daddr = fl4.daddr;
940 iph->saddr = fl4.saddr;
1da177e4
LT
941
942 if ((iph->ttl = tiph->ttl) == 0) {
943 if (skb->protocol == htons(ETH_P_IP))
944 iph->ttl = old_iph->ttl;
dfd56b8b 945#if IS_ENABLED(CONFIG_IPV6)
1da177e4 946 else if (skb->protocol == htons(ETH_P_IPV6))
b71d1d42 947 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
948#endif
949 else
323e126f 950 iph->ttl = ip4_dst_hoplimit(&rt->dst);
1da177e4
LT
951 }
952
e1a80002
HX
953 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
954 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
955 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
956
957 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
5e73ea1a 958 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
1da177e4
LT
959
960 if (tunnel->parms.o_flags&GRE_SEQ) {
961 ++tunnel->o_seqno;
962 *ptr = htonl(tunnel->o_seqno);
963 ptr--;
964 }
965 if (tunnel->parms.o_flags&GRE_KEY) {
966 *ptr = tunnel->parms.o_key;
967 ptr--;
968 }
969 if (tunnel->parms.o_flags&GRE_CSUM) {
970 *ptr = 0;
5e73ea1a 971 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
972 }
973 }
974
975 nf_reset(skb);
e985aad7
ED
976 tstats = this_cpu_ptr(dev->tstats);
977 __IPTUNNEL_XMIT(tstats, &dev->stats);
6ed10654 978 return NETDEV_TX_OK;
1da177e4 979
496053f4 980#if IS_ENABLED(CONFIG_IPV6)
1da177e4
LT
981tx_error_icmp:
982 dst_link_failure(skb);
496053f4 983#endif
1da177e4 984tx_error:
e985aad7 985 dev->stats.tx_errors++;
1da177e4 986 dev_kfree_skb(skb);
6ed10654 987 return NETDEV_TX_OK;
1da177e4
LT
988}
989
42aa9162 990static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
991{
992 struct net_device *tdev = NULL;
993 struct ip_tunnel *tunnel;
b71d1d42 994 const struct iphdr *iph;
ee34c1eb
MS
995 int hlen = LL_MAX_HEADER;
996 int mtu = ETH_DATA_LEN;
997 int addend = sizeof(struct iphdr) + 4;
998
999 tunnel = netdev_priv(dev);
1000 iph = &tunnel->parms.iph;
1001
c95b819a 1002 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
1003
1004 if (iph->daddr) {
cbb1e85f
DM
1005 struct flowi4 fl4;
1006 struct rtable *rt;
1007
1008 rt = ip_route_output_gre(dev_net(dev), &fl4,
1009 iph->daddr, iph->saddr,
1010 tunnel->parms.o_key,
1011 RT_TOS(iph->tos),
1012 tunnel->parms.link);
b23dd4fe 1013 if (!IS_ERR(rt)) {
d8d1f30b 1014 tdev = rt->dst.dev;
ee34c1eb
MS
1015 ip_rt_put(rt);
1016 }
e1a80002
HX
1017
1018 if (dev->type != ARPHRD_ETHER)
1019 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
1020 }
1021
1022 if (!tdev && tunnel->parms.link)
96635522 1023 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
1024
1025 if (tdev) {
c95b819a 1026 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
1027 mtu = tdev->mtu;
1028 }
1029 dev->iflink = tunnel->parms.link;
1030
1031 /* Precalculate GRE options length */
1032 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1033 if (tunnel->parms.o_flags&GRE_CSUM)
1034 addend += 4;
1035 if (tunnel->parms.o_flags&GRE_KEY)
1036 addend += 4;
1037 if (tunnel->parms.o_flags&GRE_SEQ)
1038 addend += 4;
1039 }
c95b819a 1040 dev->needed_headroom = addend + hlen;
8cdb0456 1041 mtu -= dev->hard_header_len + addend;
42aa9162
HX
1042
1043 if (mtu < 68)
1044 mtu = 68;
1045
ee34c1eb
MS
1046 tunnel->hlen = addend;
1047
42aa9162 1048 return mtu;
ee34c1eb
MS
1049}
1050
1da177e4
LT
1051static int
1052ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1053{
1054 int err = 0;
1055 struct ip_tunnel_parm p;
1056 struct ip_tunnel *t;
f57e7d5a
PE
1057 struct net *net = dev_net(dev);
1058 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
1059
1060 switch (cmd) {
1061 case SIOCGETTUNNEL:
1062 t = NULL;
7daa0004 1063 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1064 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1065 err = -EFAULT;
1066 break;
1067 }
f57e7d5a 1068 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
1069 }
1070 if (t == NULL)
2941a486 1071 t = netdev_priv(dev);
1da177e4
LT
1072 memcpy(&p, &t->parms, sizeof(p));
1073 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1074 err = -EFAULT;
1075 break;
1076
1077 case SIOCADDTUNNEL:
1078 case SIOCCHGTUNNEL:
1079 err = -EPERM;
1080 if (!capable(CAP_NET_ADMIN))
1081 goto done;
1082
1083 err = -EFAULT;
1084 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1085 goto done;
1086
1087 err = -EINVAL;
1088 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1089 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1090 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1091 goto done;
1092 if (p.iph.ttl)
1093 p.iph.frag_off |= htons(IP_DF);
1094
1095 if (!(p.i_flags&GRE_KEY))
1096 p.i_key = 0;
1097 if (!(p.o_flags&GRE_KEY))
1098 p.o_key = 0;
1099
f57e7d5a 1100 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1101
7daa0004 1102 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1103 if (t != NULL) {
1104 if (t->dev != dev) {
1105 err = -EEXIST;
1106 break;
1107 }
1108 } else {
1507850b 1109 unsigned int nflags = 0;
1da177e4 1110
2941a486 1111 t = netdev_priv(dev);
1da177e4 1112
f97c1e0c 1113 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1114 nflags = IFF_BROADCAST;
1115 else if (p.iph.daddr)
1116 nflags = IFF_POINTOPOINT;
1117
1118 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1119 err = -EINVAL;
1120 break;
1121 }
f57e7d5a 1122 ipgre_tunnel_unlink(ign, t);
74b0b85b 1123 synchronize_net();
1da177e4
LT
1124 t->parms.iph.saddr = p.iph.saddr;
1125 t->parms.iph.daddr = p.iph.daddr;
1126 t->parms.i_key = p.i_key;
1127 t->parms.o_key = p.o_key;
1128 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1129 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1130 ipgre_tunnel_link(ign, t);
1da177e4
LT
1131 netdev_state_change(dev);
1132 }
1133 }
1134
1135 if (t) {
1136 err = 0;
1137 if (cmd == SIOCCHGTUNNEL) {
1138 t->parms.iph.ttl = p.iph.ttl;
1139 t->parms.iph.tos = p.iph.tos;
1140 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1141 if (t->parms.link != p.link) {
1142 t->parms.link = p.link;
42aa9162 1143 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1144 netdev_state_change(dev);
1145 }
1da177e4
LT
1146 }
1147 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1148 err = -EFAULT;
1149 } else
1150 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1151 break;
1152
1153 case SIOCDELTUNNEL:
1154 err = -EPERM;
1155 if (!capable(CAP_NET_ADMIN))
1156 goto done;
1157
7daa0004 1158 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1159 err = -EFAULT;
1160 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1161 goto done;
1162 err = -ENOENT;
f57e7d5a 1163 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1164 goto done;
1165 err = -EPERM;
7daa0004 1166 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1167 goto done;
1168 dev = t->dev;
1169 }
22f8cde5
SH
1170 unregister_netdevice(dev);
1171 err = 0;
1da177e4
LT
1172 break;
1173
1174 default:
1175 err = -EINVAL;
1176 }
1177
1178done:
1179 return err;
1180}
1181
1da177e4
LT
1182static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1183{
2941a486 1184 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1185 if (new_mtu < 68 ||
1186 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1187 return -EINVAL;
1188 dev->mtu = new_mtu;
1189 return 0;
1190}
1191
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could do something similar,
   but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1201
1da177e4
LT
1202 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1203 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1204
1205 ping -t 255 224.66.66.66
1206
1207 If nobody answers, mbone does not work.
1208
1209 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1210 ip addr add 10.66.66.<somewhat>/24 dev Universe
1211 ifconfig Universe up
1212 ifconfig Universe add fe80::<Your_real_addr>/10
1213 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1214 ftp 10.66.66.66
1215 ...
1216 ftp fec0:6666:6666::193.233.7.65
1217 ...
1218
1219 */
1220
3b04ddde
SH
1221static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1222 unsigned short type,
1507850b 1223 const void *daddr, const void *saddr, unsigned int len)
1da177e4 1224{
2941a486 1225 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1226 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
5e73ea1a 1227 __be16 *p = (__be16 *)(iph+1);
1da177e4
LT
1228
1229 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1230 p[0] = t->parms.o_flags;
1231 p[1] = htons(type);
1232
1233 /*
e905a9ed 1234 * Set the source hardware address.
1da177e4 1235 */
e905a9ed 1236
1da177e4
LT
1237 if (saddr)
1238 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1239 if (daddr)
1da177e4 1240 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1241 if (iph->daddr)
1da177e4 1242 return t->hlen;
e905a9ed 1243
1da177e4
LT
1244 return -t->hlen;
1245}
1246
6a5f44d7
TT
1247static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1248{
b71d1d42 1249 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1250 memcpy(haddr, &iph->saddr, 4);
1251 return 4;
1252}
1253
3b04ddde
SH
1254static const struct header_ops ipgre_header_ops = {
1255 .create = ipgre_header,
6a5f44d7 1256 .parse = ipgre_header_parse,
3b04ddde
SH
1257};
1258
6a5f44d7 1259#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1260static int ipgre_open(struct net_device *dev)
1261{
2941a486 1262 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1263
f97c1e0c 1264 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
1265 struct flowi4 fl4;
1266 struct rtable *rt;
1267
1268 rt = ip_route_output_gre(dev_net(dev), &fl4,
1269 t->parms.iph.daddr,
1270 t->parms.iph.saddr,
1271 t->parms.o_key,
1272 RT_TOS(t->parms.iph.tos),
1273 t->parms.link);
b23dd4fe 1274 if (IS_ERR(rt))
1da177e4 1275 return -EADDRNOTAVAIL;
d8d1f30b 1276 dev = rt->dst.dev;
1da177e4 1277 ip_rt_put(rt);
e5ed6399 1278 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1279 return -EADDRNOTAVAIL;
1280 t->mlink = dev->ifindex;
e5ed6399 1281 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1282 }
1283 return 0;
1284}
1285
1286static int ipgre_close(struct net_device *dev)
1287{
2941a486 1288 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1289
f97c1e0c 1290 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1291 struct in_device *in_dev;
c346dca1 1292 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
8723e1b4 1293 if (in_dev)
1da177e4 1294 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
1295 }
1296 return 0;
1297}
1298
1299#endif
1300
b8c26a33
SH
1301static const struct net_device_ops ipgre_netdev_ops = {
1302 .ndo_init = ipgre_tunnel_init,
1303 .ndo_uninit = ipgre_tunnel_uninit,
1304#ifdef CONFIG_NET_IPGRE_BROADCAST
1305 .ndo_open = ipgre_open,
1306 .ndo_stop = ipgre_close,
1307#endif
1308 .ndo_start_xmit = ipgre_tunnel_xmit,
1309 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1310 .ndo_change_mtu = ipgre_tunnel_change_mtu,
87b6d218 1311 .ndo_get_stats64 = ipgre_get_stats64,
b8c26a33
SH
1312};
1313
e985aad7
ED
1314static void ipgre_dev_free(struct net_device *dev)
1315{
1316 free_percpu(dev->tstats);
1317 free_netdev(dev);
1318}
1319
6b78f16e
ED
/* Feature bits set in both features and hw_features of GRE tunnel devices. */
#define GRE_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | \
			 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
1324
1da177e4
LT
1325static void ipgre_tunnel_setup(struct net_device *dev)
1326{
b8c26a33 1327 dev->netdev_ops = &ipgre_netdev_ops;
e985aad7 1328 dev->destructor = ipgre_dev_free;
1da177e4
LT
1329
1330 dev->type = ARPHRD_IPGRE;
c95b819a 1331 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1332 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1333 dev->flags = IFF_NOARP;
1334 dev->iflink = 0;
1335 dev->addr_len = 4;
0b67eceb 1336 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1337 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
6b78f16e
ED
1338
1339 dev->features |= GRE_FEATURES;
1340 dev->hw_features |= GRE_FEATURES;
1da177e4
LT
1341}
1342
1343static int ipgre_tunnel_init(struct net_device *dev)
1344{
1da177e4
LT
1345 struct ip_tunnel *tunnel;
1346 struct iphdr *iph;
1da177e4 1347
2941a486 1348 tunnel = netdev_priv(dev);
1da177e4
LT
1349 iph = &tunnel->parms.iph;
1350
1351 tunnel->dev = dev;
1352 strcpy(tunnel->parms.name, dev->name);
1353
1354 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1355 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1356
1da177e4 1357 if (iph->daddr) {
1da177e4 1358#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1359 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1360 if (!iph->saddr)
1361 return -EINVAL;
1362 dev->flags = IFF_BROADCAST;
3b04ddde 1363 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1364 }
1365#endif
ee34c1eb 1366 } else
6a5f44d7 1367 dev->header_ops = &ipgre_header_ops;
1da177e4 1368
e985aad7
ED
1369 dev->tstats = alloc_percpu(struct pcpu_tstats);
1370 if (!dev->tstats)
1371 return -ENOMEM;
1372
1da177e4
LT
1373 return 0;
1374}
1375
b8c26a33 1376static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1377{
2941a486 1378 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
1379 struct iphdr *iph = &tunnel->parms.iph;
1380
1381 tunnel->dev = dev;
1382 strcpy(tunnel->parms.name, dev->name);
1383
1384 iph->version = 4;
1385 iph->protocol = IPPROTO_GRE;
1386 iph->ihl = 5;
1387 tunnel->hlen = sizeof(struct iphdr) + 4;
1388
1389 dev_hold(dev);
1da177e4
LT
1390}
1391
1392
00959ade
DK
1393static const struct gre_protocol ipgre_protocol = {
1394 .handler = ipgre_rcv,
1395 .err_handler = ipgre_err,
1da177e4
LT
1396};
1397
eef6dd65 1398static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1399{
1400 int prio;
1401
1402 for (prio = 0; prio < 4; prio++) {
1403 int h;
1404 for (h = 0; h < HASH_SIZE; h++) {
1507850b
ED
1405 struct ip_tunnel *t;
1406
1407 t = rtnl_dereference(ign->tunnels[prio][h]);
eef6dd65
ED
1408
1409 while (t != NULL) {
1410 unregister_netdevice_queue(t->dev, head);
1507850b 1411 t = rtnl_dereference(t->next);
eef6dd65 1412 }
eb8ce741
PE
1413 }
1414 }
1415}
1416
2c8c1e72 1417static int __net_init ipgre_init_net(struct net *net)
59a4c759 1418{
cfb8fbf2 1419 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1420 int err;
59a4c759 1421
7daa0004
PE
1422 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1423 ipgre_tunnel_setup);
1424 if (!ign->fb_tunnel_dev) {
1425 err = -ENOMEM;
1426 goto err_alloc_dev;
1427 }
be77e593 1428 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1429
b8c26a33 1430 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1431 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1432
1433 if ((err = register_netdev(ign->fb_tunnel_dev)))
1434 goto err_reg_dev;
1435
3285ee3b
ED
1436 rcu_assign_pointer(ign->tunnels_wc[0],
1437 netdev_priv(ign->fb_tunnel_dev));
59a4c759
PE
1438 return 0;
1439
7daa0004 1440err_reg_dev:
3285ee3b 1441 ipgre_dev_free(ign->fb_tunnel_dev);
7daa0004 1442err_alloc_dev:
59a4c759
PE
1443 return err;
1444}
1445
2c8c1e72 1446static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1447{
1448 struct ipgre_net *ign;
eef6dd65 1449 LIST_HEAD(list);
59a4c759
PE
1450
1451 ign = net_generic(net, ipgre_net_id);
7daa0004 1452 rtnl_lock();
eef6dd65
ED
1453 ipgre_destroy_tunnels(ign, &list);
1454 unregister_netdevice_many(&list);
7daa0004 1455 rtnl_unlock();
59a4c759
PE
1456}
1457
1458static struct pernet_operations ipgre_net_ops = {
1459 .init = ipgre_init_net,
1460 .exit = ipgre_exit_net,
cfb8fbf2
EB
1461 .id = &ipgre_net_id,
1462 .size = sizeof(struct ipgre_net),
59a4c759 1463};
1da177e4 1464
c19e654d
HX
1465static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1466{
1467 __be16 flags;
1468
1469 if (!data)
1470 return 0;
1471
1472 flags = 0;
1473 if (data[IFLA_GRE_IFLAGS])
1474 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1475 if (data[IFLA_GRE_OFLAGS])
1476 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1477 if (flags & (GRE_VERSION|GRE_ROUTING))
1478 return -EINVAL;
1479
1480 return 0;
1481}
1482
e1a80002
HX
1483static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1484{
1485 __be32 daddr;
1486
1487 if (tb[IFLA_ADDRESS]) {
1488 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1489 return -EINVAL;
1490 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1491 return -EADDRNOTAVAIL;
1492 }
1493
1494 if (!data)
1495 goto out;
1496
1497 if (data[IFLA_GRE_REMOTE]) {
1498 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1499 if (!daddr)
1500 return -EINVAL;
1501 }
1502
1503out:
1504 return ipgre_tunnel_validate(tb, data);
1505}
1506
c19e654d
HX
1507static void ipgre_netlink_parms(struct nlattr *data[],
1508 struct ip_tunnel_parm *parms)
1509{
7bb82d92 1510 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1511
1512 parms->iph.protocol = IPPROTO_GRE;
1513
1514 if (!data)
1515 return;
1516
1517 if (data[IFLA_GRE_LINK])
1518 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1519
1520 if (data[IFLA_GRE_IFLAGS])
1521 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1522
1523 if (data[IFLA_GRE_OFLAGS])
1524 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1525
1526 if (data[IFLA_GRE_IKEY])
1527 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1528
1529 if (data[IFLA_GRE_OKEY])
1530 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1531
1532 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1533 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1534
1535 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1536 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1537
1538 if (data[IFLA_GRE_TTL])
1539 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1540
1541 if (data[IFLA_GRE_TOS])
1542 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1543
1544 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1545 parms->iph.frag_off = htons(IP_DF);
1546}
1547
e1a80002
HX
1548static int ipgre_tap_init(struct net_device *dev)
1549{
1550 struct ip_tunnel *tunnel;
1551
1552 tunnel = netdev_priv(dev);
1553
1554 tunnel->dev = dev;
1555 strcpy(tunnel->parms.name, dev->name);
1556
1557 ipgre_tunnel_bind_dev(dev);
1558
e985aad7
ED
1559 dev->tstats = alloc_percpu(struct pcpu_tstats);
1560 if (!dev->tstats)
1561 return -ENOMEM;
1562
e1a80002
HX
1563 return 0;
1564}
1565
b8c26a33
SH
1566static const struct net_device_ops ipgre_tap_netdev_ops = {
1567 .ndo_init = ipgre_tap_init,
1568 .ndo_uninit = ipgre_tunnel_uninit,
1569 .ndo_start_xmit = ipgre_tunnel_xmit,
1570 .ndo_set_mac_address = eth_mac_addr,
1571 .ndo_validate_addr = eth_validate_addr,
1572 .ndo_change_mtu = ipgre_tunnel_change_mtu,
87b6d218 1573 .ndo_get_stats64 = ipgre_get_stats64,
b8c26a33
SH
1574};
1575
e1a80002
HX
1576static void ipgre_tap_setup(struct net_device *dev)
1577{
1578
1579 ether_setup(dev);
1580
2e9526b3 1581 dev->netdev_ops = &ipgre_tap_netdev_ops;
e985aad7 1582 dev->destructor = ipgre_dev_free;
e1a80002
HX
1583
1584 dev->iflink = 0;
1585 dev->features |= NETIF_F_NETNS_LOCAL;
1586}
1587
81adee47 1588static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1589 struct nlattr *data[])
1590{
1591 struct ip_tunnel *nt;
1592 struct net *net = dev_net(dev);
1593 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1594 int mtu;
1595 int err;
1596
1597 nt = netdev_priv(dev);
1598 ipgre_netlink_parms(data, &nt->parms);
1599
e1a80002 1600 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1601 return -EEXIST;
1602
e1a80002 1603 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
f2cedb63 1604 eth_hw_addr_random(dev);
e1a80002 1605
c19e654d
HX
1606 mtu = ipgre_tunnel_bind_dev(dev);
1607 if (!tb[IFLA_MTU])
1608 dev->mtu = mtu;
1609
b790e01a
ED
1610 /* Can use a lockless transmit, unless we generate output sequences */
1611 if (!(nt->parms.o_flags & GRE_SEQ))
1612 dev->features |= NETIF_F_LLTX;
1613
c19e654d
HX
1614 err = register_netdevice(dev);
1615 if (err)
1616 goto out;
1617
1618 dev_hold(dev);
1619 ipgre_tunnel_link(ign, nt);
1620
1621out:
1622 return err;
1623}
1624
1625static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1626 struct nlattr *data[])
1627{
1628 struct ip_tunnel *t, *nt;
1629 struct net *net = dev_net(dev);
1630 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1631 struct ip_tunnel_parm p;
1632 int mtu;
1633
1634 if (dev == ign->fb_tunnel_dev)
1635 return -EINVAL;
1636
1637 nt = netdev_priv(dev);
1638 ipgre_netlink_parms(data, &p);
1639
1640 t = ipgre_tunnel_locate(net, &p, 0);
1641
1642 if (t) {
1643 if (t->dev != dev)
1644 return -EEXIST;
1645 } else {
c19e654d
HX
1646 t = nt;
1647
2e9526b3 1648 if (dev->type != ARPHRD_ETHER) {
1507850b 1649 unsigned int nflags = 0;
c19e654d 1650
2e9526b3
HX
1651 if (ipv4_is_multicast(p.iph.daddr))
1652 nflags = IFF_BROADCAST;
1653 else if (p.iph.daddr)
1654 nflags = IFF_POINTOPOINT;
1655
1656 if ((dev->flags ^ nflags) &
1657 (IFF_POINTOPOINT | IFF_BROADCAST))
1658 return -EINVAL;
1659 }
c19e654d
HX
1660
1661 ipgre_tunnel_unlink(ign, t);
1662 t->parms.iph.saddr = p.iph.saddr;
1663 t->parms.iph.daddr = p.iph.daddr;
1664 t->parms.i_key = p.i_key;
2e9526b3
HX
1665 if (dev->type != ARPHRD_ETHER) {
1666 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1667 memcpy(dev->broadcast, &p.iph.daddr, 4);
1668 }
c19e654d
HX
1669 ipgre_tunnel_link(ign, t);
1670 netdev_state_change(dev);
1671 }
1672
1673 t->parms.o_key = p.o_key;
1674 t->parms.iph.ttl = p.iph.ttl;
1675 t->parms.iph.tos = p.iph.tos;
1676 t->parms.iph.frag_off = p.iph.frag_off;
1677
1678 if (t->parms.link != p.link) {
1679 t->parms.link = p.link;
1680 mtu = ipgre_tunnel_bind_dev(dev);
1681 if (!tb[IFLA_MTU])
1682 dev->mtu = mtu;
1683 netdev_state_change(dev);
1684 }
1685
1686 return 0;
1687}
1688
1689static size_t ipgre_get_size(const struct net_device *dev)
1690{
1691 return
1692 /* IFLA_GRE_LINK */
1693 nla_total_size(4) +
1694 /* IFLA_GRE_IFLAGS */
1695 nla_total_size(2) +
1696 /* IFLA_GRE_OFLAGS */
1697 nla_total_size(2) +
1698 /* IFLA_GRE_IKEY */
1699 nla_total_size(4) +
1700 /* IFLA_GRE_OKEY */
1701 nla_total_size(4) +
1702 /* IFLA_GRE_LOCAL */
1703 nla_total_size(4) +
1704 /* IFLA_GRE_REMOTE */
1705 nla_total_size(4) +
1706 /* IFLA_GRE_TTL */
1707 nla_total_size(1) +
1708 /* IFLA_GRE_TOS */
1709 nla_total_size(1) +
1710 /* IFLA_GRE_PMTUDISC */
1711 nla_total_size(1) +
1712 0;
1713}
1714
1715static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1716{
1717 struct ip_tunnel *t = netdev_priv(dev);
1718 struct ip_tunnel_parm *p = &t->parms;
1719
f3756b79
DM
1720 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1721 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1722 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1723 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1724 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1725 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1726 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1727 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1728 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1729 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1730 !!(p->iph.frag_off & htons(IP_DF))))
1731 goto nla_put_failure;
c19e654d
HX
1732 return 0;
1733
1734nla_put_failure:
1735 return -EMSGSIZE;
1736}
1737
1738static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1739 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1740 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1741 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1742 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1743 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1744 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1745 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1746 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1747 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1748 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1749};
1750
1751static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1752 .kind = "gre",
1753 .maxtype = IFLA_GRE_MAX,
1754 .policy = ipgre_policy,
1755 .priv_size = sizeof(struct ip_tunnel),
1756 .setup = ipgre_tunnel_setup,
1757 .validate = ipgre_tunnel_validate,
1758 .newlink = ipgre_newlink,
1759 .changelink = ipgre_changelink,
1760 .get_size = ipgre_get_size,
1761 .fill_info = ipgre_fill_info,
1762};
1763
e1a80002
HX
1764static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1765 .kind = "gretap",
1766 .maxtype = IFLA_GRE_MAX,
1767 .policy = ipgre_policy,
1768 .priv_size = sizeof(struct ip_tunnel),
1769 .setup = ipgre_tap_setup,
1770 .validate = ipgre_tap_validate,
1771 .newlink = ipgre_newlink,
1772 .changelink = ipgre_changelink,
1773 .get_size = ipgre_get_size,
1774 .fill_info = ipgre_fill_info,
1775};
1776
1da177e4
LT
1777/*
1778 * And now the modules code and kernel interface.
1779 */
1780
1781static int __init ipgre_init(void)
1782{
1783 int err;
1784
058bd4d2 1785 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1786
cfb8fbf2 1787 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1788 if (err < 0)
c2892f02
AD
1789 return err;
1790
00959ade 1791 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1792 if (err < 0) {
058bd4d2 1793 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1794 goto add_proto_failed;
1795 }
7daa0004 1796
c19e654d
HX
1797 err = rtnl_link_register(&ipgre_link_ops);
1798 if (err < 0)
1799 goto rtnl_link_failed;
1800
e1a80002
HX
1801 err = rtnl_link_register(&ipgre_tap_ops);
1802 if (err < 0)
1803 goto tap_ops_failed;
1804
c19e654d 1805out:
1da177e4 1806 return err;
c19e654d 1807
e1a80002
HX
1808tap_ops_failed:
1809 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1810rtnl_link_failed:
00959ade 1811 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02
AD
1812add_proto_failed:
1813 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1814 goto out;
1da177e4
LT
1815}
1816
db44575f 1817static void __exit ipgre_fini(void)
1da177e4 1818{
e1a80002 1819 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1820 rtnl_link_unregister(&ipgre_link_ops);
00959ade 1821 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
058bd4d2 1822 pr_info("%s: can't remove protocol\n", __func__);
c2892f02 1823 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1824}
1825
1826module_init(ipgre_init);
1827module_exit(ipgre_fini);
1828MODULE_LICENSE("GPL");
4d74f8ba
PM
1829MODULE_ALIAS_RTNL_LINK("gre");
1830MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1831MODULE_ALIAS_NETDEV("gre0");
This page took 0.844102 seconds and 5 git commands to generate.