flow_dissector: introduce support for Ethernet addresses
[deliverable/linux.git] / net / core / flow_dissector.c
CommitLineData
fbff949e 1#include <linux/kernel.h>
0744dd00 2#include <linux/skbuff.h>
c452ed70 3#include <linux/export.h>
0744dd00
ED
4#include <linux/ip.h>
5#include <linux/ipv6.h>
6#include <linux/if_vlan.h>
7#include <net/ip.h>
ddbe5032 8#include <net/ipv6.h>
f77668dc
DB
9#include <linux/igmp.h>
10#include <linux/icmp.h>
11#include <linux/sctp.h>
12#include <linux/dccp.h>
0744dd00
ED
13#include <linux/if_tunnel.h>
14#include <linux/if_pppox.h>
15#include <linux/ppp_defs.h>
06635a35 16#include <linux/stddef.h>
67a900cc 17#include <linux/if_ether.h>
1bd758eb 18#include <net/flow_dissector.h>
56193d1b 19#include <scsi/fc/fc_fcoe.h>
0744dd00 20
fbff949e
JP
21static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
22 enum flow_dissector_key_id key_id)
23{
24 return flow_dissector->used_keys & (1 << key_id);
25}
26
27static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
28 enum flow_dissector_key_id key_id)
29{
30 flow_dissector->used_keys |= (1 << key_id);
31}
32
33static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
34 enum flow_dissector_key_id key_id,
35 void *target_container)
36{
37 return ((char *) target_container) + flow_dissector->offset[key_id];
38}
39
40void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
41 const struct flow_dissector_key *key,
42 unsigned int key_count)
43{
44 unsigned int i;
45
46 memset(flow_dissector, 0, sizeof(*flow_dissector));
47
48 for (i = 0; i < key_count; i++, key++) {
49 /* User should make sure that every key target offset is withing
50 * boundaries of unsigned short.
51 */
52 BUG_ON(key->offset > USHRT_MAX);
53 BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
54 key->key_id));
55
56 skb_flow_dissector_set_key(flow_dissector, key->key_id);
57 flow_dissector->offset[key->key_id] = key->offset;
58 }
59
60 /* Ensure that the dissector always includes basic key. That way
61 * we are able to avoid handling lack of it in fast path.
62 */
63 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
64 FLOW_DISSECTOR_KEY_BASIC));
65}
66EXPORT_SYMBOL(skb_flow_dissector_init);
67
357afe9c 68/**
6451b3f5
WC
69 * __skb_flow_get_ports - extract the upper layer ports and return them
70 * @skb: sk_buff to extract the ports from
357afe9c
NA
71 * @thoff: transport header offset
72 * @ip_proto: protocol for which to get port offset
6451b3f5
WC
73 * @data: raw buffer pointer to the packet, if NULL use skb->data
74 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
357afe9c
NA
75 *
76 * The function will try to retrieve the ports at offset thoff + poff where poff
77 * is the protocol port offset returned from proto_ports_offset
78 */
690e36e7
DM
79__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
80 void *data, int hlen)
357afe9c
NA
81{
82 int poff = proto_ports_offset(ip_proto);
83
690e36e7
DM
84 if (!data) {
85 data = skb->data;
86 hlen = skb_headlen(skb);
87 }
88
357afe9c
NA
89 if (poff >= 0) {
90 __be32 *ports, _ports;
91
690e36e7
DM
92 ports = __skb_header_pointer(skb, thoff + poff,
93 sizeof(_ports), data, hlen, &_ports);
357afe9c
NA
94 if (ports)
95 return *ports;
96 }
97
98 return 0;
99}
690e36e7 100EXPORT_SYMBOL(__skb_flow_get_ports);
357afe9c 101
453a940e
WC
102/**
103 * __skb_flow_dissect - extract the flow_keys struct and return it
104 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
06635a35
JP
105 * @flow_dissector: list of keys to dissect
106 * @target_container: target structure to put dissected values into
453a940e
WC
107 * @data: raw buffer pointer to the packet, if NULL use skb->data
108 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
109 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
110 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
111 *
06635a35
JP
112 * The function will try to retrieve individual keys into target specified
113 * by flow_dissector from either the skbuff or a raw buffer specified by the
114 * rest parameters.
115 *
116 * Caller must take care of zeroing target container memory.
453a940e 117 */
06635a35
JP
118bool __skb_flow_dissect(const struct sk_buff *skb,
119 struct flow_dissector *flow_dissector,
120 void *target_container,
453a940e 121 void *data, __be16 proto, int nhoff, int hlen)
0744dd00 122{
06635a35
JP
123 struct flow_dissector_key_basic *key_basic;
124 struct flow_dissector_key_addrs *key_addrs;
125 struct flow_dissector_key_ports *key_ports;
0744dd00 126 u8 ip_proto;
0744dd00 127
690e36e7
DM
128 if (!data) {
129 data = skb->data;
453a940e
WC
130 proto = skb->protocol;
131 nhoff = skb_network_offset(skb);
690e36e7
DM
132 hlen = skb_headlen(skb);
133 }
134
06635a35
JP
135 /* It is ensured by skb_flow_dissector_init() that basic key will
136 * be always present.
137 */
138 key_basic = skb_flow_dissector_target(flow_dissector,
139 FLOW_DISSECTOR_KEY_BASIC,
140 target_container);
0744dd00 141
67a900cc
JP
142 if (skb_flow_dissector_uses_key(flow_dissector,
143 FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
144 struct ethhdr *eth = eth_hdr(skb);
145 struct flow_dissector_key_eth_addrs *key_eth_addrs;
146
147 key_eth_addrs = skb_flow_dissector_target(flow_dissector,
148 FLOW_DISSECTOR_KEY_ETH_ADDRS,
149 target_container);
150 memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
151 }
152
0744dd00
ED
153again:
154 switch (proto) {
2b8837ae 155 case htons(ETH_P_IP): {
0744dd00
ED
156 const struct iphdr *iph;
157 struct iphdr _iph;
158ip:
690e36e7 159 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
6f092343 160 if (!iph || iph->ihl < 5)
0744dd00 161 return false;
3797d3e8 162 nhoff += iph->ihl * 4;
0744dd00 163
3797d3e8 164 ip_proto = iph->protocol;
0744dd00
ED
165 if (ip_is_fragment(iph))
166 ip_proto = 0;
3797d3e8 167
06635a35
JP
168 if (!skb_flow_dissector_uses_key(flow_dissector,
169 FLOW_DISSECTOR_KEY_IPV4_ADDRS))
5af7fb6e 170 break;
06635a35
JP
171 key_addrs = skb_flow_dissector_target(flow_dissector,
172 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
173 target_container);
174 memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs));
0744dd00
ED
175 break;
176 }
2b8837ae 177 case htons(ETH_P_IPV6): {
0744dd00
ED
178 const struct ipv6hdr *iph;
179 struct ipv6hdr _iph;
19469a87
TH
180 __be32 flow_label;
181
0744dd00 182ipv6:
690e36e7 183 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
0744dd00
ED
184 if (!iph)
185 return false;
186
187 ip_proto = iph->nexthdr;
0744dd00 188 nhoff += sizeof(struct ipv6hdr);
19469a87 189
b924933c
JP
190 if (skb_flow_dissector_uses_key(flow_dissector,
191 FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
192 key_addrs = skb_flow_dissector_target(flow_dissector,
193 FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
194 target_container);
56193d1b 195
b924933c
JP
196 key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
197 key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
198 goto flow_label;
199 }
200 if (skb_flow_dissector_uses_key(flow_dissector,
201 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
202 struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
203
204 key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
205 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
206 target_container);
5af7fb6e 207
b924933c
JP
208 memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
209 goto flow_label;
210 }
211 break;
212flow_label:
19469a87
TH
213 flow_label = ip6_flowlabel(iph);
214 if (flow_label) {
215 /* Awesome, IPv6 packet has a flow label so we can
216 * use that to represent the ports without any
217 * further dissection.
218 */
06635a35
JP
219
220 key_basic->n_proto = proto;
221 key_basic->ip_proto = ip_proto;
222 key_basic->thoff = (u16)nhoff;
223
224 if (!skb_flow_dissector_uses_key(flow_dissector,
225 FLOW_DISSECTOR_KEY_PORTS))
226 break;
227 key_ports = skb_flow_dissector_target(flow_dissector,
228 FLOW_DISSECTOR_KEY_PORTS,
229 target_container);
230 key_ports->ports = flow_label;
19469a87
TH
231
232 return true;
233 }
234
0744dd00
ED
235 break;
236 }
2b8837ae
JP
237 case htons(ETH_P_8021AD):
238 case htons(ETH_P_8021Q): {
0744dd00
ED
239 const struct vlan_hdr *vlan;
240 struct vlan_hdr _vlan;
241
690e36e7 242 vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
0744dd00
ED
243 if (!vlan)
244 return false;
245
246 proto = vlan->h_vlan_encapsulated_proto;
247 nhoff += sizeof(*vlan);
248 goto again;
249 }
2b8837ae 250 case htons(ETH_P_PPP_SES): {
0744dd00
ED
251 struct {
252 struct pppoe_hdr hdr;
253 __be16 proto;
254 } *hdr, _hdr;
690e36e7 255 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
0744dd00
ED
256 if (!hdr)
257 return false;
258 proto = hdr->proto;
259 nhoff += PPPOE_SES_HLEN;
260 switch (proto) {
2b8837ae 261 case htons(PPP_IP):
0744dd00 262 goto ip;
2b8837ae 263 case htons(PPP_IPV6):
0744dd00
ED
264 goto ipv6;
265 default:
266 return false;
267 }
268 }
08bfc9cb
EH
269 case htons(ETH_P_TIPC): {
270 struct {
271 __be32 pre[3];
272 __be32 srcnode;
273 } *hdr, _hdr;
274 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
275 if (!hdr)
276 return false;
06635a35
JP
277 key_basic->n_proto = proto;
278 key_basic->thoff = (u16)nhoff;
279
280 if (skb_flow_dissector_uses_key(flow_dissector,
281 FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
282 return true;
283 key_addrs = skb_flow_dissector_target(flow_dissector,
284 FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
285 target_container);
286 key_addrs->src = hdr->srcnode;
287 key_addrs->dst = 0;
288 }
08bfc9cb
EH
289 return true;
290 }
56193d1b 291 case htons(ETH_P_FCOE):
06635a35 292 key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
56193d1b 293 /* fall through */
0744dd00
ED
294 default:
295 return false;
296 }
297
298 switch (ip_proto) {
299 case IPPROTO_GRE: {
300 struct gre_hdr {
301 __be16 flags;
302 __be16 proto;
303 } *hdr, _hdr;
304
690e36e7 305 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
0744dd00
ED
306 if (!hdr)
307 return false;
308 /*
309 * Only look inside GRE if version zero and no
310 * routing
311 */
312 if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
313 proto = hdr->proto;
314 nhoff += 4;
315 if (hdr->flags & GRE_CSUM)
316 nhoff += 4;
317 if (hdr->flags & GRE_KEY)
318 nhoff += 4;
319 if (hdr->flags & GRE_SEQ)
320 nhoff += 4;
e1733de2
MD
321 if (proto == htons(ETH_P_TEB)) {
322 const struct ethhdr *eth;
323 struct ethhdr _eth;
324
690e36e7
DM
325 eth = __skb_header_pointer(skb, nhoff,
326 sizeof(_eth),
327 data, hlen, &_eth);
e1733de2
MD
328 if (!eth)
329 return false;
330 proto = eth->h_proto;
331 nhoff += sizeof(*eth);
332 }
0744dd00
ED
333 goto again;
334 }
335 break;
336 }
337 case IPPROTO_IPIP:
fca41895
TH
338 proto = htons(ETH_P_IP);
339 goto ip;
b438f940
TH
340 case IPPROTO_IPV6:
341 proto = htons(ETH_P_IPV6);
342 goto ipv6;
0744dd00
ED
343 default:
344 break;
345 }
346
06635a35
JP
347 /* It is ensured by skb_flow_dissector_init() that basic key will
348 * be always present.
349 */
350 key_basic = skb_flow_dissector_target(flow_dissector,
351 FLOW_DISSECTOR_KEY_BASIC,
352 target_container);
353 key_basic->n_proto = proto;
354 key_basic->ip_proto = ip_proto;
355 key_basic->thoff = (u16) nhoff;
356
357 if (skb_flow_dissector_uses_key(flow_dissector,
358 FLOW_DISSECTOR_KEY_PORTS)) {
359 key_ports = skb_flow_dissector_target(flow_dissector,
360 FLOW_DISSECTOR_KEY_PORTS,
361 target_container);
362 key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
363 data, hlen);
364 }
5af7fb6e 365
0744dd00
ED
366 return true;
367}
690e36e7 368EXPORT_SYMBOL(__skb_flow_dissect);
441d9d32
CW
369
370static u32 hashrnd __read_mostly;
66415cf8
HFS
371static __always_inline void __flow_hash_secret_init(void)
372{
373 net_get_random_once(&hashrnd, sizeof(hashrnd));
374}
375
50fb7992 376static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval)
66415cf8 377{
50fb7992 378 return jhash_3words(a, b, c, keyval);
66415cf8
HFS
379}
380
50fb7992 381static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
5ed20a68
TH
382{
383 u32 hash;
384
385 /* get a consistent hash (same value on both flow directions) */
06635a35
JP
386 if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) ||
387 (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) &&
388 ((__force u16)keys->ports.port16[1] < (__force u16)keys->ports.port16[0]))) {
389 swap(keys->addrs.dst, keys->addrs.src);
390 swap(keys->ports.port16[0], keys->ports.port16[1]);
5ed20a68
TH
391 }
392
06635a35
JP
393 hash = __flow_hash_3words((__force u32)keys->addrs.dst,
394 (__force u32)keys->addrs.src,
395 (__force u32)keys->ports.ports,
50fb7992 396 keyval);
5ed20a68
TH
397 if (!hash)
398 hash = 1;
399
400 return hash;
401}
402
403u32 flow_hash_from_keys(struct flow_keys *keys)
404{
50fb7992
TH
405 __flow_hash_secret_init();
406 return __flow_hash_from_keys(keys, hashrnd);
5ed20a68
TH
407}
408EXPORT_SYMBOL(flow_hash_from_keys);
409
50fb7992
TH
410static inline u32 ___skb_get_hash(const struct sk_buff *skb,
411 struct flow_keys *keys, u32 keyval)
412{
06635a35 413 if (!skb_flow_dissect_flow_keys(skb, keys))
50fb7992
TH
414 return 0;
415
416 return __flow_hash_from_keys(keys, keyval);
417}
418
2f59e1eb
TH
419struct _flow_keys_digest_data {
420 __be16 n_proto;
421 u8 ip_proto;
422 u8 padding;
423 __be32 ports;
424 __be32 src;
425 __be32 dst;
426};
427
428void make_flow_keys_digest(struct flow_keys_digest *digest,
429 const struct flow_keys *flow)
430{
431 struct _flow_keys_digest_data *data =
432 (struct _flow_keys_digest_data *)digest;
433
434 BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));
435
436 memset(digest, 0, sizeof(*digest));
437
06635a35
JP
438 data->n_proto = flow->basic.n_proto;
439 data->ip_proto = flow->basic.ip_proto;
440 data->ports = flow->ports.ports;
441 data->src = flow->addrs.src;
442 data->dst = flow->addrs.dst;
2f59e1eb
TH
443}
444EXPORT_SYMBOL(make_flow_keys_digest);
445
d4fd3275
JP
446/**
447 * __skb_get_hash: calculate a flow hash
448 * @skb: sk_buff to calculate flow hash from
449 *
450 * This function calculates a flow hash based on src/dst addresses
61b905da
TH
451 * and src/dst port numbers. Sets hash in skb to non-zero hash value
452 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
441d9d32
CW
453 * if hash is a canonical 4-tuple hash over transport ports.
454 */
3958afa1 455void __skb_get_hash(struct sk_buff *skb)
441d9d32
CW
456{
457 struct flow_keys keys;
50fb7992 458 u32 hash;
441d9d32 459
50fb7992
TH
460 __flow_hash_secret_init();
461
462 hash = ___skb_get_hash(skb, &keys, hashrnd);
463 if (!hash)
441d9d32 464 return;
06635a35 465 if (keys.ports.ports)
61b905da 466 skb->l4_hash = 1;
a3b18ddb 467 skb->sw_hash = 1;
50fb7992 468 skb->hash = hash;
441d9d32 469}
3958afa1 470EXPORT_SYMBOL(__skb_get_hash);
441d9d32 471
50fb7992
TH
472__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
473{
474 struct flow_keys keys;
475
476 return ___skb_get_hash(skb, &keys, perturb);
477}
478EXPORT_SYMBOL(skb_get_hash_perturb);
479
56193d1b
AD
480u32 __skb_get_poff(const struct sk_buff *skb, void *data,
481 const struct flow_keys *keys, int hlen)
f77668dc 482{
06635a35 483 u32 poff = keys->basic.thoff;
f77668dc 484
06635a35 485 switch (keys->basic.ip_proto) {
f77668dc 486 case IPPROTO_TCP: {
5af7fb6e
AD
487 /* access doff as u8 to avoid unaligned access */
488 const u8 *doff;
489 u8 _doff;
f77668dc 490
5af7fb6e
AD
491 doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
492 data, hlen, &_doff);
493 if (!doff)
f77668dc
DB
494 return poff;
495
5af7fb6e 496 poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
f77668dc
DB
497 break;
498 }
499 case IPPROTO_UDP:
500 case IPPROTO_UDPLITE:
501 poff += sizeof(struct udphdr);
502 break;
503 /* For the rest, we do not really care about header
504 * extensions at this point for now.
505 */
506 case IPPROTO_ICMP:
507 poff += sizeof(struct icmphdr);
508 break;
509 case IPPROTO_ICMPV6:
510 poff += sizeof(struct icmp6hdr);
511 break;
512 case IPPROTO_IGMP:
513 poff += sizeof(struct igmphdr);
514 break;
515 case IPPROTO_DCCP:
516 poff += sizeof(struct dccp_hdr);
517 break;
518 case IPPROTO_SCTP:
519 poff += sizeof(struct sctphdr);
520 break;
521 }
522
523 return poff;
524}
525
0db89b8b
JP
526/**
527 * skb_get_poff - get the offset to the payload
528 * @skb: sk_buff to get the payload offset from
529 *
530 * The function will get the offset to the payload as far as it could
531 * be dissected. The main user is currently BPF, so that we can dynamically
56193d1b
AD
532 * truncate packets without needing to push actual payload to the user
533 * space and can analyze headers only, instead.
534 */
535u32 skb_get_poff(const struct sk_buff *skb)
536{
537 struct flow_keys keys;
538
06635a35 539 if (!skb_flow_dissect_flow_keys(skb, &keys))
56193d1b
AD
540 return 0;
541
542 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
543}
06635a35
JP
544
545static const struct flow_dissector_key flow_keys_dissector_keys[] = {
546 {
547 .key_id = FLOW_DISSECTOR_KEY_BASIC,
548 .offset = offsetof(struct flow_keys, basic),
549 },
550 {
551 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
552 .offset = offsetof(struct flow_keys, addrs),
553 },
554 {
555 .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
556 .offset = offsetof(struct flow_keys, addrs),
557 },
558 {
559 .key_id = FLOW_DISSECTOR_KEY_PORTS,
560 .offset = offsetof(struct flow_keys, ports),
561 },
562};
563
564static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
565 {
566 .key_id = FLOW_DISSECTOR_KEY_BASIC,
567 .offset = offsetof(struct flow_keys, basic),
568 },
569};
570
571struct flow_dissector flow_keys_dissector __read_mostly;
572EXPORT_SYMBOL(flow_keys_dissector);
573
574struct flow_dissector flow_keys_buf_dissector __read_mostly;
575
576static int __init init_default_flow_dissectors(void)
577{
578 skb_flow_dissector_init(&flow_keys_dissector,
579 flow_keys_dissector_keys,
580 ARRAY_SIZE(flow_keys_dissector_keys));
581 skb_flow_dissector_init(&flow_keys_buf_dissector,
582 flow_keys_buf_dissector_keys,
583 ARRAY_SIZE(flow_keys_buf_dissector_keys));
584 return 0;
585}
586
587late_initcall_sync(init_default_flow_dissectors);
This page took 0.329207 seconds and 5 git commands to generate.