1 #include <linux/kernel.h>
2 #include <linux/skbuff.h>
3 #include <linux/export.h>
5 #include <linux/ipv6.h>
6 #include <linux/if_vlan.h>
9 #include <linux/igmp.h>
10 #include <linux/icmp.h>
11 #include <linux/sctp.h>
12 #include <linux/dccp.h>
13 #include <linux/if_tunnel.h>
14 #include <linux/if_pppox.h>
15 #include <linux/ppp_defs.h>
16 #include <linux/stddef.h>
17 #include <linux/if_ether.h>
18 #include <linux/mpls.h>
19 #include <net/flow_dissector.h>
20 #include <scsi/fc/fc_fcoe.h>
22 static bool skb_flow_dissector_uses_key(struct flow_dissector
*flow_dissector
,
23 enum flow_dissector_key_id key_id
)
25 return flow_dissector
->used_keys
& (1 << key_id
);
28 static void skb_flow_dissector_set_key(struct flow_dissector
*flow_dissector
,
29 enum flow_dissector_key_id key_id
)
31 flow_dissector
->used_keys
|= (1 << key_id
);
34 static void *skb_flow_dissector_target(struct flow_dissector
*flow_dissector
,
35 enum flow_dissector_key_id key_id
,
36 void *target_container
)
38 return ((char *) target_container
) + flow_dissector
->offset
[key_id
];
41 void skb_flow_dissector_init(struct flow_dissector
*flow_dissector
,
42 const struct flow_dissector_key
*key
,
43 unsigned int key_count
)
47 memset(flow_dissector
, 0, sizeof(*flow_dissector
));
49 for (i
= 0; i
< key_count
; i
++, key
++) {
50 /* User should make sure that every key target offset is withing
51 * boundaries of unsigned short.
53 BUG_ON(key
->offset
> USHRT_MAX
);
54 BUG_ON(skb_flow_dissector_uses_key(flow_dissector
,
57 skb_flow_dissector_set_key(flow_dissector
, key
->key_id
);
58 flow_dissector
->offset
[key
->key_id
] = key
->offset
;
61 /* Ensure that the dissector always includes control and basic key.
62 * That way we are able to avoid handling lack of these in fast path.
64 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector
,
65 FLOW_DISSECTOR_KEY_CONTROL
));
66 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector
,
67 FLOW_DISSECTOR_KEY_BASIC
));
69 EXPORT_SYMBOL(skb_flow_dissector_init
);
72 * __skb_flow_get_ports - extract the upper layer ports and return them
73 * @skb: sk_buff to extract the ports from
74 * @thoff: transport header offset
75 * @ip_proto: protocol for which to get port offset
76 * @data: raw buffer pointer to the packet, if NULL use skb->data
77 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
79 * The function will try to retrieve the ports at offset thoff + poff where poff
80 * is the protocol port offset returned from proto_ports_offset
82 __be32
__skb_flow_get_ports(const struct sk_buff
*skb
, int thoff
, u8 ip_proto
,
85 int poff
= proto_ports_offset(ip_proto
);
89 hlen
= skb_headlen(skb
);
93 __be32
*ports
, _ports
;
95 ports
= __skb_header_pointer(skb
, thoff
+ poff
,
96 sizeof(_ports
), data
, hlen
, &_ports
);
103 EXPORT_SYMBOL(__skb_flow_get_ports
);
106 * __skb_flow_dissect - extract the flow_keys struct and return it
107 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
108 * @flow_dissector: list of keys to dissect
109 * @target_container: target structure to put dissected values into
110 * @data: raw buffer pointer to the packet, if NULL use skb->data
111 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
112 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
113 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
115 * The function will try to retrieve individual keys into target specified
116 * by flow_dissector from either the skbuff or a raw buffer specified by the
119 * Caller must take care of zeroing target container memory.
121 bool __skb_flow_dissect(const struct sk_buff
*skb
,
122 struct flow_dissector
*flow_dissector
,
123 void *target_container
,
124 void *data
, __be16 proto
, int nhoff
, int hlen
)
126 struct flow_dissector_key_control
*key_control
;
127 struct flow_dissector_key_basic
*key_basic
;
128 struct flow_dissector_key_addrs
*key_addrs
;
129 struct flow_dissector_key_ports
*key_ports
;
130 struct flow_dissector_key_tags
*key_tags
;
131 struct flow_dissector_key_keyid
*key_keyid
;
136 proto
= skb
->protocol
;
137 nhoff
= skb_network_offset(skb
);
138 hlen
= skb_headlen(skb
);
141 /* It is ensured by skb_flow_dissector_init() that control key will
144 key_control
= skb_flow_dissector_target(flow_dissector
,
145 FLOW_DISSECTOR_KEY_CONTROL
,
148 /* It is ensured by skb_flow_dissector_init() that basic key will
151 key_basic
= skb_flow_dissector_target(flow_dissector
,
152 FLOW_DISSECTOR_KEY_BASIC
,
155 if (skb_flow_dissector_uses_key(flow_dissector
,
156 FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
157 struct ethhdr
*eth
= eth_hdr(skb
);
158 struct flow_dissector_key_eth_addrs
*key_eth_addrs
;
160 key_eth_addrs
= skb_flow_dissector_target(flow_dissector
,
161 FLOW_DISSECTOR_KEY_ETH_ADDRS
,
163 memcpy(key_eth_addrs
, &eth
->h_dest
, sizeof(*key_eth_addrs
));
168 case htons(ETH_P_IP
): {
169 const struct iphdr
*iph
;
172 iph
= __skb_header_pointer(skb
, nhoff
, sizeof(_iph
), data
, hlen
, &_iph
);
173 if (!iph
|| iph
->ihl
< 5)
175 nhoff
+= iph
->ihl
* 4;
177 ip_proto
= iph
->protocol
;
178 if (ip_is_fragment(iph
))
181 if (!skb_flow_dissector_uses_key(flow_dissector
,
182 FLOW_DISSECTOR_KEY_IPV4_ADDRS
))
185 key_addrs
= skb_flow_dissector_target(flow_dissector
,
186 FLOW_DISSECTOR_KEY_IPV4_ADDRS
, target_container
);
187 memcpy(&key_addrs
->v4addrs
, &iph
->saddr
,
188 sizeof(key_addrs
->v4addrs
));
189 key_control
->addr_type
= FLOW_DISSECTOR_KEY_IPV4_ADDRS
;
192 case htons(ETH_P_IPV6
): {
193 const struct ipv6hdr
*iph
;
198 iph
= __skb_header_pointer(skb
, nhoff
, sizeof(_iph
), data
, hlen
, &_iph
);
202 ip_proto
= iph
->nexthdr
;
203 nhoff
+= sizeof(struct ipv6hdr
);
205 if (skb_flow_dissector_uses_key(flow_dissector
,
206 FLOW_DISSECTOR_KEY_IPV6_ADDRS
)) {
207 struct flow_dissector_key_ipv6_addrs
*key_ipv6_addrs
;
209 key_ipv6_addrs
= skb_flow_dissector_target(flow_dissector
,
210 FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
213 memcpy(key_ipv6_addrs
, &iph
->saddr
, sizeof(*key_ipv6_addrs
));
214 key_control
->addr_type
= FLOW_DISSECTOR_KEY_IPV6_ADDRS
;
217 flow_label
= ip6_flowlabel(iph
);
219 if (skb_flow_dissector_uses_key(flow_dissector
,
220 FLOW_DISSECTOR_KEY_FLOW_LABEL
)) {
221 key_tags
= skb_flow_dissector_target(flow_dissector
,
222 FLOW_DISSECTOR_KEY_FLOW_LABEL
,
224 key_tags
->flow_label
= ntohl(flow_label
);
230 case htons(ETH_P_8021AD
):
231 case htons(ETH_P_8021Q
): {
232 const struct vlan_hdr
*vlan
;
233 struct vlan_hdr _vlan
;
235 vlan
= __skb_header_pointer(skb
, nhoff
, sizeof(_vlan
), data
, hlen
, &_vlan
);
239 if (skb_flow_dissector_uses_key(flow_dissector
,
240 FLOW_DISSECTOR_KEY_VLANID
)) {
241 key_tags
= skb_flow_dissector_target(flow_dissector
,
242 FLOW_DISSECTOR_KEY_VLANID
,
245 key_tags
->vlan_id
= skb_vlan_tag_get_id(skb
);
248 proto
= vlan
->h_vlan_encapsulated_proto
;
249 nhoff
+= sizeof(*vlan
);
252 case htons(ETH_P_PPP_SES
): {
254 struct pppoe_hdr hdr
;
257 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
, hlen
, &_hdr
);
261 nhoff
+= PPPOE_SES_HLEN
;
265 case htons(PPP_IPV6
):
271 case htons(ETH_P_TIPC
): {
276 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
, hlen
, &_hdr
);
279 key_basic
->n_proto
= proto
;
280 key_control
->thoff
= (u16
)nhoff
;
282 if (skb_flow_dissector_uses_key(flow_dissector
,
283 FLOW_DISSECTOR_KEY_TIPC_ADDRS
)) {
284 key_addrs
= skb_flow_dissector_target(flow_dissector
,
285 FLOW_DISSECTOR_KEY_TIPC_ADDRS
,
287 key_addrs
->tipcaddrs
.srcnode
= hdr
->srcnode
;
288 key_control
->addr_type
= FLOW_DISSECTOR_KEY_TIPC_ADDRS
;
293 case htons(ETH_P_MPLS_UC
):
294 case htons(ETH_P_MPLS_MC
): {
295 struct mpls_label
*hdr
, _hdr
[2];
297 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
,
302 if ((ntohl(hdr
[0].entry
) & MPLS_LS_LABEL_MASK
) >>
303 MPLS_LS_LABEL_SHIFT
== MPLS_LABEL_ENTROPY
) {
304 if (skb_flow_dissector_uses_key(flow_dissector
,
305 FLOW_DISSECTOR_KEY_MPLS_ENTROPY
)) {
306 key_keyid
= skb_flow_dissector_target(flow_dissector
,
307 FLOW_DISSECTOR_KEY_MPLS_ENTROPY
,
309 key_keyid
->keyid
= hdr
[1].entry
&
310 htonl(MPLS_LS_LABEL_MASK
);
313 key_basic
->n_proto
= proto
;
314 key_basic
->ip_proto
= ip_proto
;
315 key_control
->thoff
= (u16
)nhoff
;
323 case htons(ETH_P_FCOE
):
324 key_control
->thoff
= (u16
)(nhoff
+ FCOE_HEADER_LEN
);
337 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
, hlen
, &_hdr
);
341 * Only look inside GRE if version zero and no
344 if (hdr
->flags
& (GRE_VERSION
| GRE_ROUTING
))
349 if (hdr
->flags
& GRE_CSUM
)
351 if (hdr
->flags
& GRE_KEY
) {
355 keyid
= __skb_header_pointer(skb
, nhoff
, sizeof(_keyid
),
356 data
, hlen
, &_keyid
);
361 if (skb_flow_dissector_uses_key(flow_dissector
,
362 FLOW_DISSECTOR_KEY_GRE_KEYID
)) {
363 key_keyid
= skb_flow_dissector_target(flow_dissector
,
364 FLOW_DISSECTOR_KEY_GRE_KEYID
,
366 key_keyid
->keyid
= *keyid
;
370 if (hdr
->flags
& GRE_SEQ
)
372 if (proto
== htons(ETH_P_TEB
)) {
373 const struct ethhdr
*eth
;
376 eth
= __skb_header_pointer(skb
, nhoff
,
381 proto
= eth
->h_proto
;
382 nhoff
+= sizeof(*eth
);
387 proto
= htons(ETH_P_IP
);
390 proto
= htons(ETH_P_IPV6
);
393 proto
= htons(ETH_P_MPLS_UC
);
399 key_basic
->n_proto
= proto
;
400 key_basic
->ip_proto
= ip_proto
;
401 key_control
->thoff
= (u16
)nhoff
;
403 if (skb_flow_dissector_uses_key(flow_dissector
,
404 FLOW_DISSECTOR_KEY_PORTS
)) {
405 key_ports
= skb_flow_dissector_target(flow_dissector
,
406 FLOW_DISSECTOR_KEY_PORTS
,
408 key_ports
->ports
= __skb_flow_get_ports(skb
, nhoff
, ip_proto
,
414 EXPORT_SYMBOL(__skb_flow_dissect
);
416 static u32 hashrnd __read_mostly
;
417 static __always_inline
void __flow_hash_secret_init(void)
419 net_get_random_once(&hashrnd
, sizeof(hashrnd
));
422 static __always_inline u32
__flow_hash_words(u32
*words
, u32 length
, u32 keyval
)
424 return jhash2(words
, length
, keyval
);
427 static inline void *flow_keys_hash_start(struct flow_keys
*flow
)
429 BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET
% sizeof(u32
));
430 return (void *)flow
+ FLOW_KEYS_HASH_OFFSET
;
433 static inline size_t flow_keys_hash_length(struct flow_keys
*flow
)
435 size_t diff
= FLOW_KEYS_HASH_OFFSET
+ sizeof(flow
->addrs
);
436 BUILD_BUG_ON((sizeof(*flow
) - FLOW_KEYS_HASH_OFFSET
) % sizeof(u32
));
437 BUILD_BUG_ON(offsetof(typeof(*flow
), addrs
) !=
438 sizeof(*flow
) - sizeof(flow
->addrs
));
440 switch (flow
->control
.addr_type
) {
441 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
442 diff
-= sizeof(flow
->addrs
.v4addrs
);
444 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
445 diff
-= sizeof(flow
->addrs
.v6addrs
);
447 case FLOW_DISSECTOR_KEY_TIPC_ADDRS
:
448 diff
-= sizeof(flow
->addrs
.tipcaddrs
);
451 return (sizeof(*flow
) - diff
) / sizeof(u32
);
454 __be32
flow_get_u32_src(const struct flow_keys
*flow
)
456 switch (flow
->control
.addr_type
) {
457 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
458 return flow
->addrs
.v4addrs
.src
;
459 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
460 return (__force __be32
)ipv6_addr_hash(
461 &flow
->addrs
.v6addrs
.src
);
462 case FLOW_DISSECTOR_KEY_TIPC_ADDRS
:
463 return flow
->addrs
.tipcaddrs
.srcnode
;
468 EXPORT_SYMBOL(flow_get_u32_src
);
470 __be32
flow_get_u32_dst(const struct flow_keys
*flow
)
472 switch (flow
->control
.addr_type
) {
473 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
474 return flow
->addrs
.v4addrs
.dst
;
475 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
476 return (__force __be32
)ipv6_addr_hash(
477 &flow
->addrs
.v6addrs
.dst
);
482 EXPORT_SYMBOL(flow_get_u32_dst
);
484 static inline void __flow_hash_consistentify(struct flow_keys
*keys
)
488 switch (keys
->control
.addr_type
) {
489 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
490 addr_diff
= (__force u32
)keys
->addrs
.v4addrs
.dst
-
491 (__force u32
)keys
->addrs
.v4addrs
.src
;
492 if ((addr_diff
< 0) ||
494 ((__force u16
)keys
->ports
.dst
<
495 (__force u16
)keys
->ports
.src
))) {
496 swap(keys
->addrs
.v4addrs
.src
, keys
->addrs
.v4addrs
.dst
);
497 swap(keys
->ports
.src
, keys
->ports
.dst
);
500 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
501 addr_diff
= memcmp(&keys
->addrs
.v6addrs
.dst
,
502 &keys
->addrs
.v6addrs
.src
,
503 sizeof(keys
->addrs
.v6addrs
.dst
));
504 if ((addr_diff
< 0) ||
506 ((__force u16
)keys
->ports
.dst
<
507 (__force u16
)keys
->ports
.src
))) {
508 for (i
= 0; i
< 4; i
++)
509 swap(keys
->addrs
.v6addrs
.src
.s6_addr32
[i
],
510 keys
->addrs
.v6addrs
.dst
.s6_addr32
[i
]);
511 swap(keys
->ports
.src
, keys
->ports
.dst
);
517 static inline u32
__flow_hash_from_keys(struct flow_keys
*keys
, u32 keyval
)
521 __flow_hash_consistentify(keys
);
523 hash
= __flow_hash_words((u32
*)flow_keys_hash_start(keys
),
524 flow_keys_hash_length(keys
), keyval
);
531 u32
flow_hash_from_keys(struct flow_keys
*keys
)
533 __flow_hash_secret_init();
534 return __flow_hash_from_keys(keys
, hashrnd
);
536 EXPORT_SYMBOL(flow_hash_from_keys
);
538 static inline u32
___skb_get_hash(const struct sk_buff
*skb
,
539 struct flow_keys
*keys
, u32 keyval
)
541 if (!skb_flow_dissect_flow_keys(skb
, keys
))
544 return __flow_hash_from_keys(keys
, keyval
);
547 struct _flow_keys_digest_data
{
556 void make_flow_keys_digest(struct flow_keys_digest
*digest
,
557 const struct flow_keys
*flow
)
559 struct _flow_keys_digest_data
*data
=
560 (struct _flow_keys_digest_data
*)digest
;
562 BUILD_BUG_ON(sizeof(*data
) > sizeof(*digest
));
564 memset(digest
, 0, sizeof(*digest
));
566 data
->n_proto
= flow
->basic
.n_proto
;
567 data
->ip_proto
= flow
->basic
.ip_proto
;
568 data
->ports
= flow
->ports
.ports
;
569 data
->src
= flow
->addrs
.v4addrs
.src
;
570 data
->dst
= flow
->addrs
.v4addrs
.dst
;
572 EXPORT_SYMBOL(make_flow_keys_digest
);
575 * __skb_get_hash: calculate a flow hash
576 * @skb: sk_buff to calculate flow hash from
578 * This function calculates a flow hash based on src/dst addresses
579 * and src/dst port numbers. Sets hash in skb to non-zero hash value
580 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
581 * if hash is a canonical 4-tuple hash over transport ports.
583 void __skb_get_hash(struct sk_buff
*skb
)
585 struct flow_keys keys
;
588 __flow_hash_secret_init();
590 hash
= ___skb_get_hash(skb
, &keys
, hashrnd
);
593 if (keys
.ports
.ports
)
598 EXPORT_SYMBOL(__skb_get_hash
);
600 __u32
skb_get_hash_perturb(const struct sk_buff
*skb
, u32 perturb
)
602 struct flow_keys keys
;
604 return ___skb_get_hash(skb
, &keys
, perturb
);
606 EXPORT_SYMBOL(skb_get_hash_perturb
);
608 u32
__skb_get_poff(const struct sk_buff
*skb
, void *data
,
609 const struct flow_keys
*keys
, int hlen
)
611 u32 poff
= keys
->control
.thoff
;
613 switch (keys
->basic
.ip_proto
) {
615 /* access doff as u8 to avoid unaligned access */
619 doff
= __skb_header_pointer(skb
, poff
+ 12, sizeof(_doff
),
624 poff
+= max_t(u32
, sizeof(struct tcphdr
), (*doff
& 0xF0) >> 2);
628 case IPPROTO_UDPLITE
:
629 poff
+= sizeof(struct udphdr
);
631 /* For the rest, we do not really care about header
632 * extensions at this point for now.
635 poff
+= sizeof(struct icmphdr
);
638 poff
+= sizeof(struct icmp6hdr
);
641 poff
+= sizeof(struct igmphdr
);
644 poff
+= sizeof(struct dccp_hdr
);
647 poff
+= sizeof(struct sctphdr
);
655 * skb_get_poff - get the offset to the payload
656 * @skb: sk_buff to get the payload offset from
658 * The function will get the offset to the payload as far as it could
659 * be dissected. The main user is currently BPF, so that we can dynamically
660 * truncate packets without needing to push actual payload to the user
661 * space and can analyze headers only, instead.
663 u32
skb_get_poff(const struct sk_buff
*skb
)
665 struct flow_keys keys
;
667 if (!skb_flow_dissect_flow_keys(skb
, &keys
))
670 return __skb_get_poff(skb
, skb
->data
, &keys
, skb_headlen(skb
));
673 static const struct flow_dissector_key flow_keys_dissector_keys
[] = {
675 .key_id
= FLOW_DISSECTOR_KEY_CONTROL
,
676 .offset
= offsetof(struct flow_keys
, control
),
679 .key_id
= FLOW_DISSECTOR_KEY_BASIC
,
680 .offset
= offsetof(struct flow_keys
, basic
),
683 .key_id
= FLOW_DISSECTOR_KEY_IPV4_ADDRS
,
684 .offset
= offsetof(struct flow_keys
, addrs
.v4addrs
),
687 .key_id
= FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
688 .offset
= offsetof(struct flow_keys
, addrs
.v6addrs
),
691 .key_id
= FLOW_DISSECTOR_KEY_TIPC_ADDRS
,
692 .offset
= offsetof(struct flow_keys
, addrs
.tipcaddrs
),
695 .key_id
= FLOW_DISSECTOR_KEY_PORTS
,
696 .offset
= offsetof(struct flow_keys
, ports
),
699 .key_id
= FLOW_DISSECTOR_KEY_VLANID
,
700 .offset
= offsetof(struct flow_keys
, tags
),
703 .key_id
= FLOW_DISSECTOR_KEY_FLOW_LABEL
,
704 .offset
= offsetof(struct flow_keys
, tags
),
707 .key_id
= FLOW_DISSECTOR_KEY_GRE_KEYID
,
708 .offset
= offsetof(struct flow_keys
, keyid
),
712 static const struct flow_dissector_key flow_keys_buf_dissector_keys
[] = {
714 .key_id
= FLOW_DISSECTOR_KEY_CONTROL
,
715 .offset
= offsetof(struct flow_keys
, control
),
718 .key_id
= FLOW_DISSECTOR_KEY_BASIC
,
719 .offset
= offsetof(struct flow_keys
, basic
),
723 struct flow_dissector flow_keys_dissector __read_mostly
;
724 EXPORT_SYMBOL(flow_keys_dissector
);
726 struct flow_dissector flow_keys_buf_dissector __read_mostly
;
728 static int __init
init_default_flow_dissectors(void)
730 skb_flow_dissector_init(&flow_keys_dissector
,
731 flow_keys_dissector_keys
,
732 ARRAY_SIZE(flow_keys_dissector_keys
));
733 skb_flow_dissector_init(&flow_keys_buf_dissector
,
734 flow_keys_buf_dissector_keys
,
735 ARRAY_SIZE(flow_keys_buf_dissector_keys
));
739 late_initcall_sync(init_default_flow_dissectors
);