/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
13 #include <linux/types.h>
14 #include <linux/netfilter.h>
15 #include <linux/skbuff.h>
16 #include <linux/proc_fs.h>
17 #include <linux/seq_file.h>
18 #include <linux/stddef.h>
19 #include <linux/slab.h>
20 #include <linux/err.h>
21 #include <linux/percpu.h>
22 #include <linux/kernel.h>
23 #include <linux/jhash.h>
24 #include <linux/moduleparam.h>
25 #include <linux/export.h>
26 #include <net/net_namespace.h>
28 #include <net/netfilter/nf_conntrack.h>
29 #include <net/netfilter/nf_conntrack_core.h>
30 #include <net/netfilter/nf_conntrack_expect.h>
31 #include <net/netfilter/nf_conntrack_helper.h>
32 #include <net/netfilter/nf_conntrack_tuple.h>
33 #include <net/netfilter/nf_conntrack_zones.h>
35 unsigned int nf_ct_expect_hsize __read_mostly
;
36 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize
);
38 unsigned int nf_ct_expect_max __read_mostly
;
40 static struct kmem_cache
*nf_ct_expect_cachep __read_mostly
;
41 static unsigned int nf_ct_expect_hashrnd __read_mostly
;
43 /* nf_conntrack_expect helper functions */
44 void nf_ct_unlink_expect_report(struct nf_conntrack_expect
*exp
,
45 u32 portid
, int report
)
47 struct nf_conn_help
*master_help
= nfct_help(exp
->master
);
48 struct net
*net
= nf_ct_exp_net(exp
);
50 NF_CT_ASSERT(master_help
);
51 NF_CT_ASSERT(!timer_pending(&exp
->timeout
));
53 hlist_del_rcu(&exp
->hnode
);
54 net
->ct
.expect_count
--;
56 hlist_del(&exp
->lnode
);
57 master_help
->expecting
[exp
->class]--;
59 nf_ct_expect_event_report(IPEXP_DESTROY
, exp
, portid
, report
);
60 nf_ct_expect_put(exp
);
62 NF_CT_STAT_INC(net
, expect_delete
);
64 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report
);
66 static void nf_ct_expectation_timed_out(unsigned long ul_expect
)
68 struct nf_conntrack_expect
*exp
= (void *)ul_expect
;
70 spin_lock_bh(&nf_conntrack_expect_lock
);
71 nf_ct_unlink_expect(exp
);
72 spin_unlock_bh(&nf_conntrack_expect_lock
);
73 nf_ct_expect_put(exp
);
76 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple
*tuple
)
80 get_random_once(&nf_ct_expect_hashrnd
, sizeof(nf_ct_expect_hashrnd
));
82 hash
= jhash2(tuple
->dst
.u3
.all
, ARRAY_SIZE(tuple
->dst
.u3
.all
),
83 (((tuple
->dst
.protonum
^ tuple
->src
.l3num
) << 16) |
84 (__force __u16
)tuple
->dst
.u
.all
) ^ nf_ct_expect_hashrnd
);
86 return reciprocal_scale(hash
, nf_ct_expect_hsize
);
89 struct nf_conntrack_expect
*
90 __nf_ct_expect_find(struct net
*net
,
91 const struct nf_conntrack_zone
*zone
,
92 const struct nf_conntrack_tuple
*tuple
)
94 struct nf_conntrack_expect
*i
;
97 if (!net
->ct
.expect_count
)
100 h
= nf_ct_expect_dst_hash(tuple
);
101 hlist_for_each_entry_rcu(i
, &net
->ct
.expect_hash
[h
], hnode
) {
102 if (nf_ct_tuple_mask_cmp(tuple
, &i
->tuple
, &i
->mask
) &&
103 nf_ct_zone_equal_any(i
->master
, zone
))
108 EXPORT_SYMBOL_GPL(__nf_ct_expect_find
);
110 /* Just find a expectation corresponding to a tuple. */
111 struct nf_conntrack_expect
*
112 nf_ct_expect_find_get(struct net
*net
,
113 const struct nf_conntrack_zone
*zone
,
114 const struct nf_conntrack_tuple
*tuple
)
116 struct nf_conntrack_expect
*i
;
119 i
= __nf_ct_expect_find(net
, zone
, tuple
);
120 if (i
&& !atomic_inc_not_zero(&i
->use
))
126 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get
);
128 /* If an expectation for this connection is found, it gets delete from
129 * global list then returned. */
130 struct nf_conntrack_expect
*
131 nf_ct_find_expectation(struct net
*net
,
132 const struct nf_conntrack_zone
*zone
,
133 const struct nf_conntrack_tuple
*tuple
)
135 struct nf_conntrack_expect
*i
, *exp
= NULL
;
138 if (!net
->ct
.expect_count
)
141 h
= nf_ct_expect_dst_hash(tuple
);
142 hlist_for_each_entry(i
, &net
->ct
.expect_hash
[h
], hnode
) {
143 if (!(i
->flags
& NF_CT_EXPECT_INACTIVE
) &&
144 nf_ct_tuple_mask_cmp(tuple
, &i
->tuple
, &i
->mask
) &&
145 nf_ct_zone_equal_any(i
->master
, zone
)) {
153 /* If master is not in hash table yet (ie. packet hasn't left
154 this machine yet), how can other end know about expected?
155 Hence these are not the droids you are looking for (if
156 master ct never got confirmed, we'd hold a reference to it
157 and weird things would happen to future packets). */
158 if (!nf_ct_is_confirmed(exp
->master
))
161 /* Avoid race with other CPUs, that for exp->master ct, is
162 * about to invoke ->destroy(), or nf_ct_delete() via timeout
165 * The atomic_inc_not_zero() check tells: If that fails, we
166 * know that the ct is being destroyed. If it succeeds, we
167 * can be sure the ct cannot disappear underneath.
169 if (unlikely(nf_ct_is_dying(exp
->master
) ||
170 !atomic_inc_not_zero(&exp
->master
->ct_general
.use
)))
173 if (exp
->flags
& NF_CT_EXPECT_PERMANENT
) {
174 atomic_inc(&exp
->use
);
176 } else if (del_timer(&exp
->timeout
)) {
177 nf_ct_unlink_expect(exp
);
180 /* Undo exp->master refcnt increase, if del_timer() failed */
181 nf_ct_put(exp
->master
);
186 /* delete all expectations for this conntrack */
187 void nf_ct_remove_expectations(struct nf_conn
*ct
)
189 struct nf_conn_help
*help
= nfct_help(ct
);
190 struct nf_conntrack_expect
*exp
;
191 struct hlist_node
*next
;
193 /* Optimization: most connection never expect any others. */
197 spin_lock_bh(&nf_conntrack_expect_lock
);
198 hlist_for_each_entry_safe(exp
, next
, &help
->expectations
, lnode
) {
199 if (del_timer(&exp
->timeout
)) {
200 nf_ct_unlink_expect(exp
);
201 nf_ct_expect_put(exp
);
204 spin_unlock_bh(&nf_conntrack_expect_lock
);
206 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations
);
208 /* Would two expected things clash? */
209 static inline int expect_clash(const struct nf_conntrack_expect
*a
,
210 const struct nf_conntrack_expect
*b
)
212 /* Part covered by intersection of masks must be unequal,
213 otherwise they clash */
214 struct nf_conntrack_tuple_mask intersect_mask
;
217 intersect_mask
.src
.u
.all
= a
->mask
.src
.u
.all
& b
->mask
.src
.u
.all
;
219 for (count
= 0; count
< NF_CT_TUPLE_L3SIZE
; count
++){
220 intersect_mask
.src
.u3
.all
[count
] =
221 a
->mask
.src
.u3
.all
[count
] & b
->mask
.src
.u3
.all
[count
];
224 return nf_ct_tuple_mask_cmp(&a
->tuple
, &b
->tuple
, &intersect_mask
) &&
225 nf_ct_zone_equal_any(a
->master
, nf_ct_zone(b
->master
));
228 static inline int expect_matches(const struct nf_conntrack_expect
*a
,
229 const struct nf_conntrack_expect
*b
)
231 return a
->master
== b
->master
&& a
->class == b
->class &&
232 nf_ct_tuple_equal(&a
->tuple
, &b
->tuple
) &&
233 nf_ct_tuple_mask_equal(&a
->mask
, &b
->mask
) &&
234 nf_ct_zone_equal_any(a
->master
, nf_ct_zone(b
->master
));
237 /* Generally a bad idea to call this: could have matched already. */
238 void nf_ct_unexpect_related(struct nf_conntrack_expect
*exp
)
240 spin_lock_bh(&nf_conntrack_expect_lock
);
241 if (del_timer(&exp
->timeout
)) {
242 nf_ct_unlink_expect(exp
);
243 nf_ct_expect_put(exp
);
245 spin_unlock_bh(&nf_conntrack_expect_lock
);
247 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related
);
249 /* We don't increase the master conntrack refcount for non-fulfilled
250 * conntracks. During the conntrack destruction, the expectations are
251 * always killed before the conntrack itself */
252 struct nf_conntrack_expect
*nf_ct_expect_alloc(struct nf_conn
*me
)
254 struct nf_conntrack_expect
*new;
256 new = kmem_cache_alloc(nf_ct_expect_cachep
, GFP_ATOMIC
);
261 atomic_set(&new->use
, 1);
264 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc
);
266 void nf_ct_expect_init(struct nf_conntrack_expect
*exp
, unsigned int class,
268 const union nf_inet_addr
*saddr
,
269 const union nf_inet_addr
*daddr
,
270 u_int8_t proto
, const __be16
*src
, const __be16
*dst
)
274 if (family
== AF_INET
)
281 exp
->expectfn
= NULL
;
283 exp
->tuple
.src
.l3num
= family
;
284 exp
->tuple
.dst
.protonum
= proto
;
287 memcpy(&exp
->tuple
.src
.u3
, saddr
, len
);
288 if (sizeof(exp
->tuple
.src
.u3
) > len
)
289 /* address needs to be cleared for nf_ct_tuple_equal */
290 memset((void *)&exp
->tuple
.src
.u3
+ len
, 0x00,
291 sizeof(exp
->tuple
.src
.u3
) - len
);
292 memset(&exp
->mask
.src
.u3
, 0xFF, len
);
293 if (sizeof(exp
->mask
.src
.u3
) > len
)
294 memset((void *)&exp
->mask
.src
.u3
+ len
, 0x00,
295 sizeof(exp
->mask
.src
.u3
) - len
);
297 memset(&exp
->tuple
.src
.u3
, 0x00, sizeof(exp
->tuple
.src
.u3
));
298 memset(&exp
->mask
.src
.u3
, 0x00, sizeof(exp
->mask
.src
.u3
));
302 exp
->tuple
.src
.u
.all
= *src
;
303 exp
->mask
.src
.u
.all
= htons(0xFFFF);
305 exp
->tuple
.src
.u
.all
= 0;
306 exp
->mask
.src
.u
.all
= 0;
309 memcpy(&exp
->tuple
.dst
.u3
, daddr
, len
);
310 if (sizeof(exp
->tuple
.dst
.u3
) > len
)
311 /* address needs to be cleared for nf_ct_tuple_equal */
312 memset((void *)&exp
->tuple
.dst
.u3
+ len
, 0x00,
313 sizeof(exp
->tuple
.dst
.u3
) - len
);
315 exp
->tuple
.dst
.u
.all
= *dst
;
317 #ifdef CONFIG_NF_NAT_NEEDED
318 memset(&exp
->saved_addr
, 0, sizeof(exp
->saved_addr
));
319 memset(&exp
->saved_proto
, 0, sizeof(exp
->saved_proto
));
322 EXPORT_SYMBOL_GPL(nf_ct_expect_init
);
324 static void nf_ct_expect_free_rcu(struct rcu_head
*head
)
326 struct nf_conntrack_expect
*exp
;
328 exp
= container_of(head
, struct nf_conntrack_expect
, rcu
);
329 kmem_cache_free(nf_ct_expect_cachep
, exp
);
332 void nf_ct_expect_put(struct nf_conntrack_expect
*exp
)
334 if (atomic_dec_and_test(&exp
->use
))
335 call_rcu(&exp
->rcu
, nf_ct_expect_free_rcu
);
337 EXPORT_SYMBOL_GPL(nf_ct_expect_put
);
339 static int nf_ct_expect_insert(struct nf_conntrack_expect
*exp
)
341 struct nf_conn_help
*master_help
= nfct_help(exp
->master
);
342 struct nf_conntrack_helper
*helper
;
343 struct net
*net
= nf_ct_exp_net(exp
);
344 unsigned int h
= nf_ct_expect_dst_hash(&exp
->tuple
);
346 /* two references : one for hash insert, one for the timer */
347 atomic_add(2, &exp
->use
);
349 hlist_add_head(&exp
->lnode
, &master_help
->expectations
);
350 master_help
->expecting
[exp
->class]++;
352 hlist_add_head_rcu(&exp
->hnode
, &net
->ct
.expect_hash
[h
]);
353 net
->ct
.expect_count
++;
355 setup_timer(&exp
->timeout
, nf_ct_expectation_timed_out
,
357 helper
= rcu_dereference_protected(master_help
->helper
,
358 lockdep_is_held(&nf_conntrack_expect_lock
));
360 exp
->timeout
.expires
= jiffies
+
361 helper
->expect_policy
[exp
->class].timeout
* HZ
;
363 add_timer(&exp
->timeout
);
365 NF_CT_STAT_INC(net
, expect_create
);
369 /* Race with expectations being used means we could have none to find; OK. */
370 static void evict_oldest_expect(struct nf_conn
*master
,
371 struct nf_conntrack_expect
*new)
373 struct nf_conn_help
*master_help
= nfct_help(master
);
374 struct nf_conntrack_expect
*exp
, *last
= NULL
;
376 hlist_for_each_entry(exp
, &master_help
->expectations
, lnode
) {
377 if (exp
->class == new->class)
381 if (last
&& del_timer(&last
->timeout
)) {
382 nf_ct_unlink_expect(last
);
383 nf_ct_expect_put(last
);
387 static inline int __nf_ct_expect_check(struct nf_conntrack_expect
*expect
)
389 const struct nf_conntrack_expect_policy
*p
;
390 struct nf_conntrack_expect
*i
;
391 struct nf_conn
*master
= expect
->master
;
392 struct nf_conn_help
*master_help
= nfct_help(master
);
393 struct nf_conntrack_helper
*helper
;
394 struct net
*net
= nf_ct_exp_net(expect
);
395 struct hlist_node
*next
;
403 h
= nf_ct_expect_dst_hash(&expect
->tuple
);
404 hlist_for_each_entry_safe(i
, next
, &net
->ct
.expect_hash
[h
], hnode
) {
405 if (expect_matches(i
, expect
)) {
406 if (del_timer(&i
->timeout
)) {
407 nf_ct_unlink_expect(i
);
411 } else if (expect_clash(i
, expect
)) {
416 /* Will be over limit? */
417 helper
= rcu_dereference_protected(master_help
->helper
,
418 lockdep_is_held(&nf_conntrack_expect_lock
));
420 p
= &helper
->expect_policy
[expect
->class];
421 if (p
->max_expected
&&
422 master_help
->expecting
[expect
->class] >= p
->max_expected
) {
423 evict_oldest_expect(master
, expect
);
424 if (master_help
->expecting
[expect
->class]
425 >= p
->max_expected
) {
432 if (net
->ct
.expect_count
>= nf_ct_expect_max
) {
433 net_warn_ratelimited("nf_conntrack: expectation table full\n");
440 int nf_ct_expect_related_report(struct nf_conntrack_expect
*expect
,
441 u32 portid
, int report
)
445 spin_lock_bh(&nf_conntrack_expect_lock
);
446 ret
= __nf_ct_expect_check(expect
);
450 ret
= nf_ct_expect_insert(expect
);
453 spin_unlock_bh(&nf_conntrack_expect_lock
);
454 nf_ct_expect_event_report(IPEXP_NEW
, expect
, portid
, report
);
457 spin_unlock_bh(&nf_conntrack_expect_lock
);
460 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report
);
462 #ifdef CONFIG_NF_CONNTRACK_PROCFS
463 struct ct_expect_iter_state
{
464 struct seq_net_private p
;
468 static struct hlist_node
*ct_expect_get_first(struct seq_file
*seq
)
470 struct net
*net
= seq_file_net(seq
);
471 struct ct_expect_iter_state
*st
= seq
->private;
472 struct hlist_node
*n
;
474 for (st
->bucket
= 0; st
->bucket
< nf_ct_expect_hsize
; st
->bucket
++) {
475 n
= rcu_dereference(hlist_first_rcu(&net
->ct
.expect_hash
[st
->bucket
]));
482 static struct hlist_node
*ct_expect_get_next(struct seq_file
*seq
,
483 struct hlist_node
*head
)
485 struct net
*net
= seq_file_net(seq
);
486 struct ct_expect_iter_state
*st
= seq
->private;
488 head
= rcu_dereference(hlist_next_rcu(head
));
489 while (head
== NULL
) {
490 if (++st
->bucket
>= nf_ct_expect_hsize
)
492 head
= rcu_dereference(hlist_first_rcu(&net
->ct
.expect_hash
[st
->bucket
]));
497 static struct hlist_node
*ct_expect_get_idx(struct seq_file
*seq
, loff_t pos
)
499 struct hlist_node
*head
= ct_expect_get_first(seq
);
502 while (pos
&& (head
= ct_expect_get_next(seq
, head
)))
504 return pos
? NULL
: head
;
507 static void *exp_seq_start(struct seq_file
*seq
, loff_t
*pos
)
511 return ct_expect_get_idx(seq
, *pos
);
514 static void *exp_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
517 return ct_expect_get_next(seq
, v
);
520 static void exp_seq_stop(struct seq_file
*seq
, void *v
)
526 static int exp_seq_show(struct seq_file
*s
, void *v
)
528 struct nf_conntrack_expect
*expect
;
529 struct nf_conntrack_helper
*helper
;
530 struct hlist_node
*n
= v
;
533 expect
= hlist_entry(n
, struct nf_conntrack_expect
, hnode
);
535 if (expect
->timeout
.function
)
536 seq_printf(s
, "%ld ", timer_pending(&expect
->timeout
)
537 ? (long)(expect
->timeout
.expires
- jiffies
)/HZ
: 0);
540 seq_printf(s
, "l3proto = %u proto=%u ",
541 expect
->tuple
.src
.l3num
,
542 expect
->tuple
.dst
.protonum
);
543 print_tuple(s
, &expect
->tuple
,
544 __nf_ct_l3proto_find(expect
->tuple
.src
.l3num
),
545 __nf_ct_l4proto_find(expect
->tuple
.src
.l3num
,
546 expect
->tuple
.dst
.protonum
));
548 if (expect
->flags
& NF_CT_EXPECT_PERMANENT
) {
549 seq_printf(s
, "PERMANENT");
552 if (expect
->flags
& NF_CT_EXPECT_INACTIVE
) {
553 seq_printf(s
, "%sINACTIVE", delim
);
556 if (expect
->flags
& NF_CT_EXPECT_USERSPACE
)
557 seq_printf(s
, "%sUSERSPACE", delim
);
559 helper
= rcu_dereference(nfct_help(expect
->master
)->helper
);
561 seq_printf(s
, "%s%s", expect
->flags
? " " : "", helper
->name
);
562 if (helper
->expect_policy
[expect
->class].name
)
564 helper
->expect_policy
[expect
->class].name
);
572 static const struct seq_operations exp_seq_ops
= {
573 .start
= exp_seq_start
,
574 .next
= exp_seq_next
,
575 .stop
= exp_seq_stop
,
579 static int exp_open(struct inode
*inode
, struct file
*file
)
581 return seq_open_net(inode
, file
, &exp_seq_ops
,
582 sizeof(struct ct_expect_iter_state
));
585 static const struct file_operations exp_file_ops
= {
586 .owner
= THIS_MODULE
,
590 .release
= seq_release_net
,
592 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Create /proc/net/nf_conntrack_expect for @net (no-op without
 * CONFIG_NF_CONNTRACK_PROCFS) and chown it to the namespace's root.
 * Returns 0 on success or -ENOMEM.
 */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;
	kuid_t root_uid;
	kgid_t root_gid;

	proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
			   &exp_file_ops);
	if (!proc)
		return -ENOMEM;

	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}
/* Remove /proc/net/nf_conntrack_expect for @net. */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}
621 module_param_named(expect_hashsize
, nf_ct_expect_hsize
, uint
, 0400);
623 int nf_conntrack_expect_pernet_init(struct net
*net
)
627 net
->ct
.expect_count
= 0;
628 net
->ct
.expect_hash
= nf_ct_alloc_hashtable(&nf_ct_expect_hsize
, 0);
629 if (net
->ct
.expect_hash
== NULL
)
632 err
= exp_proc_init(net
);
638 nf_ct_free_hashtable(net
->ct
.expect_hash
, nf_ct_expect_hsize
);
643 void nf_conntrack_expect_pernet_fini(struct net
*net
)
645 exp_proc_remove(net
);
646 nf_ct_free_hashtable(net
->ct
.expect_hash
, nf_ct_expect_hsize
);
649 int nf_conntrack_expect_init(void)
651 if (!nf_ct_expect_hsize
) {
652 nf_ct_expect_hsize
= nf_conntrack_htable_size
/ 256;
653 if (!nf_ct_expect_hsize
)
654 nf_ct_expect_hsize
= 1;
656 nf_ct_expect_max
= nf_ct_expect_hsize
* 4;
657 nf_ct_expect_cachep
= kmem_cache_create("nf_conntrack_expect",
658 sizeof(struct nf_conntrack_expect
),
660 if (!nf_ct_expect_cachep
)
665 void nf_conntrack_expect_fini(void)
667 rcu_barrier(); /* Wait for call_rcu() before destroy */
668 kmem_cache_destroy(nf_ct_expect_cachep
);