netfilter: conntrack: allow increasing bucket size via sysctl too
net/netfilter/nf_conntrack_core.c (deliverable/linux.git)
1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
4
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16
17 #include <linux/types.h>
18 #include <linux/netfilter.h>
19 #include <linux/module.h>
20 #include <linux/sched.h>
21 #include <linux/skbuff.h>
22 #include <linux/proc_fs.h>
23 #include <linux/vmalloc.h>
24 #include <linux/stddef.h>
25 #include <linux/slab.h>
26 #include <linux/random.h>
27 #include <linux/jhash.h>
28 #include <linux/err.h>
29 #include <linux/percpu.h>
30 #include <linux/moduleparam.h>
31 #include <linux/notifier.h>
32 #include <linux/kernel.h>
33 #include <linux/netdevice.h>
34 #include <linux/socket.h>
35 #include <linux/mm.h>
36 #include <linux/nsproxy.h>
37 #include <linux/rculist_nulls.h>
38
39 #include <net/netfilter/nf_conntrack.h>
40 #include <net/netfilter/nf_conntrack_l3proto.h>
41 #include <net/netfilter/nf_conntrack_l4proto.h>
42 #include <net/netfilter/nf_conntrack_expect.h>
43 #include <net/netfilter/nf_conntrack_helper.h>
44 #include <net/netfilter/nf_conntrack_seqadj.h>
45 #include <net/netfilter/nf_conntrack_core.h>
46 #include <net/netfilter/nf_conntrack_extend.h>
47 #include <net/netfilter/nf_conntrack_acct.h>
48 #include <net/netfilter/nf_conntrack_ecache.h>
49 #include <net/netfilter/nf_conntrack_zones.h>
50 #include <net/netfilter/nf_conntrack_timestamp.h>
51 #include <net/netfilter/nf_conntrack_timeout.h>
52 #include <net/netfilter/nf_conntrack_labels.h>
53 #include <net/netfilter/nf_conntrack_synproxy.h>
54 #include <net/netfilter/nf_nat.h>
55 #include <net/netfilter/nf_nat_core.h>
56 #include <net/netfilter/nf_nat_helper.h>
57 #include <net/netns/hash.h>
58
59 #define NF_CONNTRACK_VERSION "0.5.0"
60
61 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
62 enum nf_nat_manip_type manip,
63 const struct nlattr *attr) __read_mostly;
64 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
65
66 __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
67 EXPORT_SYMBOL_GPL(nf_conntrack_locks);
68
69 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
70 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
71
72 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
73 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
74
75 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
76 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
77 static __read_mostly seqcount_t nf_conntrack_generation;
78 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
79 static __read_mostly bool nf_conntrack_locks_all;
80
81 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
82 {
83 spin_lock(lock);
84 while (unlikely(nf_conntrack_locks_all)) {
85 spin_unlock(lock);
86 spin_unlock_wait(&nf_conntrack_locks_all_lock);
87 spin_lock(lock);
88 }
89 }
90 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
91
92 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
93 {
94 h1 %= CONNTRACK_LOCKS;
95 h2 %= CONNTRACK_LOCKS;
96 spin_unlock(&nf_conntrack_locks[h1]);
97 if (h1 != h2)
98 spin_unlock(&nf_conntrack_locks[h2]);
99 }
100
101 /* return true if we need to recompute hashes (in case hash table was resized) */
102 static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
103 unsigned int h2, unsigned int sequence)
104 {
105 h1 %= CONNTRACK_LOCKS;
106 h2 %= CONNTRACK_LOCKS;
107 if (h1 <= h2) {
108 nf_conntrack_lock(&nf_conntrack_locks[h1]);
109 if (h1 != h2)
110 spin_lock_nested(&nf_conntrack_locks[h2],
111 SINGLE_DEPTH_NESTING);
112 } else {
113 nf_conntrack_lock(&nf_conntrack_locks[h2]);
114 spin_lock_nested(&nf_conntrack_locks[h1],
115 SINGLE_DEPTH_NESTING);
116 }
117 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
118 nf_conntrack_double_unlock(h1, h2);
119 return true;
120 }
121 return false;
122 }
123
124 static void nf_conntrack_all_lock(void)
125 {
126 int i;
127
128 spin_lock(&nf_conntrack_locks_all_lock);
129 nf_conntrack_locks_all = true;
130
131 for (i = 0; i < CONNTRACK_LOCKS; i++) {
132 spin_unlock_wait(&nf_conntrack_locks[i]);
133 }
134 }
135
136 static void nf_conntrack_all_unlock(void)
137 {
138 nf_conntrack_locks_all = false;
139 spin_unlock(&nf_conntrack_locks_all_lock);
140 }
141
142 unsigned int nf_conntrack_htable_size __read_mostly;
143 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
144
145 unsigned int nf_conntrack_max __read_mostly;
146 EXPORT_SYMBOL_GPL(nf_conntrack_max);
147
148 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
149 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
150
151 static unsigned int nf_conntrack_hash_rnd __read_mostly;
152
153 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
154 const struct net *net)
155 {
156 unsigned int n;
157 u32 seed;
158
159 get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
160
161 /* The direction must be ignored, so we hash everything up to the
162 * destination ports (which is a multiple of 4) and treat the last
163 * three bytes manually.
164 */
165 seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
166 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
167 return jhash2((u32 *)tuple, n, seed ^
168 (((__force __u16)tuple->dst.u.all << 16) |
169 tuple->dst.protonum));
170 }
171
172 static u32 scale_hash(u32 hash)
173 {
174 return reciprocal_scale(hash, nf_conntrack_htable_size);
175 }
176
177 static u32 __hash_conntrack(const struct net *net,
178 const struct nf_conntrack_tuple *tuple,
179 unsigned int size)
180 {
181 return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
182 }
183
184 static u32 hash_conntrack(const struct net *net,
185 const struct nf_conntrack_tuple *tuple)
186 {
187 return scale_hash(hash_conntrack_raw(tuple, net));
188 }
189
190 bool
191 nf_ct_get_tuple(const struct sk_buff *skb,
192 unsigned int nhoff,
193 unsigned int dataoff,
194 u_int16_t l3num,
195 u_int8_t protonum,
196 struct net *net,
197 struct nf_conntrack_tuple *tuple,
198 const struct nf_conntrack_l3proto *l3proto,
199 const struct nf_conntrack_l4proto *l4proto)
200 {
201 memset(tuple, 0, sizeof(*tuple));
202
203 tuple->src.l3num = l3num;
204 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
205 return false;
206
207 tuple->dst.protonum = protonum;
208 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
209
210 return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
211 }
212 EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
213
214 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
215 u_int16_t l3num,
216 struct net *net, struct nf_conntrack_tuple *tuple)
217 {
218 struct nf_conntrack_l3proto *l3proto;
219 struct nf_conntrack_l4proto *l4proto;
220 unsigned int protoff;
221 u_int8_t protonum;
222 int ret;
223
224 rcu_read_lock();
225
226 l3proto = __nf_ct_l3proto_find(l3num);
227 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
228 if (ret != NF_ACCEPT) {
229 rcu_read_unlock();
230 return false;
231 }
232
233 l4proto = __nf_ct_l4proto_find(l3num, protonum);
234
235 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
236 l3proto, l4proto);
237
238 rcu_read_unlock();
239 return ret;
240 }
241 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
242
243 bool
244 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
245 const struct nf_conntrack_tuple *orig,
246 const struct nf_conntrack_l3proto *l3proto,
247 const struct nf_conntrack_l4proto *l4proto)
248 {
249 memset(inverse, 0, sizeof(*inverse));
250
251 inverse->src.l3num = orig->src.l3num;
252 if (l3proto->invert_tuple(inverse, orig) == 0)
253 return false;
254
255 inverse->dst.dir = !orig->dst.dir;
256
257 inverse->dst.protonum = orig->dst.protonum;
258 return l4proto->invert_tuple(inverse, orig);
259 }
260 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
261
262 static void
263 clean_from_lists(struct nf_conn *ct)
264 {
265 pr_debug("clean_from_lists(%p)\n", ct);
266 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
267 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
268
269 /* Destroy all pending expectations */
270 nf_ct_remove_expectations(ct);
271 }
272
273 /* must be called with local_bh_disable */
274 static void nf_ct_add_to_dying_list(struct nf_conn *ct)
275 {
276 struct ct_pcpu *pcpu;
277
278 /* add this conntrack to the (per cpu) dying list */
279 ct->cpu = smp_processor_id();
280 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
281
282 spin_lock(&pcpu->lock);
283 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
284 &pcpu->dying);
285 spin_unlock(&pcpu->lock);
286 }
287
288 /* must be called with local_bh_disable */
289 static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
290 {
291 struct ct_pcpu *pcpu;
292
293 /* add this conntrack to the (per cpu) unconfirmed list */
294 ct->cpu = smp_processor_id();
295 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
296
297 spin_lock(&pcpu->lock);
298 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
299 &pcpu->unconfirmed);
300 spin_unlock(&pcpu->lock);
301 }
302
303 /* must be called with local_bh_disable */
304 static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
305 {
306 struct ct_pcpu *pcpu;
307
308 /* We overload the first tuple to link into the unconfirmed or dying list. */
309 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
310
311 spin_lock(&pcpu->lock);
312 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
313 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
314 spin_unlock(&pcpu->lock);
315 }
316
317 /* Released via destroy_conntrack() */
318 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
319 const struct nf_conntrack_zone *zone,
320 gfp_t flags)
321 {
322 struct nf_conn *tmpl;
323
324 tmpl = kzalloc(sizeof(*tmpl), flags);
325 if (tmpl == NULL)
326 return NULL;
327
328 tmpl->status = IPS_TEMPLATE;
329 write_pnet(&tmpl->ct_net, net);
330 nf_ct_zone_add(tmpl, zone);
331 atomic_set(&tmpl->ct_general.use, 0);
332
333 return tmpl;
334 }
335 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
336
337 void nf_ct_tmpl_free(struct nf_conn *tmpl)
338 {
339 nf_ct_ext_destroy(tmpl);
340 nf_ct_ext_free(tmpl);
341 kfree(tmpl);
342 }
343 EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
344
345 static void
346 destroy_conntrack(struct nf_conntrack *nfct)
347 {
348 struct nf_conn *ct = (struct nf_conn *)nfct;
349 struct net *net = nf_ct_net(ct);
350 struct nf_conntrack_l4proto *l4proto;
351
352 pr_debug("destroy_conntrack(%p)\n", ct);
353 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
354 NF_CT_ASSERT(!timer_pending(&ct->timeout));
355
356 if (unlikely(nf_ct_is_template(ct))) {
357 nf_ct_tmpl_free(ct);
358 return;
359 }
360 rcu_read_lock();
361 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
362 if (l4proto->destroy)
363 l4proto->destroy(ct);
364
365 rcu_read_unlock();
366
367 local_bh_disable();
368 /* Expectations will have been removed in clean_from_lists,
369 * except TFTP can create an expectation on the first packet,
370 * before connection is in the list, so we need to clean here,
371 * too.
372 */
373 nf_ct_remove_expectations(ct);
374
375 nf_ct_del_from_dying_or_unconfirmed_list(ct);
376
377 NF_CT_STAT_INC(net, delete);
378 local_bh_enable();
379
380 if (ct->master)
381 nf_ct_put(ct->master);
382
383 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
384 nf_conntrack_free(ct);
385 }
386
387 static void nf_ct_delete_from_lists(struct nf_conn *ct)
388 {
389 struct net *net = nf_ct_net(ct);
390 unsigned int hash, reply_hash;
391 unsigned int sequence;
392
393 nf_ct_helper_destroy(ct);
394
395 local_bh_disable();
396 do {
397 sequence = read_seqcount_begin(&nf_conntrack_generation);
398 hash = hash_conntrack(net,
399 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
400 reply_hash = hash_conntrack(net,
401 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
402 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
403
404 clean_from_lists(ct);
405 nf_conntrack_double_unlock(hash, reply_hash);
406
407 nf_ct_add_to_dying_list(ct);
408
409 NF_CT_STAT_INC(net, delete_list);
410 local_bh_enable();
411 }
412
413 bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
414 {
415 struct nf_conn_tstamp *tstamp;
416
417 tstamp = nf_conn_tstamp_find(ct);
418 if (tstamp && tstamp->stop == 0)
419 tstamp->stop = ktime_get_real_ns();
420
421 if (nf_ct_is_dying(ct))
422 goto delete;
423
424 if (nf_conntrack_event_report(IPCT_DESTROY, ct,
425 portid, report) < 0) {
426 /* destroy event was not delivered */
427 nf_ct_delete_from_lists(ct);
428 nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
429 return false;
430 }
431
432 nf_conntrack_ecache_work(nf_ct_net(ct));
433 set_bit(IPS_DYING_BIT, &ct->status);
434 delete:
435 nf_ct_delete_from_lists(ct);
436 nf_ct_put(ct);
437 return true;
438 }
439 EXPORT_SYMBOL_GPL(nf_ct_delete);
440
441 static void death_by_timeout(unsigned long ul_conntrack)
442 {
443 nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
444 }
445
446 static inline bool
447 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
448 const struct nf_conntrack_tuple *tuple,
449 const struct nf_conntrack_zone *zone,
450 const struct net *net)
451 {
452 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
453
454 /* A conntrack can be recreated with an equal tuple,
455 * so we need to check that the conntrack is confirmed
456 */
457 return nf_ct_tuple_equal(tuple, &h->tuple) &&
458 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
459 nf_ct_is_confirmed(ct) &&
460 net_eq(net, nf_ct_net(ct));
461 }
462
463 /*
464 * Warning :
465 * - Caller must take a reference on returned object
466 * and recheck nf_ct_tuple_equal(tuple, &h->tuple)
467 */
468 static struct nf_conntrack_tuple_hash *
469 ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
470 const struct nf_conntrack_tuple *tuple, u32 hash)
471 {
472 struct nf_conntrack_tuple_hash *h;
473 struct hlist_nulls_head *ct_hash;
474 struct hlist_nulls_node *n;
475 unsigned int bucket, sequence;
476
477 begin:
478 do {
479 sequence = read_seqcount_begin(&nf_conntrack_generation);
480 bucket = scale_hash(hash);
481 ct_hash = nf_conntrack_hash;
482 } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
483
484 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
485 if (nf_ct_key_equal(h, tuple, zone, net)) {
486 NF_CT_STAT_INC_ATOMIC(net, found);
487 return h;
488 }
489 NF_CT_STAT_INC_ATOMIC(net, searched);
490 }
491 /*
492 * if the nulls value we got at the end of this lookup is
493 * not the expected one, we must restart lookup.
494 * We probably met an item that was moved to another chain.
495 */
496 if (get_nulls_value(n) != bucket) {
497 NF_CT_STAT_INC_ATOMIC(net, search_restart);
498 goto begin;
499 }
500
501 return NULL;
502 }
503
504 /* Find a connection corresponding to a tuple. */
505 static struct nf_conntrack_tuple_hash *
506 __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
507 const struct nf_conntrack_tuple *tuple, u32 hash)
508 {
509 struct nf_conntrack_tuple_hash *h;
510 struct nf_conn *ct;
511
512 rcu_read_lock();
513 begin:
514 h = ____nf_conntrack_find(net, zone, tuple, hash);
515 if (h) {
516 ct = nf_ct_tuplehash_to_ctrack(h);
517 if (unlikely(nf_ct_is_dying(ct) ||
518 !atomic_inc_not_zero(&ct->ct_general.use)))
519 h = NULL;
520 else {
521 if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
522 nf_ct_put(ct);
523 goto begin;
524 }
525 }
526 }
527 rcu_read_unlock();
528
529 return h;
530 }
531
532 struct nf_conntrack_tuple_hash *
533 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
534 const struct nf_conntrack_tuple *tuple)
535 {
536 return __nf_conntrack_find_get(net, zone, tuple,
537 hash_conntrack_raw(tuple, net));
538 }
539 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
540
541 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
542 unsigned int hash,
543 unsigned int reply_hash)
544 {
545 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
546 &nf_conntrack_hash[hash]);
547 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
548 &nf_conntrack_hash[reply_hash]);
549 }
550
551 int
552 nf_conntrack_hash_check_insert(struct nf_conn *ct)
553 {
554 const struct nf_conntrack_zone *zone;
555 struct net *net = nf_ct_net(ct);
556 unsigned int hash, reply_hash;
557 struct nf_conntrack_tuple_hash *h;
558 struct hlist_nulls_node *n;
559 unsigned int sequence;
560
561 zone = nf_ct_zone(ct);
562
563 local_bh_disable();
564 do {
565 sequence = read_seqcount_begin(&nf_conntrack_generation);
566 hash = hash_conntrack(net,
567 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
568 reply_hash = hash_conntrack(net,
569 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
570 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
571
572 /* See if there's one in the list already, including reverse */
573 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
574 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
575 zone, net))
576 goto out;
577
578 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
579 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
580 zone, net))
581 goto out;
582
583 add_timer(&ct->timeout);
584 smp_wmb();
585 /* The caller holds a reference to this object */
586 atomic_set(&ct->ct_general.use, 2);
587 __nf_conntrack_hash_insert(ct, hash, reply_hash);
588 nf_conntrack_double_unlock(hash, reply_hash);
589 NF_CT_STAT_INC(net, insert);
590 local_bh_enable();
591 return 0;
592
593 out:
594 nf_conntrack_double_unlock(hash, reply_hash);
595 NF_CT_STAT_INC(net, insert_failed);
596 local_bh_enable();
597 return -EEXIST;
598 }
599 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
600
601 static inline void nf_ct_acct_update(struct nf_conn *ct,
602 enum ip_conntrack_info ctinfo,
603 unsigned int len)
604 {
605 struct nf_conn_acct *acct;
606
607 acct = nf_conn_acct_find(ct);
608 if (acct) {
609 struct nf_conn_counter *counter = acct->counter;
610
611 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
612 atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
613 }
614 }
615
616 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
617 const struct nf_conn *loser_ct)
618 {
619 struct nf_conn_acct *acct;
620
621 acct = nf_conn_acct_find(loser_ct);
622 if (acct) {
623 struct nf_conn_counter *counter = acct->counter;
624 unsigned int bytes;
625
626 /* u32 should be fine since we must have seen one packet. */
627 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
628 nf_ct_acct_update(ct, ctinfo, bytes);
629 }
630 }
631
632 /* Resolve race on insertion if this protocol allows this. */
633 static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
634 enum ip_conntrack_info ctinfo,
635 struct nf_conntrack_tuple_hash *h)
636 {
637 /* This is the conntrack entry already in hashes that won race. */
638 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
639 struct nf_conntrack_l4proto *l4proto;
640
641 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
642 if (l4proto->allow_clash &&
643 !nf_ct_is_dying(ct) &&
644 atomic_inc_not_zero(&ct->ct_general.use)) {
645 nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
646 nf_conntrack_put(skb->nfct);
647 /* Assign conntrack already in hashes to this skbuff. Don't
648 * modify skb->nfctinfo to ensure consistent stateful filtering.
649 */
650 skb->nfct = &ct->ct_general;
651 return NF_ACCEPT;
652 }
653 NF_CT_STAT_INC(net, drop);
654 return NF_DROP;
655 }
656
657 /* Confirm a connection given skb; places it in hash table */
658 int
659 __nf_conntrack_confirm(struct sk_buff *skb)
660 {
661 const struct nf_conntrack_zone *zone;
662 unsigned int hash, reply_hash;
663 struct nf_conntrack_tuple_hash *h;
664 struct nf_conn *ct;
665 struct nf_conn_help *help;
666 struct nf_conn_tstamp *tstamp;
667 struct hlist_nulls_node *n;
668 enum ip_conntrack_info ctinfo;
669 struct net *net;
670 unsigned int sequence;
671 int ret = NF_DROP;
672
673 ct = nf_ct_get(skb, &ctinfo);
674 net = nf_ct_net(ct);
675
676 /* ipt_REJECT uses nf_conntrack_attach to attach related
677 ICMP/TCP RST packets in the other direction. The actual packet
678 which created the connection will be IP_CT_NEW, or IP_CT_RELATED
679 for an expected connection. */
680 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
681 return NF_ACCEPT;
682
683 zone = nf_ct_zone(ct);
684 local_bh_disable();
685
686 do {
687 sequence = read_seqcount_begin(&nf_conntrack_generation);
688 /* reuse the hash saved before */
689 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
690 hash = scale_hash(hash);
691 reply_hash = hash_conntrack(net,
692 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
693
694 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
695
696 /* We're not in hash table, and we refuse to set up related
697 * connections for unconfirmed conns. But packet copies and
698 * REJECT will give spurious warnings here.
699 */
700 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
701
702 /* No external references means no one else could have
703 * confirmed us.
704 */
705 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
706 pr_debug("Confirming conntrack %p\n", ct);
707 /* We have to check the DYING flag after unlink to prevent
708 * a race against nf_ct_get_next_corpse() possibly called from
709 * user context, else we insert an already 'dead' hash, blocking
710 * further use of that particular connection -JM.
711 */
712 nf_ct_del_from_dying_or_unconfirmed_list(ct);
713
714 if (unlikely(nf_ct_is_dying(ct))) {
715 nf_ct_add_to_dying_list(ct);
716 goto dying;
717 }
718
719 /* See if there's one in the list already, including reverse:
720 NAT could have grabbed it without realizing, since we're
721 not in the hash. If there is, we lost the race. */
722 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
723 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
724 zone, net))
725 goto out;
726
727 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
728 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
729 zone, net))
730 goto out;
731
732 /* Timer relative to confirmation time, not original
733 setting time, otherwise we'd get timer wrap in
734 weird delay cases. */
735 ct->timeout.expires += jiffies;
736 add_timer(&ct->timeout);
737 atomic_inc(&ct->ct_general.use);
738 ct->status |= IPS_CONFIRMED;
739
740 /* set conntrack timestamp, if enabled. */
741 tstamp = nf_conn_tstamp_find(ct);
742 if (tstamp) {
743 if (skb->tstamp.tv64 == 0)
744 __net_timestamp(skb);
745
746 tstamp->start = ktime_to_ns(skb->tstamp);
747 }
748 /* Since the lookup is lockless, hash insertion must be done after
749 * starting the timer and setting the CONFIRMED bit. The RCU barriers
750 * guarantee that no other CPU can find the conntrack before the above
751 * stores are visible.
752 */
753 __nf_conntrack_hash_insert(ct, hash, reply_hash);
754 nf_conntrack_double_unlock(hash, reply_hash);
755 NF_CT_STAT_INC(net, insert);
756 local_bh_enable();
757
758 help = nfct_help(ct);
759 if (help && help->helper)
760 nf_conntrack_event_cache(IPCT_HELPER, ct);
761
762 nf_conntrack_event_cache(master_ct(ct) ?
763 IPCT_RELATED : IPCT_NEW, ct);
764 return NF_ACCEPT;
765
766 out:
767 nf_ct_add_to_dying_list(ct);
768 ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
769 dying:
770 nf_conntrack_double_unlock(hash, reply_hash);
771 NF_CT_STAT_INC(net, insert_failed);
772 local_bh_enable();
773 return ret;
774 }
775 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
776
777 /* Returns true if a connection corresponds to the tuple (required
778 for NAT). */
779 int
780 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
781 const struct nf_conn *ignored_conntrack)
782 {
783 struct net *net = nf_ct_net(ignored_conntrack);
784 const struct nf_conntrack_zone *zone;
785 struct nf_conntrack_tuple_hash *h;
786 struct hlist_nulls_head *ct_hash;
787 unsigned int hash, sequence;
788 struct hlist_nulls_node *n;
789 struct nf_conn *ct;
790
791 zone = nf_ct_zone(ignored_conntrack);
792
793 rcu_read_lock();
794 do {
795 sequence = read_seqcount_begin(&nf_conntrack_generation);
796 hash = hash_conntrack(net, tuple);
797 ct_hash = nf_conntrack_hash;
798 } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
799
800 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
801 ct = nf_ct_tuplehash_to_ctrack(h);
802 if (ct != ignored_conntrack &&
803 nf_ct_key_equal(h, tuple, zone, net)) {
804 NF_CT_STAT_INC_ATOMIC(net, found);
805 rcu_read_unlock();
806 return 1;
807 }
808 NF_CT_STAT_INC_ATOMIC(net, searched);
809 }
810 rcu_read_unlock();
811
812 return 0;
813 }
814 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
815
816 #define NF_CT_EVICTION_RANGE 8
817
818 /* There's a small race here where we may free a just-assured
819 connection. Too bad: we're in trouble anyway. */
820 static noinline int early_drop(struct net *net, unsigned int _hash)
821 {
822 /* Use oldest entry, which is roughly LRU */
823 struct nf_conntrack_tuple_hash *h;
824 struct nf_conn *tmp;
825 struct hlist_nulls_node *n;
826 unsigned int i, hash, sequence;
827 struct nf_conn *ct = NULL;
828 spinlock_t *lockp;
829 bool ret = false;
830
831 i = 0;
832
833 local_bh_disable();
834 restart:
835 sequence = read_seqcount_begin(&nf_conntrack_generation);
836 for (; i < NF_CT_EVICTION_RANGE; i++) {
837 hash = scale_hash(_hash++);
838 lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
839 nf_conntrack_lock(lockp);
840 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
841 spin_unlock(lockp);
842 goto restart;
843 }
844 hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
845 hnnode) {
846 tmp = nf_ct_tuplehash_to_ctrack(h);
847
848 if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
849 !net_eq(nf_ct_net(tmp), net) ||
850 nf_ct_is_dying(tmp))
851 continue;
852
853 if (atomic_inc_not_zero(&tmp->ct_general.use)) {
854 ct = tmp;
855 break;
856 }
857 }
858
859 spin_unlock(lockp);
860 if (ct)
861 break;
862 }
863
864 local_bh_enable();
865
866 if (!ct)
867 return false;
868
869 /* kill only if in same netns -- might have moved due to
870 * SLAB_DESTROY_BY_RCU rules
871 */
872 if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
873 if (nf_ct_delete(ct, 0, 0)) {
874 NF_CT_STAT_INC_ATOMIC(net, early_drop);
875 ret = true;
876 }
877 }
878
879 nf_ct_put(ct);
880 return ret;
881 }
882
883 static struct nf_conn *
884 __nf_conntrack_alloc(struct net *net,
885 const struct nf_conntrack_zone *zone,
886 const struct nf_conntrack_tuple *orig,
887 const struct nf_conntrack_tuple *repl,
888 gfp_t gfp, u32 hash)
889 {
890 struct nf_conn *ct;
891
892 /* We don't want any race condition at early drop stage */
893 atomic_inc(&net->ct.count);
894
895 if (nf_conntrack_max &&
896 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
897 if (!early_drop(net, hash)) {
898 atomic_dec(&net->ct.count);
899 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
900 return ERR_PTR(-ENOMEM);
901 }
902 }
903
904 /*
905 * Do not use kmem_cache_zalloc(), as this cache uses
906 * SLAB_DESTROY_BY_RCU.
907 */
908 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
909 if (ct == NULL)
910 goto out;
911
912 spin_lock_init(&ct->lock);
913 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
914 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
915 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
916 /* save hash for reusing when confirming */
917 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
918 ct->status = 0;
919 /* Don't set timer yet: wait for confirmation */
920 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
921 write_pnet(&ct->ct_net, net);
922 memset(&ct->__nfct_init_offset[0], 0,
923 offsetof(struct nf_conn, proto) -
924 offsetof(struct nf_conn, __nfct_init_offset[0]));
925
926 nf_ct_zone_add(ct, zone);
927
928 /* Because we use RCU lookups, we set ct_general.use to zero before
929 * this is inserted in any list.
930 */
931 atomic_set(&ct->ct_general.use, 0);
932 return ct;
933 out:
934 atomic_dec(&net->ct.count);
935 return ERR_PTR(-ENOMEM);
936 }
937
938 struct nf_conn *nf_conntrack_alloc(struct net *net,
939 const struct nf_conntrack_zone *zone,
940 const struct nf_conntrack_tuple *orig,
941 const struct nf_conntrack_tuple *repl,
942 gfp_t gfp)
943 {
944 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
945 }
946 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
947
948 void nf_conntrack_free(struct nf_conn *ct)
949 {
950 struct net *net = nf_ct_net(ct);
951
952 /* A freed object has refcnt == 0, that's
953 * the golden rule for SLAB_DESTROY_BY_RCU
954 */
955 NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
956
957 nf_ct_ext_destroy(ct);
958 nf_ct_ext_free(ct);
959 kmem_cache_free(nf_conntrack_cachep, ct);
960 smp_mb__before_atomic();
961 atomic_dec(&net->ct.count);
962 }
963 EXPORT_SYMBOL_GPL(nf_conntrack_free);
964
965
966 /* Allocate a new conntrack: we return -ENOMEM if classification
967 failed due to stress. Otherwise it really is unclassifiable. */
968 static struct nf_conntrack_tuple_hash *
969 init_conntrack(struct net *net, struct nf_conn *tmpl,
970 const struct nf_conntrack_tuple *tuple,
971 struct nf_conntrack_l3proto *l3proto,
972 struct nf_conntrack_l4proto *l4proto,
973 struct sk_buff *skb,
974 unsigned int dataoff, u32 hash)
975 {
976 struct nf_conn *ct;
977 struct nf_conn_help *help;
978 struct nf_conntrack_tuple repl_tuple;
979 struct nf_conntrack_ecache *ecache;
980 struct nf_conntrack_expect *exp = NULL;
981 const struct nf_conntrack_zone *zone;
982 struct nf_conn_timeout *timeout_ext;
983 struct nf_conntrack_zone tmp;
984 unsigned int *timeouts;
985
986 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
987 pr_debug("Can't invert tuple.\n");
988 return NULL;
989 }
990
991 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
992 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
993 hash);
994 if (IS_ERR(ct))
995 return (struct nf_conntrack_tuple_hash *)ct;
996
997 if (tmpl && nfct_synproxy(tmpl)) {
998 nfct_seqadj_ext_add(ct);
999 nfct_synproxy_ext_add(ct);
1000 }
1001
1002 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
1003 if (timeout_ext) {
1004 timeouts = nf_ct_timeout_data(timeout_ext);
1005 if (unlikely(!timeouts))
1006 timeouts = l4proto->get_timeouts(net);
1007 } else {
1008 timeouts = l4proto->get_timeouts(net);
1009 }
1010
1011 if (!l4proto->new(ct, skb, dataoff, timeouts)) {
1012 nf_conntrack_free(ct);
1013 pr_debug("can't track with proto module\n");
1014 return NULL;
1015 }
1016
1017 if (timeout_ext)
1018 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
1019 GFP_ATOMIC);
1020
1021 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1022 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
1023 nf_ct_labels_ext_add(ct);
1024
1025 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
1026 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
1027 ecache ? ecache->expmask : 0,
1028 GFP_ATOMIC);
1029
1030 local_bh_disable();
1031 if (net->ct.expect_count) {
1032 spin_lock(&nf_conntrack_expect_lock);
1033 exp = nf_ct_find_expectation(net, zone, tuple);
1034 if (exp) {
1035 pr_debug("expectation arrives ct=%p exp=%p\n",
1036 ct, exp);
1037 /* Welcome, Mr. Bond. We've been expecting you... */
1038 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1039 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
1040 ct->master = exp->master;
1041 if (exp->helper) {
1042 help = nf_ct_helper_ext_add(ct, exp->helper,
1043 GFP_ATOMIC);
1044 if (help)
1045 rcu_assign_pointer(help->helper, exp->helper);
1046 }
1047
1048 #ifdef CONFIG_NF_CONNTRACK_MARK
1049 ct->mark = exp->master->mark;
1050 #endif
1051 #ifdef CONFIG_NF_CONNTRACK_SECMARK
1052 ct->secmark = exp->master->secmark;
1053 #endif
1054 NF_CT_STAT_INC(net, expect_new);
1055 }
1056 spin_unlock(&nf_conntrack_expect_lock);
1057 }
1058 if (!exp) {
1059 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
1060 NF_CT_STAT_INC(net, new);
1061 }
1062
1063 /* Now it is inserted into the unconfirmed list, bump refcount */
1064 nf_conntrack_get(&ct->ct_general);
1065 nf_ct_add_to_unconfirmed_list(ct);
1066
1067 local_bh_enable();
1068
1069 if (exp) {
1070 if (exp->expectfn)
1071 exp->expectfn(ct, exp);
1072 nf_ct_expect_put(exp);
1073 }
1074
1075 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
1076 }
1077
1078 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
1079 static inline struct nf_conn *
1080 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
1081 struct sk_buff *skb,
1082 unsigned int dataoff,
1083 u_int16_t l3num,
1084 u_int8_t protonum,
1085 struct nf_conntrack_l3proto *l3proto,
1086 struct nf_conntrack_l4proto *l4proto,
1087 int *set_reply,
1088 enum ip_conntrack_info *ctinfo)
1089 {
1090 const struct nf_conntrack_zone *zone;
1091 struct nf_conntrack_tuple tuple;
1092 struct nf_conntrack_tuple_hash *h;
1093 struct nf_conntrack_zone tmp;
1094 struct nf_conn *ct;
1095 u32 hash;
1096
1097 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
1098 dataoff, l3num, protonum, net, &tuple, l3proto,
1099 l4proto)) {
1100 pr_debug("Can't get tuple\n");
1101 return NULL;
1102 }
1103
1104 /* look for tuple match */
1105 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1106 hash = hash_conntrack_raw(&tuple, net);
1107 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
1108 if (!h) {
1109 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
1110 skb, dataoff, hash);
1111 if (!h)
1112 return NULL;
1113 if (IS_ERR(h))
1114 return (void *)h;
1115 }
1116 ct = nf_ct_tuplehash_to_ctrack(h);
1117
1118 /* It exists; we have (non-exclusive) reference. */
1119 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1120 *ctinfo = IP_CT_ESTABLISHED_REPLY;
1121 /* Please set the reply bit if this packet is OK */
1122 *set_reply = 1;
1123 } else {
1124 /* Once we've had two way comms, always ESTABLISHED. */
1125 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1126 pr_debug("normal packet for %p\n", ct);
1127 *ctinfo = IP_CT_ESTABLISHED;
1128 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1129 pr_debug("related packet for %p\n", ct);
1130 *ctinfo = IP_CT_RELATED;
1131 } else {
1132 pr_debug("new packet for %p\n", ct);
1133 *ctinfo = IP_CT_NEW;
1134 }
1135 *set_reply = 0;
1136 }
1137 skb->nfct = &ct->ct_general;
1138 skb->nfctinfo = *ctinfo;
1139 return ct;
1140 }
1141
1142 unsigned int
1143 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1144 struct sk_buff *skb)
1145 {
1146 struct nf_conn *ct, *tmpl = NULL;
1147 enum ip_conntrack_info ctinfo;
1148 struct nf_conntrack_l3proto *l3proto;
1149 struct nf_conntrack_l4proto *l4proto;
1150 unsigned int *timeouts;
1151 unsigned int dataoff;
1152 u_int8_t protonum;
1153 int set_reply = 0;
1154 int ret;
1155
1156 if (skb->nfct) {
1157 /* Previously seen (loopback or untracked)? Ignore. */
1158 tmpl = (struct nf_conn *)skb->nfct;
1159 if (!nf_ct_is_template(tmpl)) {
1160 NF_CT_STAT_INC_ATOMIC(net, ignore);
1161 return NF_ACCEPT;
1162 }
1163 skb->nfct = NULL;
1164 }
1165
1166 /* rcu_read_lock()ed by nf_hook_slow */
1167 l3proto = __nf_ct_l3proto_find(pf);
1168 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
1169 &dataoff, &protonum);
1170 if (ret <= 0) {
1171 pr_debug("not prepared to track yet or error occurred\n");
1172 NF_CT_STAT_INC_ATOMIC(net, error);
1173 NF_CT_STAT_INC_ATOMIC(net, invalid);
1174 ret = -ret;
1175 goto out;
1176 }
1177
1178 l4proto = __nf_ct_l4proto_find(pf, protonum);
1179
1180 /* It may be a special packet, error, unclean...
1181 * the inverse of the return code tells the netfilter
1182 * core what to do with the packet. */
1183 if (l4proto->error != NULL) {
1184 ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
1185 pf, hooknum);
1186 if (ret <= 0) {
1187 NF_CT_STAT_INC_ATOMIC(net, error);
1188 NF_CT_STAT_INC_ATOMIC(net, invalid);
1189 ret = -ret;
1190 goto out;
1191 }
1192 /* ICMP[v6] protocol trackers may assign one conntrack. */
1193 if (skb->nfct)
1194 goto out;
1195 }
1196
1197 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
1198 l3proto, l4proto, &set_reply, &ctinfo);
1199 if (!ct) {
1200 /* Not valid part of a connection */
1201 NF_CT_STAT_INC_ATOMIC(net, invalid);
1202 ret = NF_ACCEPT;
1203 goto out;
1204 }
1205
1206 if (IS_ERR(ct)) {
1207 /* Too stressed to deal. */
1208 NF_CT_STAT_INC_ATOMIC(net, drop);
1209 ret = NF_DROP;
1210 goto out;
1211 }
1212
1213 NF_CT_ASSERT(skb->nfct);
1214
1215 /* Decide what timeout policy we want to apply to this flow. */
1216 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
1217
1218 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
1219 if (ret <= 0) {
1220 /* Invalid: inverse of the return code tells
1221 * the netfilter core what to do */
1222 pr_debug("nf_conntrack_in: Can't track with proto module\n");
1223 nf_conntrack_put(skb->nfct);
1224 skb->nfct = NULL;
1225 NF_CT_STAT_INC_ATOMIC(net, invalid);
1226 if (ret == -NF_DROP)
1227 NF_CT_STAT_INC_ATOMIC(net, drop);
1228 ret = -ret;
1229 goto out;
1230 }
1231
1232 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1233 nf_conntrack_event_cache(IPCT_REPLY, ct);
1234 out:
1235 if (tmpl) {
1236 /* Special case: we have to repeat this hook, assign the
1237 * template again to this packet. We assume that this packet
1238 * has no conntrack assigned. This is used by nf_ct_tcp. */
1239 if (ret == NF_REPEAT)
1240 skb->nfct = (struct nf_conntrack *)tmpl;
1241 else
1242 nf_ct_put(tmpl);
1243 }
1244
1245 return ret;
1246 }
1247 EXPORT_SYMBOL_GPL(nf_conntrack_in);
1248
1249 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1250 const struct nf_conntrack_tuple *orig)
1251 {
1252 bool ret;
1253
1254 rcu_read_lock();
1255 ret = nf_ct_invert_tuple(inverse, orig,
1256 __nf_ct_l3proto_find(orig->src.l3num),
1257 __nf_ct_l4proto_find(orig->src.l3num,
1258 orig->dst.protonum));
1259 rcu_read_unlock();
1260 return ret;
1261 }
1262 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
1263
1264 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1265 implicitly racy: see __nf_conntrack_confirm */
1266 void nf_conntrack_alter_reply(struct nf_conn *ct,
1267 const struct nf_conntrack_tuple *newreply)
1268 {
1269 struct nf_conn_help *help = nfct_help(ct);
1270
1271 /* Should be unconfirmed, so not in hash table yet */
1272 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
1273
1274 pr_debug("Altering reply tuple of %p to ", ct);
1275 nf_ct_dump_tuple(newreply);
1276
1277 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1278 if (ct->master || (help && !hlist_empty(&help->expectations)))
1279 return;
1280
1281 rcu_read_lock();
1282 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
1283 rcu_read_unlock();
1284 }
1285 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
1286
1287 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1288 void __nf_ct_refresh_acct(struct nf_conn *ct,
1289 enum ip_conntrack_info ctinfo,
1290 const struct sk_buff *skb,
1291 unsigned long extra_jiffies,
1292 int do_acct)
1293 {
1294 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1295 NF_CT_ASSERT(skb);
1296
1297 /* Only update if this is not a fixed timeout */
1298 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
1299 goto acct;
1300
1301 /* If not in hash table, timer will not be active yet */
1302 if (!nf_ct_is_confirmed(ct)) {
1303 ct->timeout.expires = extra_jiffies;
1304 } else {
1305 unsigned long newtime = jiffies + extra_jiffies;
1306
1307 /* Only update the timeout if the new timeout is at least
1308 HZ jiffies from the old timeout. Need del_timer for race
1309 avoidance (may already be dying). */
1310 if (newtime - ct->timeout.expires >= HZ)
1311 mod_timer_pending(&ct->timeout, newtime);
1312 }
1313
1314 acct:
1315 if (do_acct)
1316 nf_ct_acct_update(ct, ctinfo, skb->len);
1317 }
1318 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
1319
1320 bool __nf_ct_kill_acct(struct nf_conn *ct,
1321 enum ip_conntrack_info ctinfo,
1322 const struct sk_buff *skb,
1323 int do_acct)
1324 {
1325 if (do_acct)
1326 nf_ct_acct_update(ct, ctinfo, skb->len);
1327
1328 if (del_timer(&ct->timeout)) {
1329 ct->timeout.function((unsigned long)ct);
1330 return true;
1331 }
1332 return false;
1333 }
1334 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
1335
1336 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1337
1338 #include <linux/netfilter/nfnetlink.h>
1339 #include <linux/netfilter/nfnetlink_conntrack.h>
1340 #include <linux/mutex.h>
1341
1342 /* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
1343 * in ip_conntrack_core, since we don't want the protocols to autoload
1344 * or depend on ctnetlink */
1345 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
1346 const struct nf_conntrack_tuple *tuple)
1347 {
1348 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
1349 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
1350 goto nla_put_failure;
1351 return 0;
1352
1353 nla_put_failure:
1354 return -1;
1355 }
1356 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
1357
1358 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
1359 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
1360 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
1361 };
1362 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
1363
1364 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1365 struct nf_conntrack_tuple *t)
1366 {
1367 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
1368 return -EINVAL;
1369
1370 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
1371 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
1372
1373 return 0;
1374 }
1375 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1376
1377 int nf_ct_port_nlattr_tuple_size(void)
1378 {
1379 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1380 }
1381 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1382 #endif
1383
1384 /* Used by ipt_REJECT and ip6t_REJECT. */
1385 static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
1386 {
1387 struct nf_conn *ct;
1388 enum ip_conntrack_info ctinfo;
1389
1390 /* This ICMP is in reverse direction to the packet which caused it */
1391 ct = nf_ct_get(skb, &ctinfo);
1392 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1393 ctinfo = IP_CT_RELATED_REPLY;
1394 else
1395 ctinfo = IP_CT_RELATED;
1396
1397 /* Attach to new skbuff, and increment count */
1398 nskb->nfct = &ct->ct_general;
1399 nskb->nfctinfo = ctinfo;
1400 nf_conntrack_get(nskb->nfct);
1401 }
1402
1403 /* Bring out ya dead! */
1404 static struct nf_conn *
1405 get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1406 void *data, unsigned int *bucket)
1407 {
1408 struct nf_conntrack_tuple_hash *h;
1409 struct nf_conn *ct;
1410 struct hlist_nulls_node *n;
1411 int cpu;
1412 spinlock_t *lockp;
1413
1414 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1415 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1416 local_bh_disable();
1417 nf_conntrack_lock(lockp);
1418 if (*bucket < nf_conntrack_htable_size) {
1419 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
1420 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1421 continue;
1422 ct = nf_ct_tuplehash_to_ctrack(h);
1423 if (net_eq(nf_ct_net(ct), net) &&
1424 iter(ct, data))
1425 goto found;
1426 }
1427 }
1428 spin_unlock(lockp);
1429 local_bh_enable();
1430 cond_resched();
1431 }
1432
1433 for_each_possible_cpu(cpu) {
1434 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1435
1436 spin_lock_bh(&pcpu->lock);
1437 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
1438 ct = nf_ct_tuplehash_to_ctrack(h);
1439 if (iter(ct, data))
1440 set_bit(IPS_DYING_BIT, &ct->status);
1441 }
1442 spin_unlock_bh(&pcpu->lock);
1443 cond_resched();
1444 }
1445 return NULL;
1446 found:
1447 atomic_inc(&ct->ct_general.use);
1448 spin_unlock(lockp);
1449 local_bh_enable();
1450 return ct;
1451 }
1452
1453 void nf_ct_iterate_cleanup(struct net *net,
1454 int (*iter)(struct nf_conn *i, void *data),
1455 void *data, u32 portid, int report)
1456 {
1457 struct nf_conn *ct;
1458 unsigned int bucket = 0;
1459
1460 might_sleep();
1461
1462 if (atomic_read(&net->ct.count) == 0)
1463 return;
1464
1465 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1466 /* Time to push up daisies... */
1467 if (del_timer(&ct->timeout))
1468 nf_ct_delete(ct, portid, report);
1469
1470 /* ... else the timer will get him soon. */
1471
1472 nf_ct_put(ct);
1473 cond_resched();
1474 }
1475 }
1476 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
1477
1478 static int kill_all(struct nf_conn *i, void *data)
1479 {
1480 return 1;
1481 }
1482
1483 void nf_ct_free_hashtable(void *hash, unsigned int size)
1484 {
1485 if (is_vmalloc_addr(hash))
1486 vfree(hash);
1487 else
1488 free_pages((unsigned long)hash,
1489 get_order(sizeof(struct hlist_head) * size));
1490 }
1491 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1492
1493 static int untrack_refs(void)
1494 {
1495 int cnt = 0, cpu;
1496
1497 for_each_possible_cpu(cpu) {
1498 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1499
1500 cnt += atomic_read(&ct->ct_general.use) - 1;
1501 }
1502 return cnt;
1503 }
1504
1505 void nf_conntrack_cleanup_start(void)
1506 {
1507 RCU_INIT_POINTER(ip_ct_attach, NULL);
1508 }
1509
1510 void nf_conntrack_cleanup_end(void)
1511 {
1512 RCU_INIT_POINTER(nf_ct_destroy, NULL);
1513 while (untrack_refs() > 0)
1514 schedule();
1515
1516 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1517
1518 nf_conntrack_proto_fini();
1519 nf_conntrack_seqadj_fini();
1520 nf_conntrack_labels_fini();
1521 nf_conntrack_helper_fini();
1522 nf_conntrack_timeout_fini();
1523 nf_conntrack_ecache_fini();
1524 nf_conntrack_tstamp_fini();
1525 nf_conntrack_acct_fini();
1526 nf_conntrack_expect_fini();
1527 }
1528
1529 /*
1530 * Mishearing the voices in his head, our hero wonders how he's
1531 * supposed to kill the mall.
1532 */
1533 void nf_conntrack_cleanup_net(struct net *net)
1534 {
1535 LIST_HEAD(single);
1536
1537 list_add(&net->exit_list, &single);
1538 nf_conntrack_cleanup_net_list(&single);
1539 }
1540
1541 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1542 {
1543 int busy;
1544 struct net *net;
1545
1546 /*
1547 * This makes sure all current packets have passed through
1548 * netfilter framework. Roll on, two-stage module
1549 * delete...
1550 */
1551 synchronize_net();
1552 i_see_dead_people:
1553 busy = 0;
1554 list_for_each_entry(net, net_exit_list, exit_list) {
1555 nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
1556 if (atomic_read(&net->ct.count) != 0)
1557 busy = 1;
1558 }
1559 if (busy) {
1560 schedule();
1561 goto i_see_dead_people;
1562 }
1563
1564 list_for_each_entry(net, net_exit_list, exit_list) {
1565 nf_conntrack_proto_pernet_fini(net);
1566 nf_conntrack_helper_pernet_fini(net);
1567 nf_conntrack_ecache_pernet_fini(net);
1568 nf_conntrack_tstamp_pernet_fini(net);
1569 nf_conntrack_acct_pernet_fini(net);
1570 nf_conntrack_expect_pernet_fini(net);
1571 free_percpu(net->ct.stat);
1572 free_percpu(net->ct.pcpu_lists);
1573 }
1574 }
1575
1576 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1577 {
1578 struct hlist_nulls_head *hash;
1579 unsigned int nr_slots, i;
1580 size_t sz;
1581
1582 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1583 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1584 sz = nr_slots * sizeof(struct hlist_nulls_head);
1585 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1586 get_order(sz));
1587 if (!hash)
1588 hash = vzalloc(sz);
1589
1590 if (hash && nulls)
1591 for (i = 0; i < nr_slots; i++)
1592 INIT_HLIST_NULLS_HEAD(&hash[i], i);
1593
1594 return hash;
1595 }
1596 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
1597
1598 int nf_conntrack_hash_resize(unsigned int hashsize)
1599 {
1600 int i, bucket;
1601 unsigned int old_size;
1602 struct hlist_nulls_head *hash, *old_hash;
1603 struct nf_conntrack_tuple_hash *h;
1604 struct nf_conn *ct;
1605
1606 if (!hashsize)
1607 return -EINVAL;
1608
1609 hash = nf_ct_alloc_hashtable(&hashsize, 1);
1610 if (!hash)
1611 return -ENOMEM;
1612
1613 old_size = nf_conntrack_htable_size;
1614 if (old_size == hashsize) {
1615 nf_ct_free_hashtable(hash, hashsize);
1616 return 0;
1617 }
1618
1619 local_bh_disable();
1620 nf_conntrack_all_lock();
1621 write_seqcount_begin(&nf_conntrack_generation);
1622
1623 /* Lookups in the old hash might happen in parallel, which means we
1624 * might get false negatives during connection lookup. New connections
1625 * created because of a false negative won't make it into the hash
1626 * though since that required taking the locks.
1627 */
1628
1629 for (i = 0; i < nf_conntrack_htable_size; i++) {
1630 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
1631 h = hlist_nulls_entry(nf_conntrack_hash[i].first,
1632 struct nf_conntrack_tuple_hash, hnnode);
1633 ct = nf_ct_tuplehash_to_ctrack(h);
1634 hlist_nulls_del_rcu(&h->hnnode);
1635 bucket = __hash_conntrack(nf_ct_net(ct),
1636 &h->tuple, hashsize);
1637 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1638 }
1639 }
1640 old_size = nf_conntrack_htable_size;
1641 old_hash = nf_conntrack_hash;
1642
1643 nf_conntrack_hash = hash;
1644 nf_conntrack_htable_size = hashsize;
1645
1646 write_seqcount_end(&nf_conntrack_generation);
1647 nf_conntrack_all_unlock();
1648 local_bh_enable();
1649
1650 synchronize_net();
1651 nf_ct_free_hashtable(old_hash, old_size);
1652 return 0;
1653 }
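/* Editor's note, a minimal sketch and not part of this file: given the commit
 * subject above ("allow increasing bucket size via sysctl too"), a writable
 * net.netfilter.nf_conntrack_buckets handler would presumably reuse
 * nf_conntrack_hash_resize() much like the hashsize module parameter below.
 * Handler name, placement (nf_conntrack_standalone.c) and the table->data
 * wiring are assumptions:
 *
 *	static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
 *					    void __user *buffer,
 *					    size_t *lenp, loff_t *ppos)
 *	{
 *		int ret;
 *
 *		ret = proc_dointvec(table, write, buffer, lenp, ppos);
 *		if (ret < 0 || !write)
 *			return ret;
 *
 *		return nf_conntrack_hash_resize(nf_conntrack_htable_size);
 *	}
 */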
1654
1655 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1656 {
1657 unsigned int hashsize;
1658 int rc;
1659
1660 if (current->nsproxy->net_ns != &init_net)
1661 return -EOPNOTSUPP;
1662
1663 /* On boot, we can set this without any fancy locking. */
1664 if (!nf_conntrack_htable_size)
1665 return param_set_uint(val, kp);
1666
1667 rc = kstrtouint(val, 0, &hashsize);
1668 if (rc)
1669 return rc;
1670
1671 return nf_conntrack_hash_resize(hashsize);
1672 }
1673 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
1674
1675 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
1676 &nf_conntrack_htable_size, 0600);
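/* Usage note (illustrative value, not from this file): with the 0600 mode
 * above, the bucket count can also be raised at run time from the initial
 * netns through the module parameter, e.g.
 *
 *	echo 262144 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * which lands in nf_conntrack_set_hashsize() and, after boot, in
 * nf_conntrack_hash_resize().  Only the number of buckets changes;
 * nf_conntrack_max keeps whatever value it already had.
 */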
1677
1678 void nf_ct_untracked_status_or(unsigned long bits)
1679 {
1680 int cpu;
1681
1682 for_each_possible_cpu(cpu)
1683 per_cpu(nf_conntrack_untracked, cpu).status |= bits;
1684 }
1685 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1686
1687 int nf_conntrack_init_start(void)
1688 {
1689 int max_factor = 8;
1690 int ret = -ENOMEM;
1691 int i, cpu;
1692
1693 seqcount_init(&nf_conntrack_generation);
1694
1695 for (i = 0; i < CONNTRACK_LOCKS; i++)
1696 spin_lock_init(&nf_conntrack_locks[i]);
1697
1698 if (!nf_conntrack_htable_size) {
1699 /* Idea from tcp.c: use 1/16384 of memory.
1700 * On i386: 32MB machine has 512 buckets.
1701 * >= 1GB machines have 16384 buckets.
1702 * >= 4GB machines have 65536 buckets.
1703 */
1704 nf_conntrack_htable_size
1705 = (((totalram_pages << PAGE_SHIFT) / 16384)
1706 / sizeof(struct hlist_head));
1707 if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
1708 nf_conntrack_htable_size = 65536;
1709 else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
1710 nf_conntrack_htable_size = 16384;
1711 if (nf_conntrack_htable_size < 32)
1712 nf_conntrack_htable_size = 32;
1713
1714 /* Use a max. factor of four by default to get the same max as
1715 * with the old struct list_heads. When a table size is given
1716 * we use the old value of 8 to avoid reducing the max.
1717 * entries. */
1718 max_factor = 4;
1719 }
1720
1721 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
1722 if (!nf_conntrack_hash)
1723 return -ENOMEM;
1724
1725 nf_conntrack_max = max_factor * nf_conntrack_htable_size;
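/* Worked example of the default sizing above, assuming 4 KiB pages and
 * 8-byte pointers (editor's illustration, not in the original): a 512 MiB
 * machine gets (512 MiB / 16384) / 8 = 4096 buckets and, with the default
 * max_factor of 4, nf_conntrack_max = 4 * 4096 = 16384 entries; machines
 * with more than 1 GiB or 4 GiB of RAM are simply set to 16384 or 65536
 * buckets, respectively, before the multiplication.
 */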
1726
1727 nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
1728 sizeof(struct nf_conn), 0,
1729 SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
1730 if (!nf_conntrack_cachep)
1731 goto err_cachep;
1732
1733 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
1734 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1735 nf_conntrack_max);
1736
1737 ret = nf_conntrack_expect_init();
1738 if (ret < 0)
1739 goto err_expect;
1740
1741 ret = nf_conntrack_acct_init();
1742 if (ret < 0)
1743 goto err_acct;
1744
1745 ret = nf_conntrack_tstamp_init();
1746 if (ret < 0)
1747 goto err_tstamp;
1748
1749 ret = nf_conntrack_ecache_init();
1750 if (ret < 0)
1751 goto err_ecache;
1752
1753 ret = nf_conntrack_timeout_init();
1754 if (ret < 0)
1755 goto err_timeout;
1756
1757 ret = nf_conntrack_helper_init();
1758 if (ret < 0)
1759 goto err_helper;
1760
1761 ret = nf_conntrack_labels_init();
1762 if (ret < 0)
1763 goto err_labels;
1764
1765 ret = nf_conntrack_seqadj_init();
1766 if (ret < 0)
1767 goto err_seqadj;
1768
1769 ret = nf_conntrack_proto_init();
1770 if (ret < 0)
1771 goto err_proto;
1772
1773 /* Set up fake conntrack: to never be deleted, not in any hashes */
1774 for_each_possible_cpu(cpu) {
1775 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1776 write_pnet(&ct->ct_net, &init_net);
1777 atomic_set(&ct->ct_general.use, 1);
1778 }
1779 /* - and make it look like a confirmed connection */
1780 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
1781 return 0;
1782
1783 err_proto:
1784 nf_conntrack_seqadj_fini();
1785 err_seqadj:
1786 nf_conntrack_labels_fini();
1787 err_labels:
1788 nf_conntrack_helper_fini();
1789 err_helper:
1790 nf_conntrack_timeout_fini();
1791 err_timeout:
1792 nf_conntrack_ecache_fini();
1793 err_ecache:
1794 nf_conntrack_tstamp_fini();
1795 err_tstamp:
1796 nf_conntrack_acct_fini();
1797 err_acct:
1798 nf_conntrack_expect_fini();
1799 err_expect:
1800 kmem_cache_destroy(nf_conntrack_cachep);
1801 err_cachep:
1802 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1803 return ret;
1804 }
1805
1806 void nf_conntrack_init_end(void)
1807 {
1808 /* For use by REJECT target */
1809 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1810 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1811 }
1812
1813 /*
1814 * We need to use special "null" values, not used in hash table
1815 */
1816 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
1817 #define DYING_NULLS_VAL ((1<<30)+1)
1818 #define TEMPLATE_NULLS_VAL ((1<<30)+2)
1819
1820 int nf_conntrack_init_net(struct net *net)
1821 {
1822 int ret = -ENOMEM;
1823 int cpu;
1824
1825 atomic_set(&net->ct.count, 0);
1826
1827 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
1828 if (!net->ct.pcpu_lists)
1829 goto err_stat;
1830
1831 for_each_possible_cpu(cpu) {
1832 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1833
1834 spin_lock_init(&pcpu->lock);
1835 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
1836 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
1837 }
1838
1839 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1840 if (!net->ct.stat)
1841 goto err_pcpu_lists;
1842
1843 ret = nf_conntrack_expect_pernet_init(net);
1844 if (ret < 0)
1845 goto err_expect;
1846 ret = nf_conntrack_acct_pernet_init(net);
1847 if (ret < 0)
1848 goto err_acct;
1849 ret = nf_conntrack_tstamp_pernet_init(net);
1850 if (ret < 0)
1851 goto err_tstamp;
1852 ret = nf_conntrack_ecache_pernet_init(net);
1853 if (ret < 0)
1854 goto err_ecache;
1855 ret = nf_conntrack_helper_pernet_init(net);
1856 if (ret < 0)
1857 goto err_helper;
1858 ret = nf_conntrack_proto_pernet_init(net);
1859 if (ret < 0)
1860 goto err_proto;
1861 return 0;
1862
1863 err_proto:
1864 nf_conntrack_helper_pernet_fini(net);
1865 err_helper:
1866 nf_conntrack_ecache_pernet_fini(net);
1867 err_ecache:
1868 nf_conntrack_tstamp_pernet_fini(net);
1869 err_tstamp:
1870 nf_conntrack_acct_pernet_fini(net);
1871 err_acct:
1872 nf_conntrack_expect_pernet_fini(net);
1873 err_expect:
1874 free_percpu(net->ct.stat);
1875 err_pcpu_lists:
1876 free_percpu(net->ct.pcpu_lists);
1877 err_stat:
1878 return ret;
1879 }