netfilter: conntrack: allow increasing bucket size via sysctl too
[deliverable/linux.git] / net / netfilter / nf_conntrack_standalone.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/slab.h>
13 #include <linux/module.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/percpu.h>
18 #include <linux/netdevice.h>
19 #include <linux/security.h>
20 #include <net/net_namespace.h>
21 #ifdef CONFIG_SYSCTL
22 #include <linux/sysctl.h>
23 #endif
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_l3proto.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_acct.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33 #include <net/netfilter/nf_conntrack_timestamp.h>
34 #include <linux/rculist_nulls.h>
35
36 MODULE_LICENSE("GPL");
37
38 #ifdef CONFIG_NF_CONNTRACK_PROCFS
39 void
40 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
41 const struct nf_conntrack_l3proto *l3proto,
42 const struct nf_conntrack_l4proto *l4proto)
43 {
44 l3proto->print_tuple(s, tuple);
45 l4proto->print_tuple(s, tuple);
46 }
47 EXPORT_SYMBOL_GPL(print_tuple);
48
49 struct ct_iter_state {
50 struct seq_net_private p;
51 unsigned int bucket;
52 u_int64_t time_now;
53 };
54
55 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
56 {
57 struct ct_iter_state *st = seq->private;
58 struct hlist_nulls_node *n;
59
60 for (st->bucket = 0;
61 st->bucket < nf_conntrack_htable_size;
62 st->bucket++) {
63 n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
64 if (!is_a_nulls(n))
65 return n;
66 }
67 return NULL;
68 }
69
70 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
71 struct hlist_nulls_node *head)
72 {
73 struct ct_iter_state *st = seq->private;
74
75 head = rcu_dereference(hlist_nulls_next_rcu(head));
76 while (is_a_nulls(head)) {
77 if (likely(get_nulls_value(head) == st->bucket)) {
78 if (++st->bucket >= nf_conntrack_htable_size)
79 return NULL;
80 }
81 head = rcu_dereference(
82 hlist_nulls_first_rcu(
83 &nf_conntrack_hash[st->bucket]));
84 }
85 return head;
86 }
87
88 static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
89 {
90 struct hlist_nulls_node *head = ct_get_first(seq);
91
92 if (head)
93 while (pos && (head = ct_get_next(seq, head)))
94 pos--;
95 return pos ? NULL : head;
96 }
97
98 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
99 __acquires(RCU)
100 {
101 struct ct_iter_state *st = seq->private;
102
103 st->time_now = ktime_get_real_ns();
104 rcu_read_lock();
105 return ct_get_idx(seq, *pos);
106 }
107
108 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
109 {
110 (*pos)++;
111 return ct_get_next(s, v);
112 }
113
114 static void ct_seq_stop(struct seq_file *s, void *v)
115 __releases(RCU)
116 {
117 rcu_read_unlock();
118 }
119
#ifdef CONFIG_NF_CONNTRACK_SECMARK
/*
 * Print "secctx=<context> " for the connection's secmark.  Nothing is
 * printed when the secid cannot be converted to a context string.
 */
static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
	char *ctx;
	u32 ctx_len;

	if (security_secid_to_secctx(ct->secmark, &ctx, &ctx_len))
		return;

	seq_printf(s, "secctx=%s ", ctx);

	/* The context string was handed out by the security layer; give it back. */
	security_release_secctx(ctx, ctx_len);
}
#else
/* Without CONFIG_NF_CONNTRACK_SECMARK there is nothing to print. */
static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
}
#endif
140
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Print the connection's zone id, but only if the zone's direction equals
 * @dir.  The label depends on that direction: "zone=", "zone-orig=" or
 * "zone-reply=".  Any other direction value prints nothing.
 */
static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
			 int dir)
{
	const struct nf_conntrack_zone *zone = nf_ct_zone(ct);

	if (zone->dir != dir)
		return;

	if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
		seq_printf(s, "zone=%u ", zone->id);
	else if (zone->dir == NF_CT_ZONE_DIR_ORIG)
		seq_printf(s, "zone-orig=%u ", zone->id);
	else if (zone->dir == NF_CT_ZONE_DIR_REPL)
		seq_printf(s, "zone-reply=%u ", zone->id);
}
#else
/* Without CONFIG_NF_CONNTRACK_ZONES there is nothing to print. */
static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
				int dir)
{
}
#endif
169
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
/*
 * Print "delta-time=<seconds> ": the time between the connection's start
 * timestamp and the moment the current dump began (st->time_now).  A
 * non-positive difference is reported as 0.  Nothing is printed when the
 * connection carries no timestamp extension.
 */
static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
	struct ct_iter_state *st = s->private;
	struct nf_conn_tstamp *tstamp = nf_conn_tstamp_find(ct);
	s64 delta_time;

	if (!tstamp)
		return;

	delta_time = st->time_now - tstamp->start;
	delta_time = delta_time > 0 ? div_s64(delta_time, NSEC_PER_SEC) : 0;

	seq_printf(s, "delta-time=%llu ",
		   (unsigned long long)delta_time);
}
#else
/* Without CONFIG_NF_CONNTRACK_TIMESTAMP there is nothing to print. */
static inline void
ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
}
#endif
196
197 /* return 0 on success, 1 in case of error */
198 static int ct_seq_show(struct seq_file *s, void *v)
199 {
200 struct nf_conntrack_tuple_hash *hash = v;
201 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
202 const struct nf_conntrack_l3proto *l3proto;
203 const struct nf_conntrack_l4proto *l4proto;
204 int ret = 0;
205
206 NF_CT_ASSERT(ct);
207 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
208 return 0;
209
210 /* we only want to print DIR_ORIGINAL */
211 if (NF_CT_DIRECTION(hash))
212 goto release;
213
214 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
215 NF_CT_ASSERT(l3proto);
216 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
217 NF_CT_ASSERT(l4proto);
218
219 ret = -ENOSPC;
220 seq_printf(s, "%-8s %u %-8s %u %ld ",
221 l3proto->name, nf_ct_l3num(ct),
222 l4proto->name, nf_ct_protonum(ct),
223 timer_pending(&ct->timeout)
224 ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
225
226 if (l4proto->print_conntrack)
227 l4proto->print_conntrack(s, ct);
228
229 print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
230 l3proto, l4proto);
231
232 ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
233
234 if (seq_has_overflowed(s))
235 goto release;
236
237 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
238 goto release;
239
240 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
241 seq_printf(s, "[UNREPLIED] ");
242
243 print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
244 l3proto, l4proto);
245
246 ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
247
248 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
249 goto release;
250
251 if (test_bit(IPS_ASSURED_BIT, &ct->status))
252 seq_printf(s, "[ASSURED] ");
253
254 if (seq_has_overflowed(s))
255 goto release;
256
257 #if defined(CONFIG_NF_CONNTRACK_MARK)
258 seq_printf(s, "mark=%u ", ct->mark);
259 #endif
260
261 ct_show_secctx(s, ct);
262 ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
263 ct_show_delta_time(s, ct);
264
265 seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
266
267 if (seq_has_overflowed(s))
268 goto release;
269
270 ret = 0;
271 release:
272 nf_ct_put(ct);
273 return ret;
274 }
275
276 static const struct seq_operations ct_seq_ops = {
277 .start = ct_seq_start,
278 .next = ct_seq_next,
279 .stop = ct_seq_stop,
280 .show = ct_seq_show
281 };
282
283 static int ct_open(struct inode *inode, struct file *file)
284 {
285 return seq_open_net(inode, file, &ct_seq_ops,
286 sizeof(struct ct_iter_state));
287 }
288
289 static const struct file_operations ct_file_ops = {
290 .owner = THIS_MODULE,
291 .open = ct_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294 .release = seq_release_net,
295 };
296
297 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
298 {
299 struct net *net = seq_file_net(seq);
300 int cpu;
301
302 if (*pos == 0)
303 return SEQ_START_TOKEN;
304
305 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
306 if (!cpu_possible(cpu))
307 continue;
308 *pos = cpu + 1;
309 return per_cpu_ptr(net->ct.stat, cpu);
310 }
311
312 return NULL;
313 }
314
315 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
316 {
317 struct net *net = seq_file_net(seq);
318 int cpu;
319
320 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
321 if (!cpu_possible(cpu))
322 continue;
323 *pos = cpu + 1;
324 return per_cpu_ptr(net->ct.stat, cpu);
325 }
326
327 return NULL;
328 }
329
/* seq_file ->stop for the per-CPU statistics listing: nothing to undo. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
333
334 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
335 {
336 struct net *net = seq_file_net(seq);
337 unsigned int nr_conntracks = atomic_read(&net->ct.count);
338 const struct ip_conntrack_stat *st = v;
339
340 if (v == SEQ_START_TOKEN) {
341 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
342 return 0;
343 }
344
345 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
346 "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
347 nr_conntracks,
348 st->searched,
349 st->found,
350 st->new,
351 st->invalid,
352 st->ignore,
353 st->delete,
354 st->delete_list,
355 st->insert,
356 st->insert_failed,
357 st->drop,
358 st->early_drop,
359 st->error,
360
361 st->expect_new,
362 st->expect_create,
363 st->expect_delete,
364 st->search_restart
365 );
366 return 0;
367 }
368
369 static const struct seq_operations ct_cpu_seq_ops = {
370 .start = ct_cpu_seq_start,
371 .next = ct_cpu_seq_next,
372 .stop = ct_cpu_seq_stop,
373 .show = ct_cpu_seq_show,
374 };
375
376 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
377 {
378 return seq_open_net(inode, file, &ct_cpu_seq_ops,
379 sizeof(struct seq_net_private));
380 }
381
382 static const struct file_operations ct_cpu_seq_fops = {
383 .owner = THIS_MODULE,
384 .open = ct_cpu_seq_open,
385 .read = seq_read,
386 .llseek = seq_lseek,
387 .release = seq_release_net,
388 };
389
390 static int nf_conntrack_standalone_init_proc(struct net *net)
391 {
392 struct proc_dir_entry *pde;
393 kuid_t root_uid;
394 kgid_t root_gid;
395
396 pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
397 if (!pde)
398 goto out_nf_conntrack;
399
400 root_uid = make_kuid(net->user_ns, 0);
401 root_gid = make_kgid(net->user_ns, 0);
402 if (uid_valid(root_uid) && gid_valid(root_gid))
403 proc_set_user(pde, root_uid, root_gid);
404
405 pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
406 &ct_cpu_seq_fops);
407 if (!pde)
408 goto out_stat_nf_conntrack;
409 return 0;
410
411 out_stat_nf_conntrack:
412 remove_proc_entry("nf_conntrack", net->proc_net);
413 out_nf_conntrack:
414 return -ENOMEM;
415 }
416
417 static void nf_conntrack_standalone_fini_proc(struct net *net)
418 {
419 remove_proc_entry("nf_conntrack", net->proc_net_stat);
420 remove_proc_entry("nf_conntrack", net->proc_net);
421 }
422 #else
/* No proc entries without CONFIG_NF_CONNTRACK_PROCFS; always succeeds. */
static int nf_conntrack_standalone_init_proc(struct net *net)
{
	return 0;
}
427
/* No proc entries without CONFIG_NF_CONNTRACK_PROCFS; nothing to remove. */
static void nf_conntrack_standalone_fini_proc(struct net *net)
{
}
431 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
432
433 /* Sysctl support */
434
435 #ifdef CONFIG_SYSCTL
436 /* Log invalid packets of a given protocol */
437 static int log_invalid_proto_min __read_mostly;
438 static int log_invalid_proto_max __read_mostly = 255;
439
440 /* size the user *wants to set */
441 static unsigned int nf_conntrack_htable_size_user __read_mostly;
442
443 static int
444 nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
445 void __user *buffer, size_t *lenp, loff_t *ppos)
446 {
447 int ret;
448
449 ret = proc_dointvec(table, write, buffer, lenp, ppos);
450 if (ret < 0 || !write)
451 return ret;
452
453 /* update ret, we might not be able to satisfy request */
454 ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
455
456 /* update it to the actual value used by conntrack */
457 nf_conntrack_htable_size_user = nf_conntrack_htable_size;
458 return ret;
459 }
460
/* Registration handle for nf_ct_netfilter_table; set at module init. */
static struct ctl_table_header *nf_ct_netfilter_header;
462
463 static struct ctl_table nf_ct_sysctl_table[] = {
464 {
465 .procname = "nf_conntrack_max",
466 .data = &nf_conntrack_max,
467 .maxlen = sizeof(int),
468 .mode = 0644,
469 .proc_handler = proc_dointvec,
470 },
471 {
472 .procname = "nf_conntrack_count",
473 .data = &init_net.ct.count,
474 .maxlen = sizeof(int),
475 .mode = 0444,
476 .proc_handler = proc_dointvec,
477 },
478 {
479 .procname = "nf_conntrack_buckets",
480 .data = &nf_conntrack_htable_size_user,
481 .maxlen = sizeof(unsigned int),
482 .mode = 0644,
483 .proc_handler = nf_conntrack_hash_sysctl,
484 },
485 {
486 .procname = "nf_conntrack_checksum",
487 .data = &init_net.ct.sysctl_checksum,
488 .maxlen = sizeof(unsigned int),
489 .mode = 0644,
490 .proc_handler = proc_dointvec,
491 },
492 {
493 .procname = "nf_conntrack_log_invalid",
494 .data = &init_net.ct.sysctl_log_invalid,
495 .maxlen = sizeof(unsigned int),
496 .mode = 0644,
497 .proc_handler = proc_dointvec_minmax,
498 .extra1 = &log_invalid_proto_min,
499 .extra2 = &log_invalid_proto_max,
500 },
501 {
502 .procname = "nf_conntrack_expect_max",
503 .data = &nf_ct_expect_max,
504 .maxlen = sizeof(int),
505 .mode = 0644,
506 .proc_handler = proc_dointvec,
507 },
508 { }
509 };
510
/*
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably a leftover numeric sysctl id -- confirm before removing.
 */
#define NET_NF_CONNTRACK_MAX 2089
512
513 static struct ctl_table nf_ct_netfilter_table[] = {
514 {
515 .procname = "nf_conntrack_max",
516 .data = &nf_conntrack_max,
517 .maxlen = sizeof(int),
518 .mode = 0644,
519 .proc_handler = proc_dointvec,
520 },
521 { }
522 };
523
524 static int nf_conntrack_standalone_init_sysctl(struct net *net)
525 {
526 struct ctl_table *table;
527
528 table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
529 GFP_KERNEL);
530 if (!table)
531 goto out_kmemdup;
532
533 table[1].data = &net->ct.count;
534 table[3].data = &net->ct.sysctl_checksum;
535 table[4].data = &net->ct.sysctl_log_invalid;
536
537 /* Don't export sysctls to unprivileged users */
538 if (net->user_ns != &init_user_ns)
539 table[0].procname = NULL;
540
541 if (!net_eq(&init_net, net))
542 table[2].mode = 0444;
543
544 net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
545 if (!net->ct.sysctl_header)
546 goto out_unregister_netfilter;
547
548 return 0;
549
550 out_unregister_netfilter:
551 kfree(table);
552 out_kmemdup:
553 return -ENOMEM;
554 }
555
556 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
557 {
558 struct ctl_table *table;
559
560 table = net->ct.sysctl_header->ctl_table_arg;
561 unregister_net_sysctl_table(net->ct.sysctl_header);
562 kfree(table);
563 }
564 #else
/* No sysctls without CONFIG_SYSCTL; always succeeds. */
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
	return 0;
}
569
/* No sysctls without CONFIG_SYSCTL; nothing to unregister. */
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
}
573 #endif /* CONFIG_SYSCTL */
574
575 static int nf_conntrack_pernet_init(struct net *net)
576 {
577 int ret;
578
579 ret = nf_conntrack_init_net(net);
580 if (ret < 0)
581 goto out_init;
582
583 ret = nf_conntrack_standalone_init_proc(net);
584 if (ret < 0)
585 goto out_proc;
586
587 net->ct.sysctl_checksum = 1;
588 net->ct.sysctl_log_invalid = 0;
589 ret = nf_conntrack_standalone_init_sysctl(net);
590 if (ret < 0)
591 goto out_sysctl;
592
593 return 0;
594
595 out_sysctl:
596 nf_conntrack_standalone_fini_proc(net);
597 out_proc:
598 nf_conntrack_cleanup_net(net);
599 out_init:
600 return ret;
601 }
602
603 static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
604 {
605 struct net *net;
606
607 list_for_each_entry(net, net_exit_list, exit_list) {
608 nf_conntrack_standalone_fini_sysctl(net);
609 nf_conntrack_standalone_fini_proc(net);
610 }
611 nf_conntrack_cleanup_net_list(net_exit_list);
612 }
613
614 static struct pernet_operations nf_conntrack_net_ops = {
615 .init = nf_conntrack_pernet_init,
616 .exit_batch = nf_conntrack_pernet_exit,
617 };
618
619 static int __init nf_conntrack_standalone_init(void)
620 {
621 int ret = nf_conntrack_init_start();
622 if (ret < 0)
623 goto out_start;
624
625 #ifdef CONFIG_SYSCTL
626 nf_ct_netfilter_header =
627 register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
628 if (!nf_ct_netfilter_header) {
629 pr_err("nf_conntrack: can't register to sysctl.\n");
630 ret = -ENOMEM;
631 goto out_sysctl;
632 }
633
634 nf_conntrack_htable_size_user = nf_conntrack_htable_size;
635 #endif
636
637 ret = register_pernet_subsys(&nf_conntrack_net_ops);
638 if (ret < 0)
639 goto out_pernet;
640
641 nf_conntrack_init_end();
642 return 0;
643
644 out_pernet:
645 #ifdef CONFIG_SYSCTL
646 unregister_net_sysctl_table(nf_ct_netfilter_header);
647 out_sysctl:
648 #endif
649 nf_conntrack_cleanup_end();
650 out_start:
651 return ret;
652 }
653
654 static void __exit nf_conntrack_standalone_fini(void)
655 {
656 nf_conntrack_cleanup_start();
657 unregister_pernet_subsys(&nf_conntrack_net_ops);
658 #ifdef CONFIG_SYSCTL
659 unregister_net_sysctl_table(nf_ct_netfilter_header);
660 #endif
661 nf_conntrack_cleanup_end();
662 }
663
/* Module entry and exit points. */
module_init(nf_conntrack_standalone_init);
module_exit(nf_conntrack_standalone_fini);
666
/*
 * Deliberately empty.  Modules that need conntrack but do not otherwise
 * reference any of its symbols should call this.
 */
void need_conntrack(void)
{
}
EXPORT_SYMBOL_GPL(need_conntrack);
This page took 0.062599 seconds and 5 git commands to generate.