Commit | Line | Data |
---|---|---|
f6180121 MJ |
1 | /* Event cache for netfilter. */ |
2 | ||
f229f6ce PM |
3 | /* |
4 | * (C) 2005 Harald Welte <laforge@gnumonks.org> | |
5 | * (C) 2005 Patrick McHardy <kaber@trash.net> | |
6 | * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> | |
7 | * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> | |
f6180121 MJ |
8 | * |
9 | * This program is free software; you can redistribute it and/or modify | |
10 | * it under the terms of the GNU General Public License version 2 as | |
11 | * published by the Free Software Foundation. | |
12 | */ | |
13 | ||
14 | #include <linux/types.h> | |
15 | #include <linux/netfilter.h> | |
16 | #include <linux/skbuff.h> | |
17 | #include <linux/vmalloc.h> | |
18 | #include <linux/stddef.h> | |
19 | #include <linux/err.h> | |
20 | #include <linux/percpu.h> | |
f6180121 MJ |
21 | #include <linux/kernel.h> |
22 | #include <linux/netdevice.h> | |
5a0e3ad6 | 23 | #include <linux/slab.h> |
bc3b2d7f | 24 | #include <linux/export.h> |
f6180121 MJ |
25 | |
26 | #include <net/netfilter/nf_conntrack.h> | |
f6180121 | 27 | #include <net/netfilter/nf_conntrack_core.h> |
a0891aa6 | 28 | #include <net/netfilter/nf_conntrack_extend.h> |
f6180121 | 29 | |
e34d5c1a | 30 | static DEFINE_MUTEX(nf_ct_ecache_mutex); |
13b18339 | 31 | |
9500507c FW |
/* Delay, in jiffies, that ecache_work() waits before retrying after a
 * congested delivery attempt.
 */
#define ECACHE_RETRY_WAIT (HZ/10)

/* Outcome of one pass over a per-cpu dying list. */
enum retry_state {
	STATE_CONGESTED,	/* event delivery failed; retry after a delay */
	STATE_RESTART,		/* batch filled up; list may hold more entries */
	STATE_DONE,		/* the whole list was walked */
};
39 | ||
40 | static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) | |
41 | { | |
42 | struct nf_conn *refs[16]; | |
43 | struct nf_conntrack_tuple_hash *h; | |
44 | struct hlist_nulls_node *n; | |
45 | unsigned int evicted = 0; | |
46 | enum retry_state ret = STATE_DONE; | |
47 | ||
48 | spin_lock(&pcpu->lock); | |
49 | ||
50 | hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { | |
51 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | |
52 | ||
53 | if (nf_ct_is_dying(ct)) | |
54 | continue; | |
55 | ||
56 | if (nf_conntrack_event(IPCT_DESTROY, ct)) { | |
57 | ret = STATE_CONGESTED; | |
58 | break; | |
59 | } | |
60 | ||
61 | /* we've got the event delivered, now it's dying */ | |
62 | set_bit(IPS_DYING_BIT, &ct->status); | |
63 | refs[evicted] = ct; | |
64 | ||
65 | if (++evicted >= ARRAY_SIZE(refs)) { | |
66 | ret = STATE_RESTART; | |
67 | break; | |
68 | } | |
69 | } | |
70 | ||
71 | spin_unlock(&pcpu->lock); | |
72 | ||
73 | /* can't _put while holding lock */ | |
74 | while (evicted) | |
75 | nf_ct_put(refs[--evicted]); | |
76 | ||
77 | return ret; | |
78 | } | |
79 | ||
80 | static void ecache_work(struct work_struct *work) | |
81 | { | |
82 | struct netns_ct *ctnet = | |
83 | container_of(work, struct netns_ct, ecache_dwork.work); | |
84 | int cpu, delay = -1; | |
85 | struct ct_pcpu *pcpu; | |
86 | ||
87 | local_bh_disable(); | |
88 | ||
89 | for_each_possible_cpu(cpu) { | |
90 | enum retry_state ret; | |
91 | ||
92 | pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); | |
93 | ||
94 | ret = ecache_work_evict_list(pcpu); | |
95 | ||
96 | switch (ret) { | |
97 | case STATE_CONGESTED: | |
98 | delay = ECACHE_RETRY_WAIT; | |
99 | goto out; | |
100 | case STATE_RESTART: | |
101 | delay = 0; | |
102 | break; | |
103 | case STATE_DONE: | |
104 | break; | |
105 | } | |
106 | } | |
107 | ||
108 | out: | |
109 | local_bh_enable(); | |
110 | ||
111 | ctnet->ecache_dwork_pending = delay > 0; | |
112 | if (delay >= 0) | |
113 | schedule_delayed_work(&ctnet->ecache_dwork, delay); | |
114 | } | |
115 | ||
3c435e2e FW |
116 | int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, |
117 | u32 portid, int report) | |
118 | { | |
119 | int ret = 0; | |
120 | struct net *net = nf_ct_net(ct); | |
121 | struct nf_ct_event_notifier *notify; | |
122 | struct nf_conntrack_ecache *e; | |
123 | ||
124 | rcu_read_lock(); | |
125 | notify = rcu_dereference(net->ct.nf_conntrack_event_cb); | |
126 | if (!notify) | |
127 | goto out_unlock; | |
128 | ||
129 | e = nf_ct_ecache_find(ct); | |
130 | if (!e) | |
131 | goto out_unlock; | |
132 | ||
133 | if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { | |
134 | struct nf_ct_event item = { | |
135 | .ct = ct, | |
136 | .portid = e->portid ? e->portid : portid, | |
137 | .report = report | |
138 | }; | |
139 | /* This is a resent of a destroy event? If so, skip missed */ | |
140 | unsigned long missed = e->portid ? 0 : e->missed; | |
141 | ||
142 | if (!((eventmask | missed) & e->ctmask)) | |
143 | goto out_unlock; | |
144 | ||
145 | ret = notify->fcn(eventmask | missed, &item); | |
146 | if (unlikely(ret < 0 || missed)) { | |
147 | spin_lock_bh(&ct->lock); | |
148 | if (ret < 0) { | |
149 | /* This is a destroy event that has been | |
150 | * triggered by a process, we store the PORTID | |
151 | * to include it in the retransmission. | |
152 | */ | |
153 | if (eventmask & (1 << IPCT_DESTROY) && | |
154 | e->portid == 0 && portid != 0) | |
155 | e->portid = portid; | |
156 | else | |
157 | e->missed |= eventmask; | |
158 | } else { | |
159 | e->missed &= ~missed; | |
160 | } | |
161 | spin_unlock_bh(&ct->lock); | |
162 | } | |
163 | } | |
164 | out_unlock: | |
165 | rcu_read_unlock(); | |
166 | return ret; | |
167 | } | |
168 | EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); | |
169 | ||
f6180121 MJ |
170 | /* deliver cached events and clear cache entry - must be called with locally |
171 | * disabled softirqs */ | |
a0891aa6 | 172 | void nf_ct_deliver_cached_events(struct nf_conn *ct) |
f6180121 | 173 | { |
70e9942f | 174 | struct net *net = nf_ct_net(ct); |
58020f77 | 175 | unsigned long events, missed; |
e34d5c1a | 176 | struct nf_ct_event_notifier *notify; |
a0891aa6 | 177 | struct nf_conntrack_ecache *e; |
58020f77 TZ |
178 | struct nf_ct_event item; |
179 | int ret; | |
e34d5c1a PNA |
180 | |
181 | rcu_read_lock(); | |
70e9942f | 182 | notify = rcu_dereference(net->ct.nf_conntrack_event_cb); |
e34d5c1a PNA |
183 | if (notify == NULL) |
184 | goto out_unlock; | |
185 | ||
a0891aa6 PNA |
186 | e = nf_ct_ecache_find(ct); |
187 | if (e == NULL) | |
188 | goto out_unlock; | |
189 | ||
190 | events = xchg(&e->cache, 0); | |
191 | ||
58020f77 TZ |
192 | if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) |
193 | goto out_unlock; | |
194 | ||
195 | /* We make a copy of the missed event cache without taking | |
196 | * the lock, thus we may send missed events twice. However, | |
197 | * this does not harm and it happens very rarely. */ | |
198 | missed = e->missed; | |
199 | ||
200 | if (!((events | missed) & e->ctmask)) | |
201 | goto out_unlock; | |
202 | ||
203 | item.ct = ct; | |
15e47304 | 204 | item.portid = 0; |
58020f77 TZ |
205 | item.report = 0; |
206 | ||
207 | ret = notify->fcn(events | missed, &item); | |
208 | ||
209 | if (likely(ret >= 0 && !missed)) | |
210 | goto out_unlock; | |
211 | ||
212 | spin_lock_bh(&ct->lock); | |
213 | if (ret < 0) | |
214 | e->missed |= events; | |
215 | else | |
216 | e->missed &= ~missed; | |
217 | spin_unlock_bh(&ct->lock); | |
f6180121 | 218 | |
e34d5c1a PNA |
219 | out_unlock: |
220 | rcu_read_unlock(); | |
f6180121 | 221 | } |
13b18339 | 222 | EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); |
f6180121 | 223 | |
ecdfb48c FW |
224 | void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, |
225 | struct nf_conntrack_expect *exp, | |
226 | u32 portid, int report) | |
227 | ||
228 | { | |
229 | struct net *net = nf_ct_exp_net(exp); | |
230 | struct nf_exp_event_notifier *notify; | |
231 | struct nf_conntrack_ecache *e; | |
232 | ||
233 | rcu_read_lock(); | |
234 | notify = rcu_dereference(net->ct.nf_expect_event_cb); | |
235 | if (!notify) | |
236 | goto out_unlock; | |
237 | ||
238 | e = nf_ct_ecache_find(exp->master); | |
239 | if (!e) | |
240 | goto out_unlock; | |
241 | ||
242 | if (e->expmask & (1 << event)) { | |
243 | struct nf_exp_event item = { | |
244 | .exp = exp, | |
245 | .portid = portid, | |
246 | .report = report | |
247 | }; | |
248 | notify->fcn(1 << event, &item); | |
249 | } | |
250 | out_unlock: | |
251 | rcu_read_unlock(); | |
252 | } | |
253 | ||
70e9942f PNA |
254 | int nf_conntrack_register_notifier(struct net *net, |
255 | struct nf_ct_event_notifier *new) | |
010c7d6f | 256 | { |
031d7709 | 257 | int ret; |
b56f2d55 | 258 | struct nf_ct_event_notifier *notify; |
e34d5c1a PNA |
259 | |
260 | mutex_lock(&nf_ct_ecache_mutex); | |
70e9942f | 261 | notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, |
b56f2d55 PM |
262 | lockdep_is_held(&nf_ct_ecache_mutex)); |
263 | if (notify != NULL) { | |
e34d5c1a PNA |
264 | ret = -EBUSY; |
265 | goto out_unlock; | |
266 | } | |
cf778b00 | 267 | rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); |
031d7709 | 268 | ret = 0; |
e34d5c1a PNA |
269 | |
270 | out_unlock: | |
271 | mutex_unlock(&nf_ct_ecache_mutex); | |
272 | return ret; | |
010c7d6f PM |
273 | } |
274 | EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); | |
275 | ||
70e9942f PNA |
276 | void nf_conntrack_unregister_notifier(struct net *net, |
277 | struct nf_ct_event_notifier *new) | |
010c7d6f | 278 | { |
b56f2d55 PM |
279 | struct nf_ct_event_notifier *notify; |
280 | ||
e34d5c1a | 281 | mutex_lock(&nf_ct_ecache_mutex); |
70e9942f | 282 | notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, |
b56f2d55 PM |
283 | lockdep_is_held(&nf_ct_ecache_mutex)); |
284 | BUG_ON(notify != new); | |
70e9942f | 285 | RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); |
e34d5c1a | 286 | mutex_unlock(&nf_ct_ecache_mutex); |
010c7d6f PM |
287 | } |
288 | EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); | |
289 | ||
70e9942f PNA |
290 | int nf_ct_expect_register_notifier(struct net *net, |
291 | struct nf_exp_event_notifier *new) | |
010c7d6f | 292 | { |
031d7709 | 293 | int ret; |
b56f2d55 | 294 | struct nf_exp_event_notifier *notify; |
e34d5c1a PNA |
295 | |
296 | mutex_lock(&nf_ct_ecache_mutex); | |
70e9942f | 297 | notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, |
b56f2d55 PM |
298 | lockdep_is_held(&nf_ct_ecache_mutex)); |
299 | if (notify != NULL) { | |
e34d5c1a PNA |
300 | ret = -EBUSY; |
301 | goto out_unlock; | |
302 | } | |
cf778b00 | 303 | rcu_assign_pointer(net->ct.nf_expect_event_cb, new); |
031d7709 | 304 | ret = 0; |
e34d5c1a PNA |
305 | |
306 | out_unlock: | |
307 | mutex_unlock(&nf_ct_ecache_mutex); | |
308 | return ret; | |
010c7d6f | 309 | } |
6823645d | 310 | EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); |
010c7d6f | 311 | |
70e9942f PNA |
312 | void nf_ct_expect_unregister_notifier(struct net *net, |
313 | struct nf_exp_event_notifier *new) | |
010c7d6f | 314 | { |
b56f2d55 PM |
315 | struct nf_exp_event_notifier *notify; |
316 | ||
e34d5c1a | 317 | mutex_lock(&nf_ct_ecache_mutex); |
70e9942f | 318 | notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, |
b56f2d55 PM |
319 | lockdep_is_held(&nf_ct_ecache_mutex)); |
320 | BUG_ON(notify != new); | |
70e9942f | 321 | RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); |
e34d5c1a | 322 | mutex_unlock(&nf_ct_ecache_mutex); |
010c7d6f | 323 | } |
6823645d | 324 | EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); |
a0891aa6 PNA |
325 | |
326 | #define NF_CT_EVENTS_DEFAULT 1 | |
327 | static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; | |
328 | ||
329 | #ifdef CONFIG_SYSCTL | |
330 | static struct ctl_table event_sysctl_table[] = { | |
331 | { | |
a0891aa6 PNA |
332 | .procname = "nf_conntrack_events", |
333 | .data = &init_net.ct.sysctl_events, | |
334 | .maxlen = sizeof(unsigned int), | |
335 | .mode = 0644, | |
336 | .proc_handler = proc_dointvec, | |
337 | }, | |
338 | {} | |
339 | }; | |
340 | #endif /* CONFIG_SYSCTL */ | |
341 | ||
342 | static struct nf_ct_ext_type event_extend __read_mostly = { | |
343 | .len = sizeof(struct nf_conntrack_ecache), | |
344 | .align = __alignof__(struct nf_conntrack_ecache), | |
345 | .id = NF_CT_EXT_ECACHE, | |
346 | }; | |
347 | ||
348 | #ifdef CONFIG_SYSCTL | |
349 | static int nf_conntrack_event_init_sysctl(struct net *net) | |
350 | { | |
351 | struct ctl_table *table; | |
352 | ||
353 | table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), | |
354 | GFP_KERNEL); | |
355 | if (!table) | |
356 | goto out; | |
357 | ||
358 | table[0].data = &net->ct.sysctl_events; | |
359 | ||
464dc801 EB |
360 | /* Don't export sysctls to unprivileged users */ |
361 | if (net->user_ns != &init_user_ns) | |
362 | table[0].procname = NULL; | |
363 | ||
a0891aa6 | 364 | net->ct.event_sysctl_header = |
ec8f23ce | 365 | register_net_sysctl(net, "net/netfilter", table); |
a0891aa6 PNA |
366 | if (!net->ct.event_sysctl_header) { |
367 | printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n"); | |
368 | goto out_register; | |
369 | } | |
370 | return 0; | |
371 | ||
372 | out_register: | |
373 | kfree(table); | |
374 | out: | |
375 | return -ENOMEM; | |
376 | } | |
377 | ||
378 | static void nf_conntrack_event_fini_sysctl(struct net *net) | |
379 | { | |
380 | struct ctl_table *table; | |
381 | ||
382 | table = net->ct.event_sysctl_header->ctl_table_arg; | |
383 | unregister_net_sysctl_table(net->ct.event_sysctl_header); | |
384 | kfree(table); | |
385 | } | |
386 | #else | |
/* Without CONFIG_SYSCTL there is nothing to register; always succeeds. */
static int nf_conntrack_event_init_sysctl(struct net *net)
{
	return 0;
}

/* Without CONFIG_SYSCTL there is nothing to tear down. */
static void nf_conntrack_event_fini_sysctl(struct net *net)
{
}
395 | #endif /* CONFIG_SYSCTL */ | |
396 | ||
3fe0f943 | 397 | int nf_conntrack_ecache_pernet_init(struct net *net) |
a0891aa6 | 398 | { |
a0891aa6 | 399 | net->ct.sysctl_events = nf_ct_events; |
9500507c | 400 | INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); |
3fe0f943 G |
401 | return nf_conntrack_event_init_sysctl(net); |
402 | } | |
a0891aa6 | 403 | |
3fe0f943 G |
404 | void nf_conntrack_ecache_pernet_fini(struct net *net) |
405 | { | |
9500507c | 406 | cancel_delayed_work_sync(&net->ct.ecache_dwork); |
3fe0f943 G |
407 | nf_conntrack_event_fini_sysctl(net); |
408 | } | |
a0891aa6 | 409 | |
3fe0f943 G |
410 | int nf_conntrack_ecache_init(void) |
411 | { | |
412 | int ret = nf_ct_extend_register(&event_extend); | |
a0891aa6 | 413 | if (ret < 0) |
3fe0f943 | 414 | pr_err("nf_ct_event: Unable to register event extension.\n"); |
a0891aa6 PNA |
415 | return ret; |
416 | } | |
417 | ||
3fe0f943 | 418 | void nf_conntrack_ecache_fini(void) |
a0891aa6 | 419 | { |
3fe0f943 | 420 | nf_ct_extend_unregister(&event_extend); |
a0891aa6 | 421 | } |