Commit | Line | Data |
---|---|---|
f6180121 MJ |
1 | /* Event cache for netfilter. */ |
2 | ||
f229f6ce PM |
3 | /* |
4 | * (C) 2005 Harald Welte <laforge@gnumonks.org> | |
5 | * (C) 2005 Patrick McHardy <kaber@trash.net> | |
6 | * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> | |
7 | * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> | |
f6180121 MJ |
8 | * |
9 | * This program is free software; you can redistribute it and/or modify | |
10 | * it under the terms of the GNU General Public License version 2 as | |
11 | * published by the Free Software Foundation. | |
12 | */ | |
13 | ||
14 | #include <linux/types.h> | |
15 | #include <linux/netfilter.h> | |
16 | #include <linux/skbuff.h> | |
17 | #include <linux/vmalloc.h> | |
18 | #include <linux/stddef.h> | |
19 | #include <linux/err.h> | |
20 | #include <linux/percpu.h> | |
f6180121 MJ |
21 | #include <linux/kernel.h> |
22 | #include <linux/netdevice.h> | |
5a0e3ad6 | 23 | #include <linux/slab.h> |
bc3b2d7f | 24 | #include <linux/export.h> |
f6180121 MJ |
25 | |
26 | #include <net/netfilter/nf_conntrack.h> | |
f6180121 | 27 | #include <net/netfilter/nf_conntrack_core.h> |
a0891aa6 | 28 | #include <net/netfilter/nf_conntrack_extend.h> |
f6180121 | 29 | |
/* Serializes registration/unregistration of event notifiers. */
static DEFINE_MUTEX(nf_ct_ecache_mutex);

/* Back-off delay before re-running the eviction worker after the
 * event notifier reported congestion. */
#define ECACHE_RETRY_WAIT (HZ/10)

/* Result of one eviction pass over a per-cpu dying list. */
enum retry_state {
	STATE_CONGESTED,	/* notifier could not accept an event; retry later */
	STATE_RESTART,		/* scratch array filled; re-run immediately */
	STATE_DONE,		/* list fully processed */
};
39 | ||
40 | static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) | |
41 | { | |
42 | struct nf_conn *refs[16]; | |
43 | struct nf_conntrack_tuple_hash *h; | |
44 | struct hlist_nulls_node *n; | |
45 | unsigned int evicted = 0; | |
46 | enum retry_state ret = STATE_DONE; | |
47 | ||
48 | spin_lock(&pcpu->lock); | |
49 | ||
50 | hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { | |
51 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | |
52 | ||
53 | if (nf_ct_is_dying(ct)) | |
54 | continue; | |
55 | ||
56 | if (nf_conntrack_event(IPCT_DESTROY, ct)) { | |
57 | ret = STATE_CONGESTED; | |
58 | break; | |
59 | } | |
60 | ||
61 | /* we've got the event delivered, now it's dying */ | |
62 | set_bit(IPS_DYING_BIT, &ct->status); | |
63 | refs[evicted] = ct; | |
64 | ||
65 | if (++evicted >= ARRAY_SIZE(refs)) { | |
66 | ret = STATE_RESTART; | |
67 | break; | |
68 | } | |
69 | } | |
70 | ||
71 | spin_unlock(&pcpu->lock); | |
72 | ||
73 | /* can't _put while holding lock */ | |
74 | while (evicted) | |
75 | nf_ct_put(refs[--evicted]); | |
76 | ||
77 | return ret; | |
78 | } | |
79 | ||
80 | static void ecache_work(struct work_struct *work) | |
81 | { | |
82 | struct netns_ct *ctnet = | |
83 | container_of(work, struct netns_ct, ecache_dwork.work); | |
84 | int cpu, delay = -1; | |
85 | struct ct_pcpu *pcpu; | |
86 | ||
87 | local_bh_disable(); | |
88 | ||
89 | for_each_possible_cpu(cpu) { | |
90 | enum retry_state ret; | |
91 | ||
92 | pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); | |
93 | ||
94 | ret = ecache_work_evict_list(pcpu); | |
95 | ||
96 | switch (ret) { | |
97 | case STATE_CONGESTED: | |
98 | delay = ECACHE_RETRY_WAIT; | |
99 | goto out; | |
100 | case STATE_RESTART: | |
101 | delay = 0; | |
102 | break; | |
103 | case STATE_DONE: | |
104 | break; | |
105 | } | |
106 | } | |
107 | ||
108 | out: | |
109 | local_bh_enable(); | |
110 | ||
111 | ctnet->ecache_dwork_pending = delay > 0; | |
112 | if (delay >= 0) | |
113 | schedule_delayed_work(&ctnet->ecache_dwork, delay); | |
114 | } | |
115 | ||
/* Deliver cached events for @ct and clear its cache entry - must be
 * called with locally disabled softirqs.
 *
 * Pending event bits are taken atomically; if the notifier fails (or
 * there were previously missed events), e->missed is updated under
 * ct->lock so the events can be re-delivered later.
 */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned long events, missed;
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	int ret;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (notify == NULL)
		goto out_unlock;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		goto out_unlock;

	/* Atomically fetch and clear the pending event bits. */
	events = xchg(&e->cache, 0);

	/* Nothing to deliver for unconfirmed/dying entries or when no
	 * events were cached. */
	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
		goto out_unlock;

	/* We make a copy of the missed event cache without taking
	 * the lock, thus we may send missed events twice. However,
	 * this does not harm and it happens very rarely. */
	missed = e->missed;

	/* Skip delivery when no bit is covered by this entry's ctmask. */
	if (!((events | missed) & e->ctmask))
		goto out_unlock;

	item.ct = ct;
	item.portid = 0;
	item.report = 0;

	ret = notify->fcn(events | missed, &item);

	/* Fast path: delivery succeeded and nothing was pending. */
	if (likely(ret >= 0 && !missed))
		goto out_unlock;

	/* On failure remember the undelivered events; on success drop the
	 * missed bits we just re-sent. */
	spin_lock_bh(&ct->lock);
	if (ret < 0)
		e->missed |= events;
	else
		e->missed &= ~missed;
	spin_unlock_bh(&ct->lock);

out_unlock:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
f6180121 | 169 | |
70e9942f PNA |
170 | int nf_conntrack_register_notifier(struct net *net, |
171 | struct nf_ct_event_notifier *new) | |
010c7d6f | 172 | { |
031d7709 | 173 | int ret; |
b56f2d55 | 174 | struct nf_ct_event_notifier *notify; |
e34d5c1a PNA |
175 | |
176 | mutex_lock(&nf_ct_ecache_mutex); | |
70e9942f | 177 | notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, |
b56f2d55 PM |
178 | lockdep_is_held(&nf_ct_ecache_mutex)); |
179 | if (notify != NULL) { | |
e34d5c1a PNA |
180 | ret = -EBUSY; |
181 | goto out_unlock; | |
182 | } | |
cf778b00 | 183 | rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); |
031d7709 | 184 | ret = 0; |
e34d5c1a PNA |
185 | |
186 | out_unlock: | |
187 | mutex_unlock(&nf_ct_ecache_mutex); | |
188 | return ret; | |
010c7d6f PM |
189 | } |
190 | EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); | |
191 | ||
70e9942f PNA |
192 | void nf_conntrack_unregister_notifier(struct net *net, |
193 | struct nf_ct_event_notifier *new) | |
010c7d6f | 194 | { |
b56f2d55 PM |
195 | struct nf_ct_event_notifier *notify; |
196 | ||
e34d5c1a | 197 | mutex_lock(&nf_ct_ecache_mutex); |
70e9942f | 198 | notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, |
b56f2d55 PM |
199 | lockdep_is_held(&nf_ct_ecache_mutex)); |
200 | BUG_ON(notify != new); | |
70e9942f | 201 | RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); |
e34d5c1a | 202 | mutex_unlock(&nf_ct_ecache_mutex); |
010c7d6f PM |
203 | } |
204 | EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); | |
205 | ||
70e9942f PNA |
206 | int nf_ct_expect_register_notifier(struct net *net, |
207 | struct nf_exp_event_notifier *new) | |
010c7d6f | 208 | { |
031d7709 | 209 | int ret; |
b56f2d55 | 210 | struct nf_exp_event_notifier *notify; |
e34d5c1a PNA |
211 | |
212 | mutex_lock(&nf_ct_ecache_mutex); | |
70e9942f | 213 | notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, |
b56f2d55 PM |
214 | lockdep_is_held(&nf_ct_ecache_mutex)); |
215 | if (notify != NULL) { | |
e34d5c1a PNA |
216 | ret = -EBUSY; |
217 | goto out_unlock; | |
218 | } | |
cf778b00 | 219 | rcu_assign_pointer(net->ct.nf_expect_event_cb, new); |
031d7709 | 220 | ret = 0; |
e34d5c1a PNA |
221 | |
222 | out_unlock: | |
223 | mutex_unlock(&nf_ct_ecache_mutex); | |
224 | return ret; | |
010c7d6f | 225 | } |
6823645d | 226 | EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); |
010c7d6f | 227 | |
70e9942f PNA |
228 | void nf_ct_expect_unregister_notifier(struct net *net, |
229 | struct nf_exp_event_notifier *new) | |
010c7d6f | 230 | { |
b56f2d55 PM |
231 | struct nf_exp_event_notifier *notify; |
232 | ||
e34d5c1a | 233 | mutex_lock(&nf_ct_ecache_mutex); |
70e9942f | 234 | notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, |
b56f2d55 PM |
235 | lockdep_is_held(&nf_ct_ecache_mutex)); |
236 | BUG_ON(notify != new); | |
70e9942f | 237 | RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); |
e34d5c1a | 238 | mutex_unlock(&nf_ct_ecache_mutex); |
010c7d6f | 239 | } |
6823645d | 240 | EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); |
a0891aa6 PNA |
241 | |
242 | #define NF_CT_EVENTS_DEFAULT 1 | |
243 | static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; | |
244 | ||
245 | #ifdef CONFIG_SYSCTL | |
246 | static struct ctl_table event_sysctl_table[] = { | |
247 | { | |
a0891aa6 PNA |
248 | .procname = "nf_conntrack_events", |
249 | .data = &init_net.ct.sysctl_events, | |
250 | .maxlen = sizeof(unsigned int), | |
251 | .mode = 0644, | |
252 | .proc_handler = proc_dointvec, | |
253 | }, | |
254 | {} | |
255 | }; | |
256 | #endif /* CONFIG_SYSCTL */ | |
257 | ||
258 | static struct nf_ct_ext_type event_extend __read_mostly = { | |
259 | .len = sizeof(struct nf_conntrack_ecache), | |
260 | .align = __alignof__(struct nf_conntrack_ecache), | |
261 | .id = NF_CT_EXT_ECACHE, | |
262 | }; | |
263 | ||
#ifdef CONFIG_SYSCTL
/* Register the per-netns "nf_conntrack_events" sysctl.
 *
 * Duplicates the template table so each netns gets its own .data
 * pointer. Returns 0 on success, -ENOMEM when the copy or the
 * registration fails (the copy is freed on the latter).
 */
static int nf_conntrack_event_init_sysctl(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
			GFP_KERNEL);
	if (!table)
		goto out;

	table[0].data = &net->ct.sysctl_events;

	/* Don't export sysctls to unprivileged users */
	if (net->user_ns != &init_user_ns)
		table[0].procname = NULL;

	net->ct.event_sysctl_header =
		register_net_sysctl(net, "net/netfilter", table);
	if (!net->ct.event_sysctl_header) {
		/* pr_err for consistency with the rest of this file */
		pr_err("nf_ct_event: can't register to sysctl.\n");
		goto out_register;
	}
	return 0;

out_register:
	kfree(table);
out:
	return -ENOMEM;
}

/* Unregister the per-netns sysctl and free the duplicated table. */
static void nf_conntrack_event_fini_sysctl(struct net *net)
{
	struct ctl_table *table;

	table = net->ct.event_sysctl_header->ctl_table_arg;
	unregister_net_sysctl_table(net->ct.event_sysctl_header);
	kfree(table);
}
#else
/* Sysctl support compiled out: both hooks are no-ops. */
static int nf_conntrack_event_init_sysctl(struct net *net)
{
	return 0;
}

static void nf_conntrack_event_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
312 | ||
/* Per-netns setup: seed the events sysctl from the module default and
 * prepare (but do not schedule) the eviction worker. */
int nf_conntrack_ecache_pernet_init(struct net *net)
{
	net->ct.sysctl_events = nf_ct_events;
	INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
	return nf_conntrack_event_init_sysctl(net);
}

/* Per-netns teardown: stop the worker before the netns is destroyed. */
void nf_conntrack_ecache_pernet_fini(struct net *net)
{
	cancel_delayed_work_sync(&net->ct.ecache_dwork);
	nf_conntrack_event_fini_sysctl(net);
}
a0891aa6 | 325 | |
3fe0f943 G |
326 | int nf_conntrack_ecache_init(void) |
327 | { | |
328 | int ret = nf_ct_extend_register(&event_extend); | |
a0891aa6 | 329 | if (ret < 0) |
3fe0f943 | 330 | pr_err("nf_ct_event: Unable to register event extension.\n"); |
a0891aa6 PNA |
331 | return ret; |
332 | } | |
333 | ||
/* Unregister the ecache conntrack extension (module exit). */
void nf_conntrack_ecache_fini(void)
{
	nf_ct_extend_unregister(&event_extend);
}