Commit | Line | Data |
---|---|---|
7d1d65cb DB |
1 | /* |
2 | * Berkeley Packet Filter based traffic classifier | |
3 | * | |
4 | * Might be used to classify traffic through flexible, user-defined and | |
5 | * possibly JIT-ed BPF filters for traffic control as an alternative to | |
6 | * ematches. | |
7 | * | |
8 | * (C) 2013 Daniel Borkmann <dborkman@redhat.com> | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License version 2 as | |
12 | * published by the Free Software Foundation. | |
13 | */ | |
14 | ||
15 | #include <linux/module.h> | |
16 | #include <linux/types.h> | |
17 | #include <linux/skbuff.h> | |
18 | #include <linux/filter.h> | |
19 | #include <net/rtnetlink.h> | |
20 | #include <net/pkt_cls.h> | |
21 | #include <net/sock.h> | |
22 | ||
23 | MODULE_LICENSE("GPL"); | |
24 | MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); | |
25 | MODULE_DESCRIPTION("TC BPF based classifier"); | |
26 | ||
27 | struct cls_bpf_head { | |
28 | struct list_head plist; | |
29 | u32 hgen; | |
30 | }; | |
31 | ||
32 | struct cls_bpf_prog { | |
33 | struct sk_filter *filter; | |
34 | struct sock_filter *bpf_ops; | |
35 | struct tcf_exts exts; | |
36 | struct tcf_result res; | |
37 | struct list_head link; | |
38 | u32 handle; | |
39 | u16 bpf_len; | |
40 | }; | |
41 | ||
42 | static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { | |
43 | [TCA_BPF_CLASSID] = { .type = NLA_U32 }, | |
44 | [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, | |
45 | [TCA_BPF_OPS] = { .type = NLA_BINARY, | |
46 | .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, | |
47 | }; | |
48 | ||
49 | static const struct tcf_ext_map bpf_ext_map = { | |
50 | .action = TCA_BPF_ACT, | |
51 | .police = TCA_BPF_POLICE, | |
52 | }; | |
53 | ||
54 | static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, | |
55 | struct tcf_result *res) | |
56 | { | |
57 | struct cls_bpf_head *head = tp->root; | |
58 | struct cls_bpf_prog *prog; | |
59 | int ret; | |
60 | ||
61 | list_for_each_entry(prog, &head->plist, link) { | |
62 | int filter_res = SK_RUN_FILTER(prog->filter, skb); | |
63 | ||
64 | if (filter_res == 0) | |
65 | continue; | |
66 | ||
67 | *res = prog->res; | |
68 | if (filter_res != -1) | |
69 | res->classid = filter_res; | |
70 | ||
71 | ret = tcf_exts_exec(skb, &prog->exts, res); | |
72 | if (ret < 0) | |
73 | continue; | |
74 | ||
75 | return ret; | |
76 | } | |
77 | ||
78 | return -1; | |
79 | } | |
80 | ||
81 | static int cls_bpf_init(struct tcf_proto *tp) | |
82 | { | |
83 | struct cls_bpf_head *head; | |
84 | ||
85 | head = kzalloc(sizeof(*head), GFP_KERNEL); | |
86 | if (head == NULL) | |
87 | return -ENOBUFS; | |
88 | ||
89 | INIT_LIST_HEAD(&head->plist); | |
90 | tp->root = head; | |
91 | ||
92 | return 0; | |
93 | } | |
94 | ||
95 | static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) | |
96 | { | |
97 | tcf_unbind_filter(tp, &prog->res); | |
98 | tcf_exts_destroy(tp, &prog->exts); | |
99 | ||
100 | sk_unattached_filter_destroy(prog->filter); | |
101 | ||
102 | kfree(prog->bpf_ops); | |
103 | kfree(prog); | |
104 | } | |
105 | ||
106 | static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) | |
107 | { | |
108 | struct cls_bpf_head *head = tp->root; | |
109 | struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg; | |
110 | ||
111 | list_for_each_entry(prog, &head->plist, link) { | |
112 | if (prog == todel) { | |
113 | tcf_tree_lock(tp); | |
114 | list_del(&prog->link); | |
115 | tcf_tree_unlock(tp); | |
116 | ||
117 | cls_bpf_delete_prog(tp, prog); | |
118 | return 0; | |
119 | } | |
120 | } | |
121 | ||
122 | return -ENOENT; | |
123 | } | |
124 | ||
125 | static void cls_bpf_destroy(struct tcf_proto *tp) | |
126 | { | |
127 | struct cls_bpf_head *head = tp->root; | |
128 | struct cls_bpf_prog *prog, *tmp; | |
129 | ||
130 | list_for_each_entry_safe(prog, tmp, &head->plist, link) { | |
131 | list_del(&prog->link); | |
132 | cls_bpf_delete_prog(tp, prog); | |
133 | } | |
134 | ||
135 | kfree(head); | |
136 | } | |
137 | ||
138 | static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) | |
139 | { | |
140 | struct cls_bpf_head *head = tp->root; | |
141 | struct cls_bpf_prog *prog; | |
142 | unsigned long ret = 0UL; | |
143 | ||
144 | if (head == NULL) | |
145 | return 0UL; | |
146 | ||
147 | list_for_each_entry(prog, &head->plist, link) { | |
148 | if (prog->handle == handle) { | |
149 | ret = (unsigned long) prog; | |
150 | break; | |
151 | } | |
152 | } | |
153 | ||
154 | return ret; | |
155 | } | |
156 | ||
/* Counterpart of cls_bpf_get(); intentionally a no-op in this classifier. */
static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
{
}
160 | ||
161 | static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, | |
162 | struct cls_bpf_prog *prog, | |
163 | unsigned long base, struct nlattr **tb, | |
164 | struct nlattr *est) | |
165 | { | |
166 | struct sock_filter *bpf_ops, *bpf_old; | |
167 | struct tcf_exts exts; | |
168 | struct sock_fprog tmp; | |
169 | struct sk_filter *fp, *fp_old; | |
170 | u16 bpf_size, bpf_len; | |
171 | u32 classid; | |
172 | int ret; | |
173 | ||
174 | if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) | |
175 | return -EINVAL; | |
176 | ||
177 | ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map); | |
178 | if (ret < 0) | |
179 | return ret; | |
180 | ||
181 | classid = nla_get_u32(tb[TCA_BPF_CLASSID]); | |
182 | bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); | |
183 | if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { | |
184 | ret = -EINVAL; | |
185 | goto errout; | |
186 | } | |
187 | ||
188 | bpf_size = bpf_len * sizeof(*bpf_ops); | |
189 | bpf_ops = kzalloc(bpf_size, GFP_KERNEL); | |
190 | if (bpf_ops == NULL) { | |
191 | ret = -ENOMEM; | |
192 | goto errout; | |
193 | } | |
194 | ||
195 | memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); | |
196 | ||
197 | tmp.len = bpf_len; | |
198 | tmp.filter = (struct sock_filter __user *) bpf_ops; | |
199 | ||
200 | ret = sk_unattached_filter_create(&fp, &tmp); | |
201 | if (ret) | |
202 | goto errout_free; | |
203 | ||
204 | tcf_tree_lock(tp); | |
205 | fp_old = prog->filter; | |
206 | bpf_old = prog->bpf_ops; | |
207 | ||
208 | prog->bpf_len = bpf_len; | |
209 | prog->bpf_ops = bpf_ops; | |
210 | prog->filter = fp; | |
211 | prog->res.classid = classid; | |
212 | tcf_tree_unlock(tp); | |
213 | ||
214 | tcf_bind_filter(tp, &prog->res, base); | |
215 | tcf_exts_change(tp, &prog->exts, &exts); | |
216 | ||
217 | if (fp_old) | |
218 | sk_unattached_filter_destroy(fp_old); | |
219 | if (bpf_old) | |
220 | kfree(bpf_old); | |
221 | ||
222 | return 0; | |
223 | ||
224 | errout_free: | |
225 | kfree(bpf_ops); | |
226 | errout: | |
227 | tcf_exts_destroy(tp, &exts); | |
228 | return ret; | |
229 | } | |
230 | ||
231 | static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, | |
232 | struct cls_bpf_head *head) | |
233 | { | |
234 | unsigned int i = 0x80000000; | |
235 | ||
236 | do { | |
237 | if (++head->hgen == 0x7FFFFFFF) | |
238 | head->hgen = 1; | |
239 | } while (--i > 0 && cls_bpf_get(tp, head->hgen)); | |
240 | if (i == 0) | |
241 | pr_err("Insufficient number of handles\n"); | |
242 | ||
243 | return i; | |
244 | } | |
245 | ||
246 | static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, | |
247 | struct tcf_proto *tp, unsigned long base, | |
248 | u32 handle, struct nlattr **tca, | |
249 | unsigned long *arg) | |
250 | { | |
251 | struct cls_bpf_head *head = tp->root; | |
252 | struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg; | |
253 | struct nlattr *tb[TCA_BPF_MAX + 1]; | |
254 | int ret; | |
255 | ||
256 | if (tca[TCA_OPTIONS] == NULL) | |
257 | return -EINVAL; | |
258 | ||
259 | ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy); | |
260 | if (ret < 0) | |
261 | return ret; | |
262 | ||
263 | if (prog != NULL) { | |
264 | if (handle && prog->handle != handle) | |
265 | return -EINVAL; | |
266 | return cls_bpf_modify_existing(net, tp, prog, base, tb, | |
267 | tca[TCA_RATE]); | |
268 | } | |
269 | ||
270 | prog = kzalloc(sizeof(*prog), GFP_KERNEL); | |
271 | if (prog == NULL) | |
272 | return -ENOBUFS; | |
273 | ||
274 | if (handle == 0) | |
275 | prog->handle = cls_bpf_grab_new_handle(tp, head); | |
276 | else | |
277 | prog->handle = handle; | |
278 | if (prog->handle == 0) { | |
279 | ret = -EINVAL; | |
280 | goto errout; | |
281 | } | |
282 | ||
283 | ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]); | |
284 | if (ret < 0) | |
285 | goto errout; | |
286 | ||
287 | tcf_tree_lock(tp); | |
288 | list_add(&prog->link, &head->plist); | |
289 | tcf_tree_unlock(tp); | |
290 | ||
291 | *arg = (unsigned long) prog; | |
292 | ||
293 | return 0; | |
294 | errout: | |
295 | if (*arg == 0UL && prog) | |
296 | kfree(prog); | |
297 | ||
298 | return ret; | |
299 | } | |
300 | ||
301 | static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh, | |
302 | struct sk_buff *skb, struct tcmsg *tm) | |
303 | { | |
304 | struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; | |
305 | struct nlattr *nest, *nla; | |
306 | ||
307 | if (prog == NULL) | |
308 | return skb->len; | |
309 | ||
310 | tm->tcm_handle = prog->handle; | |
311 | ||
312 | nest = nla_nest_start(skb, TCA_OPTIONS); | |
313 | if (nest == NULL) | |
314 | goto nla_put_failure; | |
315 | ||
316 | if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) | |
317 | goto nla_put_failure; | |
318 | if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) | |
319 | goto nla_put_failure; | |
320 | ||
321 | nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * | |
322 | sizeof(struct sock_filter)); | |
323 | if (nla == NULL) | |
324 | goto nla_put_failure; | |
325 | ||
326 | memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); | |
327 | ||
328 | if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0) | |
329 | goto nla_put_failure; | |
330 | ||
331 | nla_nest_end(skb, nest); | |
332 | ||
333 | if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0) | |
334 | goto nla_put_failure; | |
335 | ||
336 | return skb->len; | |
337 | ||
338 | nla_put_failure: | |
339 | nla_nest_cancel(skb, nest); | |
340 | return -1; | |
341 | } | |
342 | ||
343 | static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |
344 | { | |
345 | struct cls_bpf_head *head = tp->root; | |
346 | struct cls_bpf_prog *prog; | |
347 | ||
348 | list_for_each_entry(prog, &head->plist, link) { | |
349 | if (arg->count < arg->skip) | |
350 | goto skip; | |
351 | if (arg->fn(tp, (unsigned long) prog, arg) < 0) { | |
352 | arg->stop = 1; | |
353 | break; | |
354 | } | |
355 | skip: | |
356 | arg->count++; | |
357 | } | |
358 | } | |
359 | ||
360 | static struct tcf_proto_ops cls_bpf_ops __read_mostly = { | |
361 | .kind = "bpf", | |
362 | .owner = THIS_MODULE, | |
363 | .classify = cls_bpf_classify, | |
364 | .init = cls_bpf_init, | |
365 | .destroy = cls_bpf_destroy, | |
366 | .get = cls_bpf_get, | |
367 | .put = cls_bpf_put, | |
368 | .change = cls_bpf_change, | |
369 | .delete = cls_bpf_delete, | |
370 | .walk = cls_bpf_walk, | |
371 | .dump = cls_bpf_dump, | |
372 | }; | |
373 | ||
374 | static int __init cls_bpf_init_mod(void) | |
375 | { | |
376 | return register_tcf_proto_ops(&cls_bpf_ops); | |
377 | } | |
378 | ||
379 | static void __exit cls_bpf_exit_mod(void) | |
380 | { | |
381 | unregister_tcf_proto_ops(&cls_bpf_ops); | |
382 | } | |
383 | ||
/* Hook the entry and exit points into module load and unload. */
module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);