Commit | Line | Data |
---|---|---|
b2197755 DB |
1 | /* |
2 | * Minimal file system backend for holding eBPF maps and programs, | |
3 | * used by bpf(2) object pinning. | |
4 | * | |
5 | * Authors: | |
6 | * | |
7 | * Daniel Borkmann <daniel@iogearbox.net> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | */ | |
13 | ||
14 | #include <linux/module.h> | |
15 | #include <linux/magic.h> | |
16 | #include <linux/major.h> | |
17 | #include <linux/mount.h> | |
18 | #include <linux/namei.h> | |
19 | #include <linux/fs.h> | |
20 | #include <linux/kdev_t.h> | |
21 | #include <linux/filter.h> | |
22 | #include <linux/bpf.h> | |
23 | ||
/*
 * Kinds of object that can live behind an inode of this file system.
 * The numeric value is also what bpf_obj_do_pin() encodes as the minor
 * number of the device id it hands to mknod, and what bpf_mkobj()
 * decodes again with MINOR().
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
	BPF_TYPE_PROG	= 1,
	BPF_TYPE_MAP	= 2,
};
29 | ||
30 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
31 | { | |
32 | switch (type) { | |
33 | case BPF_TYPE_PROG: | |
92117d84 | 34 | raw = bpf_prog_inc(raw); |
b2197755 DB |
35 | break; |
36 | case BPF_TYPE_MAP: | |
92117d84 | 37 | raw = bpf_map_inc(raw, true); |
b2197755 DB |
38 | break; |
39 | default: | |
40 | WARN_ON_ONCE(1); | |
41 | break; | |
42 | } | |
43 | ||
44 | return raw; | |
45 | } | |
46 | ||
47 | static void bpf_any_put(void *raw, enum bpf_type type) | |
48 | { | |
49 | switch (type) { | |
50 | case BPF_TYPE_PROG: | |
51 | bpf_prog_put(raw); | |
52 | break; | |
53 | case BPF_TYPE_MAP: | |
c9da161c | 54 | bpf_map_put_with_uref(raw); |
b2197755 DB |
55 | break; |
56 | default: | |
57 | WARN_ON_ONCE(1); | |
58 | break; | |
59 | } | |
60 | } | |
61 | ||
62 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
63 | { | |
64 | void *raw; | |
65 | ||
66 | *type = BPF_TYPE_MAP; | |
c9da161c | 67 | raw = bpf_map_get_with_uref(ufd); |
b2197755 DB |
68 | if (IS_ERR(raw)) { |
69 | *type = BPF_TYPE_PROG; | |
70 | raw = bpf_prog_get(ufd); | |
71 | } | |
72 | ||
73 | return raw; | |
74 | } | |
75 | ||
76 | static const struct inode_operations bpf_dir_iops; | |
77 | ||
78 | static const struct inode_operations bpf_prog_iops = { }; | |
79 | static const struct inode_operations bpf_map_iops = { }; | |
80 | ||
81 | static struct inode *bpf_get_inode(struct super_block *sb, | |
82 | const struct inode *dir, | |
83 | umode_t mode) | |
84 | { | |
85 | struct inode *inode; | |
86 | ||
87 | switch (mode & S_IFMT) { | |
88 | case S_IFDIR: | |
89 | case S_IFREG: | |
90 | break; | |
91 | default: | |
92 | return ERR_PTR(-EINVAL); | |
93 | } | |
94 | ||
95 | inode = new_inode(sb); | |
96 | if (!inode) | |
97 | return ERR_PTR(-ENOSPC); | |
98 | ||
99 | inode->i_ino = get_next_ino(); | |
100 | inode->i_atime = CURRENT_TIME; | |
101 | inode->i_mtime = inode->i_atime; | |
102 | inode->i_ctime = inode->i_atime; | |
103 | ||
104 | inode_init_owner(inode, dir, mode); | |
105 | ||
106 | return inode; | |
107 | } | |
108 | ||
109 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
110 | { | |
111 | *type = BPF_TYPE_UNSPEC; | |
112 | if (inode->i_op == &bpf_prog_iops) | |
113 | *type = BPF_TYPE_PROG; | |
114 | else if (inode->i_op == &bpf_map_iops) | |
115 | *type = BPF_TYPE_MAP; | |
116 | else | |
117 | return -EACCES; | |
118 | ||
119 | return 0; | |
120 | } | |
121 | ||
b2197755 DB |
122 | static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
123 | { | |
124 | struct inode *inode; | |
125 | ||
b2197755 DB |
126 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); |
127 | if (IS_ERR(inode)) | |
128 | return PTR_ERR(inode); | |
129 | ||
130 | inode->i_op = &bpf_dir_iops; | |
131 | inode->i_fop = &simple_dir_operations; | |
132 | ||
133 | inc_nlink(inode); | |
134 | inc_nlink(dir); | |
135 | ||
136 | d_instantiate(dentry, inode); | |
137 | dget(dentry); | |
138 | ||
139 | return 0; | |
140 | } | |
141 | ||
142 | static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, | |
143 | umode_t mode, const struct inode_operations *iops) | |
144 | { | |
145 | struct inode *inode; | |
146 | ||
b2197755 DB |
147 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); |
148 | if (IS_ERR(inode)) | |
149 | return PTR_ERR(inode); | |
150 | ||
151 | inode->i_op = iops; | |
152 | inode->i_private = dentry->d_fsdata; | |
153 | ||
154 | d_instantiate(dentry, inode); | |
155 | dget(dentry); | |
156 | ||
157 | return 0; | |
158 | } | |
159 | ||
160 | static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, | |
161 | dev_t devt) | |
162 | { | |
163 | enum bpf_type type = MINOR(devt); | |
164 | ||
165 | if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || | |
166 | dentry->d_fsdata == NULL) | |
167 | return -EPERM; | |
168 | ||
169 | switch (type) { | |
170 | case BPF_TYPE_PROG: | |
171 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); | |
172 | case BPF_TYPE_MAP: | |
173 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); | |
174 | default: | |
175 | return -EPERM; | |
176 | } | |
177 | } | |
178 | ||
0c93b7d8 AV |
179 | static struct dentry * |
180 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) | |
bb35a6ef | 181 | { |
0c93b7d8 AV |
182 | if (strchr(dentry->d_name.name, '.')) |
183 | return ERR_PTR(-EPERM); | |
184 | return simple_lookup(dir, dentry, flags); | |
bb35a6ef DB |
185 | } |
186 | ||
b2197755 | 187 | static const struct inode_operations bpf_dir_iops = { |
0c93b7d8 | 188 | .lookup = bpf_lookup, |
b2197755 DB |
189 | .mknod = bpf_mkobj, |
190 | .mkdir = bpf_mkdir, | |
191 | .rmdir = simple_rmdir, | |
0c93b7d8 AV |
192 | .rename = simple_rename, |
193 | .link = simple_link, | |
b2197755 DB |
194 | .unlink = simple_unlink, |
195 | }; | |
196 | ||
197 | static int bpf_obj_do_pin(const struct filename *pathname, void *raw, | |
198 | enum bpf_type type) | |
199 | { | |
200 | struct dentry *dentry; | |
201 | struct inode *dir; | |
202 | struct path path; | |
203 | umode_t mode; | |
204 | dev_t devt; | |
205 | int ret; | |
206 | ||
207 | dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); | |
208 | if (IS_ERR(dentry)) | |
209 | return PTR_ERR(dentry); | |
210 | ||
211 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); | |
212 | devt = MKDEV(UNNAMED_MAJOR, type); | |
213 | ||
214 | ret = security_path_mknod(&path, dentry, mode, devt); | |
215 | if (ret) | |
216 | goto out; | |
217 | ||
218 | dir = d_inode(path.dentry); | |
219 | if (dir->i_op != &bpf_dir_iops) { | |
220 | ret = -EPERM; | |
221 | goto out; | |
222 | } | |
223 | ||
224 | dentry->d_fsdata = raw; | |
225 | ret = vfs_mknod(dir, dentry, mode, devt); | |
226 | dentry->d_fsdata = NULL; | |
227 | out: | |
228 | done_path_create(&path, dentry); | |
229 | return ret; | |
230 | } | |
231 | ||
232 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |
233 | { | |
234 | struct filename *pname; | |
235 | enum bpf_type type; | |
236 | void *raw; | |
237 | int ret; | |
238 | ||
239 | pname = getname(pathname); | |
240 | if (IS_ERR(pname)) | |
241 | return PTR_ERR(pname); | |
242 | ||
243 | raw = bpf_fd_probe_obj(ufd, &type); | |
244 | if (IS_ERR(raw)) { | |
245 | ret = PTR_ERR(raw); | |
246 | goto out; | |
247 | } | |
248 | ||
249 | ret = bpf_obj_do_pin(pname, raw, type); | |
250 | if (ret != 0) | |
251 | bpf_any_put(raw, type); | |
252 | out: | |
253 | putname(pname); | |
254 | return ret; | |
255 | } | |
256 | ||
257 | static void *bpf_obj_do_get(const struct filename *pathname, | |
258 | enum bpf_type *type) | |
259 | { | |
260 | struct inode *inode; | |
261 | struct path path; | |
262 | void *raw; | |
263 | int ret; | |
264 | ||
265 | ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); | |
266 | if (ret) | |
267 | return ERR_PTR(ret); | |
268 | ||
269 | inode = d_backing_inode(path.dentry); | |
270 | ret = inode_permission(inode, MAY_WRITE); | |
271 | if (ret) | |
272 | goto out; | |
273 | ||
274 | ret = bpf_inode_type(inode, type); | |
275 | if (ret) | |
276 | goto out; | |
277 | ||
278 | raw = bpf_any_get(inode->i_private, *type); | |
92117d84 AS |
279 | if (!IS_ERR(raw)) |
280 | touch_atime(&path); | |
b2197755 DB |
281 | |
282 | path_put(&path); | |
283 | return raw; | |
284 | out: | |
285 | path_put(&path); | |
286 | return ERR_PTR(ret); | |
287 | } | |
288 | ||
289 | int bpf_obj_get_user(const char __user *pathname) | |
290 | { | |
291 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
292 | struct filename *pname; | |
293 | int ret = -ENOENT; | |
294 | void *raw; | |
295 | ||
296 | pname = getname(pathname); | |
297 | if (IS_ERR(pname)) | |
298 | return PTR_ERR(pname); | |
299 | ||
300 | raw = bpf_obj_do_get(pname, &type); | |
301 | if (IS_ERR(raw)) { | |
302 | ret = PTR_ERR(raw); | |
303 | goto out; | |
304 | } | |
305 | ||
306 | if (type == BPF_TYPE_PROG) | |
307 | ret = bpf_prog_new_fd(raw); | |
308 | else if (type == BPF_TYPE_MAP) | |
309 | ret = bpf_map_new_fd(raw); | |
310 | else | |
311 | goto out; | |
312 | ||
313 | if (ret < 0) | |
314 | bpf_any_put(raw, type); | |
315 | out: | |
316 | putname(pname); | |
317 | return ret; | |
318 | } | |
319 | ||
320 | static void bpf_evict_inode(struct inode *inode) | |
321 | { | |
322 | enum bpf_type type; | |
323 | ||
324 | truncate_inode_pages_final(&inode->i_data); | |
325 | clear_inode(inode); | |
326 | ||
327 | if (!bpf_inode_type(inode, &type)) | |
328 | bpf_any_put(inode->i_private, type); | |
329 | } | |
330 | ||
331 | static const struct super_operations bpf_super_ops = { | |
332 | .statfs = simple_statfs, | |
333 | .drop_inode = generic_delete_inode, | |
334 | .evict_inode = bpf_evict_inode, | |
335 | }; | |
336 | ||
337 | static int bpf_fill_super(struct super_block *sb, void *data, int silent) | |
338 | { | |
339 | static struct tree_descr bpf_rfiles[] = { { "" } }; | |
340 | struct inode *inode; | |
341 | int ret; | |
342 | ||
343 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); | |
344 | if (ret) | |
345 | return ret; | |
346 | ||
347 | sb->s_op = &bpf_super_ops; | |
348 | ||
349 | inode = sb->s_root->d_inode; | |
350 | inode->i_op = &bpf_dir_iops; | |
351 | inode->i_mode &= ~S_IALLUGO; | |
352 | inode->i_mode |= S_ISVTX | S_IRWXUGO; | |
353 | ||
354 | return 0; | |
355 | } | |
356 | ||
357 | static struct dentry *bpf_mount(struct file_system_type *type, int flags, | |
358 | const char *dev_name, void *data) | |
359 | { | |
e27f4a94 | 360 | return mount_nodev(type, flags, data, bpf_fill_super); |
b2197755 DB |
361 | } |
362 | ||
363 | static struct file_system_type bpf_fs_type = { | |
364 | .owner = THIS_MODULE, | |
365 | .name = "bpf", | |
366 | .mount = bpf_mount, | |
367 | .kill_sb = kill_litter_super, | |
b2197755 DB |
368 | }; |
369 | ||
370 | MODULE_ALIAS_FS("bpf"); | |
371 | ||
372 | static int __init bpf_init(void) | |
373 | { | |
374 | int ret; | |
375 | ||
376 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
377 | if (ret) | |
378 | return ret; | |
379 | ||
380 | ret = register_filesystem(&bpf_fs_type); | |
381 | if (ret) | |
382 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
383 | ||
384 | return ret; | |
385 | } | |
386 | fs_initcall(bpf_init); |