Commit | Line | Data |
---|---|---|
4282d606 SRRH |
1 | /* |
2 | * inode.c - part of tracefs, a pseudo file system for activating tracing | |
3 | * | |
4 | * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License version | |
10 | * 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * tracefs is the file system that is used by the tracing infrastructure. | |
13 | * | |
14 | */ | |
15 | ||
16 | #include <linux/module.h> | |
17 | #include <linux/fs.h> | |
18 | #include <linux/mount.h> | |
19 | #include <linux/namei.h> | |
20 | #include <linux/tracefs.h> | |
21 | #include <linux/fsnotify.h> | |
22 | #include <linux/seq_file.h> | |
23 | #include <linux/parser.h> | |
24 | #include <linux/magic.h> | |
25 | #include <linux/slab.h> | |
26 | ||
/* Permission bits used for the tracefs root when no "mode=" option is given. */
#define TRACEFS_DEFAULT_MODE 0700

/*
 * Mount and pin count handed to simple_pin_fs()/simple_release_fs() each
 * time a tracefs entry is created or removed.
 */
static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
/* Set to true by tracefs_init() once register_filesystem() has succeeded. */
static bool tracefs_registered;
32 | ||
33 | static ssize_t default_read_file(struct file *file, char __user *buf, | |
34 | size_t count, loff_t *ppos) | |
35 | { | |
36 | return 0; | |
37 | } | |
38 | ||
39 | static ssize_t default_write_file(struct file *file, const char __user *buf, | |
40 | size_t count, loff_t *ppos) | |
41 | { | |
42 | return count; | |
43 | } | |
44 | ||
/*
 * Default file operations; tracefs_create_file() installs these when the
 * caller passes a NULL fops pointer.
 */
static const struct file_operations tracefs_file_operations = {
	.read = default_read_file,
	.write = default_write_file,
	.open = simple_open,
	.llseek = noop_llseek,
};
51 | ||
52 | static struct inode *tracefs_get_inode(struct super_block *sb) | |
53 | { | |
54 | struct inode *inode = new_inode(sb); | |
55 | if (inode) { | |
56 | inode->i_ino = get_next_ino(); | |
57 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | |
58 | } | |
59 | return inode; | |
60 | } | |
61 | ||
/*
 * Parsed mount options. tracefs_apply_options() copies these onto the
 * root inode of the super block.
 */
struct tracefs_mount_opts {
	kuid_t uid;	/* owner, from "uid=" */
	kgid_t gid;	/* group, from "gid=" */
	umode_t mode;	/* permission bits, from "mode=" */
};
67 | ||
/* Token ids for the mount option patterns listed in the tokens table. */
enum {
	Opt_uid,
	Opt_gid,
	Opt_mode,
	Opt_err	/* id of the terminating NULL-pattern entry */
};
74 | ||
/* Pattern table handed to match_token() by tracefs_parse_options(). */
static const match_table_t tokens = {
	{Opt_uid, "uid=%u"},
	{Opt_gid, "gid=%u"},
	{Opt_mode, "mode=%o"},
	{Opt_err, NULL}
};
81 | ||
/* Per-super-block private data, stored in sb->s_fs_info. */
struct tracefs_fs_info {
	struct tracefs_mount_opts mount_opts;
};
85 | ||
86 | static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) | |
87 | { | |
88 | substring_t args[MAX_OPT_ARGS]; | |
89 | int option; | |
90 | int token; | |
91 | kuid_t uid; | |
92 | kgid_t gid; | |
93 | char *p; | |
94 | ||
95 | opts->mode = TRACEFS_DEFAULT_MODE; | |
96 | ||
97 | while ((p = strsep(&data, ",")) != NULL) { | |
98 | if (!*p) | |
99 | continue; | |
100 | ||
101 | token = match_token(p, tokens, args); | |
102 | switch (token) { | |
103 | case Opt_uid: | |
104 | if (match_int(&args[0], &option)) | |
105 | return -EINVAL; | |
106 | uid = make_kuid(current_user_ns(), option); | |
107 | if (!uid_valid(uid)) | |
108 | return -EINVAL; | |
109 | opts->uid = uid; | |
110 | break; | |
111 | case Opt_gid: | |
112 | if (match_int(&args[0], &option)) | |
113 | return -EINVAL; | |
114 | gid = make_kgid(current_user_ns(), option); | |
115 | if (!gid_valid(gid)) | |
116 | return -EINVAL; | |
117 | opts->gid = gid; | |
118 | break; | |
119 | case Opt_mode: | |
120 | if (match_octal(&args[0], &option)) | |
121 | return -EINVAL; | |
122 | opts->mode = option & S_IALLUGO; | |
123 | break; | |
124 | /* | |
125 | * We might like to report bad mount options here; | |
126 | * but traditionally tracefs has ignored all mount options | |
127 | */ | |
128 | } | |
129 | } | |
130 | ||
131 | return 0; | |
132 | } | |
133 | ||
134 | static int tracefs_apply_options(struct super_block *sb) | |
135 | { | |
136 | struct tracefs_fs_info *fsi = sb->s_fs_info; | |
137 | struct inode *inode = sb->s_root->d_inode; | |
138 | struct tracefs_mount_opts *opts = &fsi->mount_opts; | |
139 | ||
140 | inode->i_mode &= ~S_IALLUGO; | |
141 | inode->i_mode |= opts->mode; | |
142 | ||
143 | inode->i_uid = opts->uid; | |
144 | inode->i_gid = opts->gid; | |
145 | ||
146 | return 0; | |
147 | } | |
148 | ||
149 | static int tracefs_remount(struct super_block *sb, int *flags, char *data) | |
150 | { | |
151 | int err; | |
152 | struct tracefs_fs_info *fsi = sb->s_fs_info; | |
153 | ||
154 | sync_filesystem(sb); | |
155 | err = tracefs_parse_options(data, &fsi->mount_opts); | |
156 | if (err) | |
157 | goto fail; | |
158 | ||
159 | tracefs_apply_options(sb); | |
160 | ||
161 | fail: | |
162 | return err; | |
163 | } | |
164 | ||
165 | static int tracefs_show_options(struct seq_file *m, struct dentry *root) | |
166 | { | |
167 | struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; | |
168 | struct tracefs_mount_opts *opts = &fsi->mount_opts; | |
169 | ||
170 | if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) | |
171 | seq_printf(m, ",uid=%u", | |
172 | from_kuid_munged(&init_user_ns, opts->uid)); | |
173 | if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) | |
174 | seq_printf(m, ",gid=%u", | |
175 | from_kgid_munged(&init_user_ns, opts->gid)); | |
176 | if (opts->mode != TRACEFS_DEFAULT_MODE) | |
177 | seq_printf(m, ",mode=%o", opts->mode); | |
178 | ||
179 | return 0; | |
180 | } | |
181 | ||
/* Super block operations; installed on the super block by trace_fill_super(). */
static const struct super_operations tracefs_super_operations = {
	.statfs = simple_statfs,
	.remount_fs = tracefs_remount,
	.show_options = tracefs_show_options,
};
187 | ||
188 | static int trace_fill_super(struct super_block *sb, void *data, int silent) | |
189 | { | |
190 | static struct tree_descr trace_files[] = {{""}}; | |
191 | struct tracefs_fs_info *fsi; | |
192 | int err; | |
193 | ||
194 | save_mount_options(sb, data); | |
195 | ||
196 | fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); | |
197 | sb->s_fs_info = fsi; | |
198 | if (!fsi) { | |
199 | err = -ENOMEM; | |
200 | goto fail; | |
201 | } | |
202 | ||
203 | err = tracefs_parse_options(data, &fsi->mount_opts); | |
204 | if (err) | |
205 | goto fail; | |
206 | ||
207 | err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); | |
208 | if (err) | |
209 | goto fail; | |
210 | ||
211 | sb->s_op = &tracefs_super_operations; | |
212 | ||
213 | tracefs_apply_options(sb); | |
214 | ||
215 | return 0; | |
216 | ||
217 | fail: | |
218 | kfree(fsi); | |
219 | sb->s_fs_info = NULL; | |
220 | return err; | |
221 | } | |
222 | ||
223 | static struct dentry *trace_mount(struct file_system_type *fs_type, | |
224 | int flags, const char *dev_name, | |
225 | void *data) | |
226 | { | |
227 | return mount_single(fs_type, flags, data, trace_fill_super); | |
228 | } | |
229 | ||
/*
 * The tracefs file system type; registered by tracefs_init() and pinned
 * through simple_pin_fs() whenever an entry is created.
 */
static struct file_system_type trace_fs_type = {
	.owner = THIS_MODULE,
	.name = "tracefs",
	.mount = trace_mount,
	.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("tracefs");
237 | ||
238 | static struct dentry *start_creating(const char *name, struct dentry *parent) | |
239 | { | |
240 | struct dentry *dentry; | |
241 | int error; | |
242 | ||
243 | pr_debug("tracefs: creating file '%s'\n",name); | |
244 | ||
245 | error = simple_pin_fs(&trace_fs_type, &tracefs_mount, | |
246 | &tracefs_mount_count); | |
247 | if (error) | |
248 | return ERR_PTR(error); | |
249 | ||
250 | /* If the parent is not specified, we create it in the root. | |
251 | * We need the root dentry to do this, which is in the super | |
252 | * block. A pointer to that is in the struct vfsmount that we | |
253 | * have around. | |
254 | */ | |
255 | if (!parent) | |
256 | parent = tracefs_mount->mnt_root; | |
257 | ||
258 | mutex_lock(&parent->d_inode->i_mutex); | |
259 | dentry = lookup_one_len(name, parent, strlen(name)); | |
260 | if (!IS_ERR(dentry) && dentry->d_inode) { | |
261 | dput(dentry); | |
262 | dentry = ERR_PTR(-EEXIST); | |
263 | } | |
264 | if (IS_ERR(dentry)) | |
265 | mutex_unlock(&parent->d_inode->i_mutex); | |
266 | return dentry; | |
267 | } | |
268 | ||
269 | static struct dentry *failed_creating(struct dentry *dentry) | |
270 | { | |
271 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | |
272 | dput(dentry); | |
273 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | |
274 | return NULL; | |
275 | } | |
276 | ||
277 | static struct dentry *end_creating(struct dentry *dentry) | |
278 | { | |
279 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | |
280 | return dentry; | |
281 | } | |
282 | ||
283 | /** | |
284 | * tracefs_create_file - create a file in the tracefs filesystem | |
285 | * @name: a pointer to a string containing the name of the file to create. | |
286 | * @mode: the permission that the file should have. | |
287 | * @parent: a pointer to the parent dentry for this file. This should be a | |
288 | * directory dentry if set. If this parameter is NULL, then the | |
289 | * file will be created in the root of the tracefs filesystem. | |
290 | * @data: a pointer to something that the caller will want to get to later | |
291 | * on. The inode.i_private pointer will point to this value on | |
292 | * the open() call. | |
293 | * @fops: a pointer to a struct file_operations that should be used for | |
294 | * this file. | |
295 | * | |
296 | * This is the basic "create a file" function for tracefs. It allows for a | |
297 | * wide range of flexibility in creating a file, or a directory (if you want | |
298 | * to create a directory, the tracefs_create_dir() function is | |
299 | * recommended to be used instead.) | |
300 | * | |
301 | * This function will return a pointer to a dentry if it succeeds. This | |
302 | * pointer must be passed to the tracefs_remove() function when the file is | |
303 | * to be removed (no automatic cleanup happens if your module is unloaded, | |
304 | * you are responsible here.) If an error occurs, %NULL will be returned. | |
305 | * | |
306 | * If tracefs is not enabled in the kernel, the value -%ENODEV will be | |
307 | * returned. | |
308 | */ | |
309 | struct dentry *tracefs_create_file(const char *name, umode_t mode, | |
310 | struct dentry *parent, void *data, | |
311 | const struct file_operations *fops) | |
312 | { | |
313 | struct dentry *dentry; | |
314 | struct inode *inode; | |
315 | ||
316 | if (!(mode & S_IFMT)) | |
317 | mode |= S_IFREG; | |
318 | BUG_ON(!S_ISREG(mode)); | |
319 | dentry = start_creating(name, parent); | |
320 | ||
321 | if (IS_ERR(dentry)) | |
322 | return NULL; | |
323 | ||
324 | inode = tracefs_get_inode(dentry->d_sb); | |
325 | if (unlikely(!inode)) | |
326 | return failed_creating(dentry); | |
327 | ||
328 | inode->i_mode = mode; | |
329 | inode->i_fop = fops ? fops : &tracefs_file_operations; | |
330 | inode->i_private = data; | |
331 | d_instantiate(dentry, inode); | |
332 | fsnotify_create(dentry->d_parent->d_inode, dentry); | |
333 | return end_creating(dentry); | |
334 | } | |
335 | ||
336 | /** | |
337 | * tracefs_create_dir - create a directory in the tracefs filesystem | |
338 | * @name: a pointer to a string containing the name of the directory to | |
339 | * create. | |
340 | * @parent: a pointer to the parent dentry for this file. This should be a | |
341 | * directory dentry if set. If this parameter is NULL, then the | |
342 | * directory will be created in the root of the tracefs filesystem. | |
343 | * | |
344 | * This function creates a directory in tracefs with the given name. | |
345 | * | |
346 | * This function will return a pointer to a dentry if it succeeds. This | |
347 | * pointer must be passed to the tracefs_remove() function when the file is | |
348 | * to be removed. If an error occurs, %NULL will be returned. | |
349 | * | |
350 | * If tracing is not enabled in the kernel, the value -%ENODEV will be | |
351 | * returned. | |
352 | */ | |
353 | struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) | |
354 | { | |
355 | struct dentry *dentry = start_creating(name, parent); | |
356 | struct inode *inode; | |
357 | ||
358 | if (IS_ERR(dentry)) | |
359 | return NULL; | |
360 | ||
361 | inode = tracefs_get_inode(dentry->d_sb); | |
362 | if (unlikely(!inode)) | |
363 | return failed_creating(dentry); | |
364 | ||
365 | inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | |
366 | inode->i_op = &simple_dir_inode_operations; | |
367 | inode->i_fop = &simple_dir_operations; | |
368 | ||
369 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | |
370 | inc_nlink(inode); | |
371 | d_instantiate(dentry, inode); | |
372 | inc_nlink(dentry->d_parent->d_inode); | |
373 | fsnotify_mkdir(dentry->d_parent->d_inode, dentry); | |
374 | return end_creating(dentry); | |
375 | } | |
376 | ||
377 | static inline int tracefs_positive(struct dentry *dentry) | |
378 | { | |
379 | return dentry->d_inode && !d_unhashed(dentry); | |
380 | } | |
381 | ||
382 | static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) | |
383 | { | |
384 | int ret = 0; | |
385 | ||
386 | if (tracefs_positive(dentry)) { | |
387 | if (dentry->d_inode) { | |
388 | dget(dentry); | |
389 | switch (dentry->d_inode->i_mode & S_IFMT) { | |
390 | case S_IFDIR: | |
391 | ret = simple_rmdir(parent->d_inode, dentry); | |
392 | break; | |
393 | default: | |
394 | simple_unlink(parent->d_inode, dentry); | |
395 | break; | |
396 | } | |
397 | if (!ret) | |
398 | d_delete(dentry); | |
399 | dput(dentry); | |
400 | } | |
401 | } | |
402 | return ret; | |
403 | } | |
404 | ||
405 | /** | |
406 | * tracefs_remove - removes a file or directory from the tracefs filesystem | |
407 | * @dentry: a pointer to a the dentry of the file or directory to be | |
408 | * removed. | |
409 | * | |
410 | * This function removes a file or directory in tracefs that was previously | |
411 | * created with a call to another tracefs function (like | |
412 | * tracefs_create_file() or variants thereof.) | |
413 | */ | |
414 | void tracefs_remove(struct dentry *dentry) | |
415 | { | |
416 | struct dentry *parent; | |
417 | int ret; | |
418 | ||
419 | if (IS_ERR_OR_NULL(dentry)) | |
420 | return; | |
421 | ||
422 | parent = dentry->d_parent; | |
423 | if (!parent || !parent->d_inode) | |
424 | return; | |
425 | ||
426 | mutex_lock(&parent->d_inode->i_mutex); | |
427 | ret = __tracefs_remove(dentry, parent); | |
428 | mutex_unlock(&parent->d_inode->i_mutex); | |
429 | if (!ret) | |
430 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | |
431 | } | |
432 | ||
/**
 * tracefs_remove_recursive - recursively removes a directory
 * @dentry: a pointer to the dentry of the directory to be removed.
 *
 * This function recursively removes a directory tree in tracefs that
 * was previously created with a call to another tracefs function
 * (like tracefs_create_file() or variants thereof.)
 *
 * Nothing is done if @dentry is NULL or an error pointer, or if it has no
 * parent with an inode.
 */
void tracefs_remove_recursive(struct dentry *dentry)
{
	struct dentry *child, *parent;

	if (IS_ERR_OR_NULL(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || !parent->d_inode)
		return;

	/* Start the walk at @dentry itself; its children are removed first. */
	parent = dentry;
 down:
	/* Entering a directory: take its i_mutex before touching its entries. */
	mutex_lock(&parent->d_inode->i_mutex);
 loop:
	/*
	 * The parent->d_subdirs is protected by the d_lock. Outside that
	 * lock, the child can be unlinked and set to be freed which can
	 * use the d_u.d_child as the rcu head and corrupt this list.
	 */
	spin_lock(&parent->d_lock);
	list_for_each_entry(child, &parent->d_subdirs, d_child) {
		if (!tracefs_positive(child))
			continue;

		/* perhaps simple_empty(child) makes more sense */
		if (!list_empty(&child->d_subdirs)) {
			/* Child has entries of its own: descend into it. */
			spin_unlock(&parent->d_lock);
			mutex_unlock(&parent->d_inode->i_mutex);
			parent = child;
			goto down;
		}

		spin_unlock(&parent->d_lock);

		/* Each successfully removed entry gives back one mount pin. */
		if (!__tracefs_remove(child, parent))
			simple_release_fs(&tracefs_mount, &tracefs_mount_count);

		/*
		 * The parent->d_lock protects against child from unlinking
		 * from d_subdirs. When releasing the parent->d_lock we can
		 * no longer trust that the next pointer is valid.
		 * Restart the loop. We'll skip this one with the
		 * tracefs_positive() check.
		 */
		goto loop;
	}
	spin_unlock(&parent->d_lock);

	/*
	 * The scan of this directory completed without a restart: step up
	 * one level, swapping this directory's i_mutex for its parent's.
	 */
	mutex_unlock(&parent->d_inode->i_mutex);
	child = parent;
	parent = parent->d_parent;
	mutex_lock(&parent->d_inode->i_mutex);

	if (child != dentry)
		/* go up */
		goto loop;

	/* Back at the top: remove @dentry itself from its parent. */
	if (!__tracefs_remove(child, parent))
		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
	mutex_unlock(&parent->d_inode->i_mutex);
}
503 | ||
504 | /** | |
505 | * tracefs_initialized - Tells whether tracefs has been registered | |
506 | */ | |
507 | bool tracefs_initialized(void) | |
508 | { | |
509 | return tracefs_registered; | |
510 | } | |
511 | ||
512 | static int __init tracefs_init(void) | |
513 | { | |
514 | int retval; | |
515 | ||
516 | retval = register_filesystem(&trace_fs_type); | |
517 | if (!retval) | |
518 | tracefs_registered = true; | |
519 | ||
520 | return retval; | |
521 | } | |
522 | core_initcall(tracefs_init); |