Commit | Line | Data |
---|---|---|
b8441ed2 TH |
1 | /* |
2 | * fs/kernfs/dir.c - kernfs directory implementation | |
3 | * | |
4 | * Copyright (c) 2001-3 Patrick Mochel | |
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | |
6 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | |
7 | * | |
8 | * This file is released under the GPLv2. | |
9 | */ | |
fd7b9f7b TH |
10 | |
11 | #include <linux/fs.h> | |
12 | #include <linux/namei.h> | |
13 | #include <linux/idr.h> | |
14 | #include <linux/slab.h> | |
15 | #include <linux/security.h> | |
16 | #include <linux/hash.h> | |
17 | ||
18 | #include "kernfs-internal.h" | |
19 | ||
/*
 * sysfs_mutex is taken by sysfs_addrm_start() and by the lookup, rename
 * and readdir paths in this file; the sibling rbtree helpers require it
 * to be held.
 */
DEFINE_MUTEX(sysfs_mutex);

/* Convert a pointer to the embedded ->s_rb node into its sysfs_dirent. */
#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)

/* sysfs_ino_lock is held around every operation on sysfs_ino_ida. */
static DEFINE_SPINLOCK(sysfs_ino_lock);
/* Allocator handing out the inode numbers stored in sd->s_ino. */
static DEFINE_IDA(sysfs_ino_ida);
26 | ||
/**
 *	sysfs_name_hash
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t ).
 *
 *	The values 0, 1 and INT_MAX are never returned: sysfs_dir_pos() only
 *	searches the rbtree for positions strictly between 1 and INT_MAX, so
 *	a node hashed to one of the reserved values would be unreachable by
 *	position.
 */
static unsigned int sysfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);

	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;
	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}
49 | ||
50 | static int sysfs_name_compare(unsigned int hash, const char *name, | |
51 | const void *ns, const struct sysfs_dirent *sd) | |
52 | { | |
53 | if (hash != sd->s_hash) | |
54 | return hash - sd->s_hash; | |
55 | if (ns != sd->s_ns) | |
56 | return ns - sd->s_ns; | |
57 | return strcmp(name, sd->s_name); | |
58 | } | |
59 | ||
60 | static int sysfs_sd_compare(const struct sysfs_dirent *left, | |
61 | const struct sysfs_dirent *right) | |
62 | { | |
63 | return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, | |
64 | right); | |
65 | } | |
66 | ||
67 | /** | |
68 | * sysfs_link_sibling - link sysfs_dirent into sibling rbtree | |
69 | * @sd: sysfs_dirent of interest | |
70 | * | |
71 | * Link @sd into its sibling rbtree which starts from | |
72 | * sd->s_parent->s_dir.children. | |
73 | * | |
74 | * Locking: | |
75 | * mutex_lock(sysfs_mutex) | |
76 | * | |
77 | * RETURNS: | |
78 | * 0 on susccess -EEXIST on failure. | |
79 | */ | |
80 | static int sysfs_link_sibling(struct sysfs_dirent *sd) | |
81 | { | |
82 | struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; | |
83 | struct rb_node *parent = NULL; | |
84 | ||
85 | if (sysfs_type(sd) == SYSFS_DIR) | |
86 | sd->s_parent->s_dir.subdirs++; | |
87 | ||
88 | while (*node) { | |
89 | struct sysfs_dirent *pos; | |
90 | int result; | |
91 | ||
92 | pos = to_sysfs_dirent(*node); | |
93 | parent = *node; | |
94 | result = sysfs_sd_compare(sd, pos); | |
95 | if (result < 0) | |
96 | node = &pos->s_rb.rb_left; | |
97 | else if (result > 0) | |
98 | node = &pos->s_rb.rb_right; | |
99 | else | |
100 | return -EEXIST; | |
101 | } | |
102 | /* add new node and rebalance the tree */ | |
103 | rb_link_node(&sd->s_rb, parent, node); | |
104 | rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); | |
105 | return 0; | |
106 | } | |
107 | ||
108 | /** | |
109 | * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree | |
110 | * @sd: sysfs_dirent of interest | |
111 | * | |
112 | * Unlink @sd from its sibling rbtree which starts from | |
113 | * sd->s_parent->s_dir.children. | |
114 | * | |
115 | * Locking: | |
116 | * mutex_lock(sysfs_mutex) | |
117 | */ | |
118 | static void sysfs_unlink_sibling(struct sysfs_dirent *sd) | |
119 | { | |
120 | if (sysfs_type(sd) == SYSFS_DIR) | |
121 | sd->s_parent->s_dir.subdirs--; | |
122 | ||
123 | rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); | |
124 | } | |
125 | ||
126 | /** | |
127 | * sysfs_get_active - get an active reference to sysfs_dirent | |
128 | * @sd: sysfs_dirent to get an active reference to | |
129 | * | |
130 | * Get an active reference of @sd. This function is noop if @sd | |
131 | * is NULL. | |
132 | * | |
133 | * RETURNS: | |
134 | * Pointer to @sd on success, NULL on failure. | |
135 | */ | |
136 | struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) | |
137 | { | |
138 | if (unlikely(!sd)) | |
139 | return NULL; | |
140 | ||
141 | if (!atomic_inc_unless_negative(&sd->s_active)) | |
142 | return NULL; | |
143 | ||
144 | if (sd->s_flags & SYSFS_FLAG_LOCKDEP) | |
145 | rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); | |
146 | return sd; | |
147 | } | |
148 | ||
/**
 *	sysfs_put_active - put an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to put an active reference to
 *
 *	Put an active reference to @sd.  This function is noop if @sd
 *	is NULL.  If the count drops to exactly SD_DEACTIVATED_BIAS, the
 *	completion stored in sd->u.completion is signalled.
 */
void sysfs_put_active(struct sysfs_dirent *sd)
{
	int v;

	if (unlikely(!sd))
		return;

	if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
		rwsem_release(&sd->dep_map, 1, _RET_IP_);
	v = atomic_dec_return(&sd->s_active);
	/*
	 * sysfs_deactivate() adds SD_DEACTIVATED_BIAS to s_active and waits
	 * unless the result is exactly the bias; any other value here means
	 * there is nobody to wake.
	 */
	if (likely(v != SD_DEACTIVATED_BIAS))
		return;

	/* atomic_dec_return() is a mb(), we'll always see the updated
	 * sd->u.completion.
	 */
	complete(sd->u.completion);
}
174 | ||
/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
 *	Deny new active references and drain existing ones.  @sd must
 *	already carry SYSFS_FLAG_REMOVED.  May sleep in
 *	wait_for_completion() until the remaining active references are
 *	put.
 */
static void sysfs_deactivate(struct sysfs_dirent *sd)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;

	BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));

	/* skip node types that do not carry SYSFS_ACTIVE_REF */
	if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
		return;

	/* publish the on-stack completion before biasing s_active */
	sd->u.completion = (void *)&wait;

	rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
	/* atomic_add_return() is a mb(), put_active() will always see
	 * the updated sd->u.completion.
	 */
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	/* anything other than the bare bias means references are still held */
	if (v != SD_DEACTIVATED_BIAS) {
		lock_contended(&sd->dep_map, _RET_IP_);
		wait_for_completion(&wait);
	}

	lock_acquired(&sd->dep_map, _RET_IP_);
	rwsem_release(&sd->dep_map, 1, _RET_IP_);
}
207 | ||
208 | static int sysfs_alloc_ino(unsigned int *pino) | |
209 | { | |
210 | int ino, rc; | |
211 | ||
212 | retry: | |
213 | spin_lock(&sysfs_ino_lock); | |
ba7443bc | 214 | rc = ida_get_new_above(&sysfs_ino_ida, 1, &ino); |
fd7b9f7b TH |
215 | spin_unlock(&sysfs_ino_lock); |
216 | ||
217 | if (rc == -EAGAIN) { | |
218 | if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) | |
219 | goto retry; | |
220 | rc = -ENOMEM; | |
221 | } | |
222 | ||
223 | *pino = ino; | |
224 | return rc; | |
225 | } | |
226 | ||
/* Return inode number @ino to sysfs_ino_ida, under sysfs_ino_lock. */
static void sysfs_free_ino(unsigned int ino)
{
	spin_lock(&sysfs_ino_lock);
	ida_remove(&sysfs_ino_ida, ino);
	spin_unlock(&sysfs_ino_lock);
}
233 | ||
234 | /** | |
235 | * kernfs_get - get a reference count on a sysfs_dirent | |
236 | * @sd: the target sysfs_dirent | |
237 | */ | |
238 | void kernfs_get(struct sysfs_dirent *sd) | |
239 | { | |
240 | if (sd) { | |
241 | WARN_ON(!atomic_read(&sd->s_count)); | |
242 | atomic_inc(&sd->s_count); | |
243 | } | |
244 | } | |
245 | EXPORT_SYMBOL_GPL(kernfs_get); | |
246 | ||
/**
 * kernfs_put - put a reference count on a sysfs_dirent
 * @sd: the target sysfs_dirent
 *
 * Put a reference count of @sd and destroy it if it reached zero.
 * Destroying a node also drops the reference it holds on its parent,
 * which may in turn be destroyed; this is handled by looping rather
 * than recursing.  When the node without a parent is released, the
 * kernfs_root looked up at entry is freed as well.  A NULL @sd is
 * ignored.
 */
void kernfs_put(struct sysfs_dirent *sd)
{
	struct sysfs_dirent *parent_sd;
	struct kernfs_root *root;

	if (!sd || !atomic_dec_and_test(&sd->s_count))
		return;
	/* look the root up now, while @sd is still valid */
	root = kernfs_root(sd);
 repeat:
	/* Moving/renaming is always done while holding reference.
	 * sd->s_parent won't change beneath us.
	 */
	parent_sd = sd->s_parent;

	WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
		"sysfs: free using entry: %s/%s\n",
		parent_sd ? parent_sd->s_name : "", sd->s_name);

	/* a symlink holds a reference on its target */
	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
		kernfs_put(sd->s_symlink.target_sd);
	/* the name is only owned by @sd when SYSFS_COPY_NAME is set */
	if (sysfs_type(sd) & SYSFS_COPY_NAME)
		kfree(sd->s_name);
	if (sd->s_iattr && sd->s_iattr->ia_secdata)
		security_release_secctx(sd->s_iattr->ia_secdata,
					sd->s_iattr->ia_secdata_len);
	kfree(sd->s_iattr);
	sysfs_free_ino(sd->s_ino);
	kmem_cache_free(sysfs_dir_cachep, sd);

	/* drop the reference the freed node held on its parent */
	sd = parent_sd;
	if (sd) {
		if (atomic_dec_and_test(&sd->s_count))
			goto repeat;
	} else {
		/* just released the root sd, free @root too */
		kfree(root);
	}
}
EXPORT_SYMBOL_GPL(kernfs_put);
292 | ||
293 | static int sysfs_dentry_delete(const struct dentry *dentry) | |
294 | { | |
295 | struct sysfs_dirent *sd = dentry->d_fsdata; | |
296 | return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); | |
297 | } | |
298 | ||
299 | static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) | |
300 | { | |
301 | struct sysfs_dirent *sd; | |
302 | ||
303 | if (flags & LOOKUP_RCU) | |
304 | return -ECHILD; | |
305 | ||
306 | sd = dentry->d_fsdata; | |
307 | mutex_lock(&sysfs_mutex); | |
308 | ||
309 | /* The sysfs dirent has been deleted */ | |
310 | if (sd->s_flags & SYSFS_FLAG_REMOVED) | |
311 | goto out_bad; | |
312 | ||
313 | /* The sysfs dirent has been moved? */ | |
314 | if (dentry->d_parent->d_fsdata != sd->s_parent) | |
315 | goto out_bad; | |
316 | ||
317 | /* The sysfs dirent has been renamed */ | |
318 | if (strcmp(dentry->d_name.name, sd->s_name) != 0) | |
319 | goto out_bad; | |
320 | ||
321 | /* The sysfs dirent has been moved to a different namespace */ | |
322 | if (sd->s_parent && (sd->s_parent->s_flags & SYSFS_FLAG_NS) && | |
323 | sysfs_info(dentry->d_sb)->ns != sd->s_ns) | |
324 | goto out_bad; | |
325 | ||
326 | mutex_unlock(&sysfs_mutex); | |
327 | out_valid: | |
328 | return 1; | |
329 | out_bad: | |
330 | /* Remove the dentry from the dcache hashes. | |
331 | * If this is a deleted dentry we use d_drop instead of d_delete | |
332 | * so sysfs doesn't need to cope with negative dentries. | |
333 | * | |
334 | * If this is a dentry that has simply been renamed we | |
335 | * use d_drop to remove it from the dcache lookup on its | |
336 | * old parent. If this dentry persists later when a lookup | |
337 | * is performed at its new name the dentry will be readded | |
338 | * to the dcache hashes. | |
339 | */ | |
340 | mutex_unlock(&sysfs_mutex); | |
341 | ||
342 | /* If we have submounts we must allow the vfs caches | |
343 | * to lie about the state of the filesystem to prevent | |
344 | * leaks and other nasty things. | |
345 | */ | |
346 | if (check_submounts_and_drop(dentry) != 0) | |
347 | goto out_valid; | |
348 | ||
349 | return 0; | |
350 | } | |
351 | ||
/* ->d_release: drop the sysfs_dirent reference stored in d_fsdata. */
static void sysfs_dentry_release(struct dentry *dentry)
{
	kernfs_put(dentry->d_fsdata);
}
356 | ||
/* dentry operations wiring up the revalidate/delete/release hooks above */
const struct dentry_operations sysfs_dentry_ops = {
	.d_revalidate	= sysfs_dentry_revalidate,
	.d_delete	= sysfs_dentry_delete,
	.d_release	= sysfs_dentry_release,
};
362 | ||
363 | struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) | |
364 | { | |
365 | char *dup_name = NULL; | |
366 | struct sysfs_dirent *sd; | |
367 | ||
368 | if (type & SYSFS_COPY_NAME) { | |
369 | name = dup_name = kstrdup(name, GFP_KERNEL); | |
370 | if (!name) | |
371 | return NULL; | |
372 | } | |
373 | ||
374 | sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); | |
375 | if (!sd) | |
376 | goto err_out1; | |
377 | ||
378 | if (sysfs_alloc_ino(&sd->s_ino)) | |
379 | goto err_out2; | |
380 | ||
381 | atomic_set(&sd->s_count, 1); | |
382 | atomic_set(&sd->s_active, 0); | |
383 | ||
384 | sd->s_name = name; | |
385 | sd->s_mode = mode; | |
386 | sd->s_flags = type | SYSFS_FLAG_REMOVED; | |
387 | ||
388 | return sd; | |
389 | ||
390 | err_out2: | |
391 | kmem_cache_free(sysfs_dir_cachep, sd); | |
392 | err_out1: | |
393 | kfree(dup_name); | |
394 | return NULL; | |
395 | } | |
396 | ||
/**
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *
 *	This function is called when the caller is about to add or remove
 *	sysfs_dirent.  This function zeroes @acxt and acquires
 *	sysfs_mutex.  @acxt is used to keep and pass context to other
 *	addrm functions.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.
 */
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
	__acquires(sysfs_mutex)
{
	memset(acxt, 0, sizeof(*acxt));

	mutex_lock(&sysfs_mutex);
}
416 | ||
417 | /** | |
418 | * sysfs_add_one - add sysfs_dirent to parent without warning | |
419 | * @acxt: addrm context to use | |
420 | * @sd: sysfs_dirent to be added | |
421 | * @parent_sd: the parent sysfs_dirent to add @sd to | |
422 | * | |
423 | * Get @parent_sd and set @sd->s_parent to it and increment nlink of | |
424 | * the parent inode if @sd is a directory and link into the children | |
425 | * list of the parent. | |
426 | * | |
427 | * This function should be called between calls to | |
428 | * sysfs_addrm_start() and sysfs_addrm_finish() and should be | |
429 | * passed the same @acxt as passed to sysfs_addrm_start(). | |
430 | * | |
431 | * LOCKING: | |
432 | * Determined by sysfs_addrm_start(). | |
433 | * | |
434 | * RETURNS: | |
435 | * 0 on success, -EEXIST if entry with the given name already | |
436 | * exists. | |
437 | */ | |
438 | int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, | |
439 | struct sysfs_dirent *parent_sd) | |
440 | { | |
441 | bool has_ns = parent_sd->s_flags & SYSFS_FLAG_NS; | |
442 | struct sysfs_inode_attrs *ps_iattr; | |
443 | int ret; | |
444 | ||
445 | if (has_ns != (bool)sd->s_ns) { | |
446 | WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", | |
447 | has_ns ? "required" : "invalid", | |
448 | parent_sd->s_name, sd->s_name); | |
449 | return -EINVAL; | |
450 | } | |
451 | ||
452 | if (sysfs_type(parent_sd) != SYSFS_DIR) | |
453 | return -EINVAL; | |
454 | ||
455 | sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); | |
456 | sd->s_parent = parent_sd; | |
457 | kernfs_get(parent_sd); | |
458 | ||
459 | ret = sysfs_link_sibling(sd); | |
460 | if (ret) | |
461 | return ret; | |
462 | ||
463 | /* Update timestamps on the parent */ | |
464 | ps_iattr = parent_sd->s_iattr; | |
465 | if (ps_iattr) { | |
466 | struct iattr *ps_iattrs = &ps_iattr->ia_iattr; | |
467 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; | |
468 | } | |
469 | ||
470 | /* Mark the entry added into directory tree */ | |
471 | sd->s_flags &= ~SYSFS_FLAG_REMOVED; | |
472 | ||
473 | return 0; | |
474 | } | |
475 | ||
476 | /** | |
477 | * sysfs_remove_one - remove sysfs_dirent from parent | |
478 | * @acxt: addrm context to use | |
479 | * @sd: sysfs_dirent to be removed | |
480 | * | |
481 | * Mark @sd removed and drop nlink of parent inode if @sd is a | |
482 | * directory. @sd is unlinked from the children list. | |
483 | * | |
484 | * This function should be called between calls to | |
485 | * sysfs_addrm_start() and sysfs_addrm_finish() and should be | |
486 | * passed the same @acxt as passed to sysfs_addrm_start(). | |
487 | * | |
488 | * LOCKING: | |
489 | * Determined by sysfs_addrm_start(). | |
490 | */ | |
491 | static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, | |
492 | struct sysfs_dirent *sd) | |
493 | { | |
494 | struct sysfs_inode_attrs *ps_iattr; | |
495 | ||
496 | /* | |
497 | * Removal can be called multiple times on the same node. Only the | |
498 | * first invocation is effective and puts the base ref. | |
499 | */ | |
500 | if (sd->s_flags & SYSFS_FLAG_REMOVED) | |
501 | return; | |
502 | ||
ba7443bc TH |
503 | if (sd->s_parent) { |
504 | sysfs_unlink_sibling(sd); | |
fd7b9f7b | 505 | |
ba7443bc TH |
506 | /* Update timestamps on the parent */ |
507 | ps_iattr = sd->s_parent->s_iattr; | |
508 | if (ps_iattr) { | |
509 | ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; | |
510 | ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; | |
511 | } | |
fd7b9f7b TH |
512 | } |
513 | ||
514 | sd->s_flags |= SYSFS_FLAG_REMOVED; | |
515 | sd->u.removed_list = acxt->removed; | |
516 | acxt->removed = sd; | |
517 | } | |
518 | ||
519 | /** | |
520 | * sysfs_addrm_finish - finish up sysfs_dirent add/remove | |
521 | * @acxt: addrm context to finish up | |
522 | * | |
523 | * Finish up sysfs_dirent add/remove. Resources acquired by | |
524 | * sysfs_addrm_start() are released and removed sysfs_dirents are | |
525 | * cleaned up. | |
526 | * | |
527 | * LOCKING: | |
528 | * sysfs_mutex is released. | |
529 | */ | |
530 | void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) | |
531 | __releases(sysfs_mutex) | |
532 | { | |
533 | /* release resources acquired by sysfs_addrm_start() */ | |
534 | mutex_unlock(&sysfs_mutex); | |
535 | ||
536 | /* kill removed sysfs_dirents */ | |
537 | while (acxt->removed) { | |
538 | struct sysfs_dirent *sd = acxt->removed; | |
539 | ||
540 | acxt->removed = sd->u.removed_list; | |
541 | ||
542 | sysfs_deactivate(sd); | |
543 | sysfs_unmap_bin_file(sd); | |
544 | kernfs_put(sd); | |
545 | } | |
546 | } | |
547 | ||
548 | /** | |
549 | * kernfs_find_ns - find sysfs_dirent with the given name | |
550 | * @parent: sysfs_dirent to search under | |
551 | * @name: name to look for | |
552 | * @ns: the namespace tag to use | |
553 | * | |
554 | * Look for sysfs_dirent with name @name under @parent. Returns pointer to | |
555 | * the found sysfs_dirent on success, %NULL on failure. | |
556 | */ | |
557 | static struct sysfs_dirent *kernfs_find_ns(struct sysfs_dirent *parent, | |
558 | const unsigned char *name, | |
559 | const void *ns) | |
560 | { | |
561 | struct rb_node *node = parent->s_dir.children.rb_node; | |
562 | bool has_ns = parent->s_flags & SYSFS_FLAG_NS; | |
563 | unsigned int hash; | |
564 | ||
565 | lockdep_assert_held(&sysfs_mutex); | |
566 | ||
567 | if (has_ns != (bool)ns) { | |
568 | WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", | |
569 | has_ns ? "required" : "invalid", | |
570 | parent->s_name, name); | |
571 | return NULL; | |
572 | } | |
573 | ||
574 | hash = sysfs_name_hash(name, ns); | |
575 | while (node) { | |
576 | struct sysfs_dirent *sd; | |
577 | int result; | |
578 | ||
579 | sd = to_sysfs_dirent(node); | |
580 | result = sysfs_name_compare(hash, name, ns, sd); | |
581 | if (result < 0) | |
582 | node = node->rb_left; | |
583 | else if (result > 0) | |
584 | node = node->rb_right; | |
585 | else | |
586 | return sd; | |
587 | } | |
588 | return NULL; | |
589 | } | |
590 | ||
591 | /** | |
592 | * kernfs_find_and_get_ns - find and get sysfs_dirent with the given name | |
593 | * @parent: sysfs_dirent to search under | |
594 | * @name: name to look for | |
595 | * @ns: the namespace tag to use | |
596 | * | |
597 | * Look for sysfs_dirent with name @name under @parent and get a reference | |
598 | * if found. This function may sleep and returns pointer to the found | |
599 | * sysfs_dirent on success, %NULL on failure. | |
600 | */ | |
601 | struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent, | |
602 | const char *name, const void *ns) | |
603 | { | |
604 | struct sysfs_dirent *sd; | |
605 | ||
606 | mutex_lock(&sysfs_mutex); | |
607 | sd = kernfs_find_ns(parent, name, ns); | |
608 | kernfs_get(sd); | |
609 | mutex_unlock(&sysfs_mutex); | |
610 | ||
611 | return sd; | |
612 | } | |
613 | EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); | |
614 | ||
ba7443bc TH |
615 | /** |
616 | * kernfs_create_root - create a new kernfs hierarchy | |
617 | * @priv: opaque data associated with the new directory | |
618 | * | |
619 | * Returns the root of the new hierarchy on success, ERR_PTR() value on | |
620 | * failure. | |
621 | */ | |
622 | struct kernfs_root *kernfs_create_root(void *priv) | |
623 | { | |
624 | struct kernfs_root *root; | |
625 | struct sysfs_dirent *sd; | |
626 | ||
627 | root = kzalloc(sizeof(*root), GFP_KERNEL); | |
628 | if (!root) | |
629 | return ERR_PTR(-ENOMEM); | |
630 | ||
631 | sd = sysfs_new_dirent("", S_IFDIR | S_IRUGO | S_IXUGO, SYSFS_DIR); | |
632 | if (!sd) { | |
633 | kfree(root); | |
634 | return ERR_PTR(-ENOMEM); | |
635 | } | |
636 | ||
637 | sd->s_flags &= ~SYSFS_FLAG_REMOVED; | |
638 | sd->priv = priv; | |
639 | sd->s_dir.root = root; | |
640 | ||
641 | root->sd = sd; | |
642 | ||
643 | return root; | |
644 | } | |
645 | ||
/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.  @root itself is freed by
 * kernfs_put() once the root node is released.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
	kernfs_remove(root->sd);	/* will also free @root */
}
657 | ||
fd7b9f7b TH |
658 | /** |
659 | * kernfs_create_dir_ns - create a directory | |
660 | * @parent: parent in which to create a new directory | |
661 | * @name: name of the new directory | |
662 | * @priv: opaque data associated with the new directory | |
663 | * @ns: optional namespace tag of the directory | |
664 | * | |
665 | * Returns the created node on success, ERR_PTR() value on failure. | |
666 | */ | |
667 | struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent, | |
668 | const char *name, void *priv, | |
669 | const void *ns) | |
670 | { | |
671 | umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | |
672 | struct sysfs_addrm_cxt acxt; | |
673 | struct sysfs_dirent *sd; | |
674 | int rc; | |
675 | ||
676 | /* allocate */ | |
677 | sd = sysfs_new_dirent(name, mode, SYSFS_DIR); | |
678 | if (!sd) | |
679 | return ERR_PTR(-ENOMEM); | |
680 | ||
ba7443bc | 681 | sd->s_dir.root = parent->s_dir.root; |
fd7b9f7b TH |
682 | sd->s_ns = ns; |
683 | sd->priv = priv; | |
684 | ||
685 | /* link in */ | |
686 | sysfs_addrm_start(&acxt); | |
687 | rc = sysfs_add_one(&acxt, sd, parent); | |
688 | sysfs_addrm_finish(&acxt); | |
689 | ||
690 | if (!rc) | |
691 | return sd; | |
692 | ||
693 | kernfs_put(sd); | |
694 | return ERR_PTR(rc); | |
695 | } | |
696 | ||
697 | static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, | |
698 | unsigned int flags) | |
699 | { | |
700 | struct dentry *ret = NULL; | |
701 | struct dentry *parent = dentry->d_parent; | |
702 | struct sysfs_dirent *parent_sd = parent->d_fsdata; | |
703 | struct sysfs_dirent *sd; | |
704 | struct inode *inode; | |
705 | const void *ns = NULL; | |
706 | ||
707 | mutex_lock(&sysfs_mutex); | |
708 | ||
709 | if (parent_sd->s_flags & SYSFS_FLAG_NS) | |
710 | ns = sysfs_info(dir->i_sb)->ns; | |
711 | ||
712 | sd = kernfs_find_ns(parent_sd, dentry->d_name.name, ns); | |
713 | ||
714 | /* no such entry */ | |
715 | if (!sd) { | |
716 | ret = ERR_PTR(-ENOENT); | |
717 | goto out_unlock; | |
718 | } | |
719 | kernfs_get(sd); | |
720 | dentry->d_fsdata = sd; | |
721 | ||
722 | /* attach dentry and inode */ | |
723 | inode = sysfs_get_inode(dir->i_sb, sd); | |
724 | if (!inode) { | |
725 | ret = ERR_PTR(-ENOMEM); | |
726 | goto out_unlock; | |
727 | } | |
728 | ||
729 | /* instantiate and hash dentry */ | |
730 | ret = d_materialise_unique(dentry, inode); | |
731 | out_unlock: | |
732 | mutex_unlock(&sysfs_mutex); | |
733 | return ret; | |
734 | } | |
735 | ||
/*
 * Inode operations for directories: lookup is implemented in this file,
 * the attribute handlers are defined elsewhere.
 */
const struct inode_operations sysfs_dir_inode_operations = {
	.lookup		= sysfs_lookup,
	.permission	= sysfs_permission,
	.setattr	= sysfs_setattr,
	.getattr	= sysfs_getattr,
	.setxattr	= sysfs_setxattr,
};
743 | ||
744 | static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) | |
745 | { | |
746 | struct sysfs_dirent *last; | |
747 | ||
748 | while (true) { | |
749 | struct rb_node *rbn; | |
750 | ||
751 | last = pos; | |
752 | ||
753 | if (sysfs_type(pos) != SYSFS_DIR) | |
754 | break; | |
755 | ||
756 | rbn = rb_first(&pos->s_dir.children); | |
757 | if (!rbn) | |
758 | break; | |
759 | ||
760 | pos = to_sysfs_dirent(rbn); | |
761 | } | |
762 | ||
763 | return last; | |
764 | } | |
765 | ||
766 | /** | |
767 | * sysfs_next_descendant_post - find the next descendant for post-order walk | |
768 | * @pos: the current position (%NULL to initiate traversal) | |
769 | * @root: sysfs_dirent whose descendants to walk | |
770 | * | |
771 | * Find the next descendant to visit for post-order traversal of @root's | |
772 | * descendants. @root is included in the iteration and the last node to be | |
773 | * visited. | |
774 | */ | |
775 | static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, | |
776 | struct sysfs_dirent *root) | |
777 | { | |
778 | struct rb_node *rbn; | |
779 | ||
780 | lockdep_assert_held(&sysfs_mutex); | |
781 | ||
782 | /* if first iteration, visit leftmost descendant which may be root */ | |
783 | if (!pos) | |
784 | return sysfs_leftmost_descendant(root); | |
785 | ||
786 | /* if we visited @root, we're done */ | |
787 | if (pos == root) | |
788 | return NULL; | |
789 | ||
790 | /* if there's an unvisited sibling, visit its leftmost descendant */ | |
791 | rbn = rb_next(&pos->s_rb); | |
792 | if (rbn) | |
793 | return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); | |
794 | ||
795 | /* no sibling left, visit parent */ | |
796 | return pos->s_parent; | |
797 | } | |
798 | ||
799 | static void __kernfs_remove(struct sysfs_addrm_cxt *acxt, | |
800 | struct sysfs_dirent *sd) | |
801 | { | |
802 | struct sysfs_dirent *pos, *next; | |
803 | ||
804 | if (!sd) | |
805 | return; | |
806 | ||
807 | pr_debug("sysfs %s: removing\n", sd->s_name); | |
808 | ||
809 | next = NULL; | |
810 | do { | |
811 | pos = next; | |
812 | next = sysfs_next_descendant_post(pos, sd); | |
813 | if (pos) | |
814 | sysfs_remove_one(acxt, pos); | |
815 | } while (next); | |
816 | } | |
817 | ||
/**
 * kernfs_remove - remove a sysfs_dirent recursively
 * @sd: the sysfs_dirent to remove
 *
 * Remove @sd along with all its subdirectories and files.  Takes and
 * releases sysfs_mutex through the addrm start/finish pair.
 */
void kernfs_remove(struct sysfs_dirent *sd)
{
	struct sysfs_addrm_cxt acxt;

	sysfs_addrm_start(&acxt);
	__kernfs_remove(&acxt, sd);
	sysfs_addrm_finish(&acxt);
}
832 | ||
833 | /** | |
834 | * kernfs_remove_by_name_ns - find a sysfs_dirent by name and remove it | |
835 | * @dir_sd: parent of the target | |
836 | * @name: name of the sysfs_dirent to remove | |
837 | * @ns: namespace tag of the sysfs_dirent to remove | |
838 | * | |
839 | * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove | |
840 | * it. Returns 0 on success, -ENOENT if such entry doesn't exist. | |
841 | */ | |
842 | int kernfs_remove_by_name_ns(struct sysfs_dirent *dir_sd, const char *name, | |
843 | const void *ns) | |
844 | { | |
845 | struct sysfs_addrm_cxt acxt; | |
846 | struct sysfs_dirent *sd; | |
847 | ||
848 | if (!dir_sd) { | |
849 | WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", | |
850 | name); | |
851 | return -ENOENT; | |
852 | } | |
853 | ||
854 | sysfs_addrm_start(&acxt); | |
855 | ||
856 | sd = kernfs_find_ns(dir_sd, name, ns); | |
857 | if (sd) | |
858 | __kernfs_remove(&acxt, sd); | |
859 | ||
860 | sysfs_addrm_finish(&acxt); | |
861 | ||
862 | if (sd) | |
863 | return 0; | |
864 | else | |
865 | return -ENOENT; | |
866 | } | |
867 | ||
868 | /** | |
869 | * kernfs_rename_ns - move and rename a kernfs_node | |
870 | * @sd: target node | |
871 | * @new_parent: new parent to put @sd under | |
872 | * @new_name: new name | |
873 | * @new_ns: new namespace tag | |
874 | */ | |
875 | int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent, | |
876 | const char *new_name, const void *new_ns) | |
877 | { | |
878 | int error; | |
879 | ||
880 | mutex_lock(&sysfs_mutex); | |
881 | ||
882 | error = 0; | |
883 | if ((sd->s_parent == new_parent) && (sd->s_ns == new_ns) && | |
884 | (strcmp(sd->s_name, new_name) == 0)) | |
885 | goto out; /* nothing to rename */ | |
886 | ||
887 | error = -EEXIST; | |
888 | if (kernfs_find_ns(new_parent, new_name, new_ns)) | |
889 | goto out; | |
890 | ||
891 | /* rename sysfs_dirent */ | |
892 | if (strcmp(sd->s_name, new_name) != 0) { | |
893 | error = -ENOMEM; | |
894 | new_name = kstrdup(new_name, GFP_KERNEL); | |
895 | if (!new_name) | |
896 | goto out; | |
897 | ||
898 | kfree(sd->s_name); | |
899 | sd->s_name = new_name; | |
900 | } | |
901 | ||
902 | /* | |
903 | * Move to the appropriate place in the appropriate directories rbtree. | |
904 | */ | |
905 | sysfs_unlink_sibling(sd); | |
906 | kernfs_get(new_parent); | |
907 | kernfs_put(sd->s_parent); | |
908 | sd->s_ns = new_ns; | |
909 | sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); | |
910 | sd->s_parent = new_parent; | |
911 | sysfs_link_sibling(sd); | |
912 | ||
913 | error = 0; | |
914 | out: | |
915 | mutex_unlock(&sysfs_mutex); | |
916 | return error; | |
917 | } | |
918 | ||
/**
 * kernfs_enable_ns - enable namespace under a directory
 * @sd: directory of interest, should be empty
 *
 * This is to be called right after @sd is created to enable namespace
 * under it.  All children of @sd must have non-NULL namespace tags and
 * only the ones which match the super_block's tag will be visible.
 *
 * Warns once if @sd is not a directory or already has children; the
 * flag is set regardless.
 */
void kernfs_enable_ns(struct sysfs_dirent *sd)
{
	WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
	WARN_ON_ONCE(!RB_EMPTY_ROOT(&sd->s_dir.children));
	sd->s_flags |= SYSFS_FLAG_NS;
}
933 | ||
/*
 * Relationship between s_mode and the DT_xxx types: the value handed to
 * dir_emit() is bits 12..15 of @sd->s_mode.
 */
static inline unsigned char dt_type(struct sysfs_dirent *sd)
{
	return (sd->s_mode >> 12) & 15;
}
939 | ||
/*
 * ->release for directories: drop the reference on the readdir cursor
 * that sysfs_readdir() may have left in filp->private_data.  Always
 * returns 0.
 */
static int sysfs_dir_release(struct inode *inode, struct file *filp)
{
	kernfs_put(filp->private_data);
	return 0;
}
945 | ||
946 | static struct sysfs_dirent *sysfs_dir_pos(const void *ns, | |
947 | struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) | |
948 | { | |
949 | if (pos) { | |
950 | int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && | |
951 | pos->s_parent == parent_sd && | |
952 | hash == pos->s_hash; | |
953 | kernfs_put(pos); | |
954 | if (!valid) | |
955 | pos = NULL; | |
956 | } | |
957 | if (!pos && (hash > 1) && (hash < INT_MAX)) { | |
958 | struct rb_node *node = parent_sd->s_dir.children.rb_node; | |
959 | while (node) { | |
960 | pos = to_sysfs_dirent(node); | |
961 | ||
962 | if (hash < pos->s_hash) | |
963 | node = node->rb_left; | |
964 | else if (hash > pos->s_hash) | |
965 | node = node->rb_right; | |
966 | else | |
967 | break; | |
968 | } | |
969 | } | |
970 | /* Skip over entries in the wrong namespace */ | |
971 | while (pos && pos->s_ns != ns) { | |
972 | struct rb_node *node = rb_next(&pos->s_rb); | |
973 | if (!node) | |
974 | pos = NULL; | |
975 | else | |
976 | pos = to_sysfs_dirent(node); | |
977 | } | |
978 | return pos; | |
979 | } | |
980 | ||
981 | static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, | |
982 | struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) | |
983 | { | |
984 | pos = sysfs_dir_pos(ns, parent_sd, ino, pos); | |
985 | if (pos) | |
986 | do { | |
987 | struct rb_node *node = rb_next(&pos->s_rb); | |
988 | if (!node) | |
989 | pos = NULL; | |
990 | else | |
991 | pos = to_sysfs_dirent(node); | |
992 | } while (pos && pos->s_ns != ns); | |
993 | return pos; | |
994 | } | |
995 | ||
/*
 * ->iterate for directories.
 *
 * Emits "." and "..", then walks the children of the directory in rbtree
 * order starting from ctx->pos, emitting those in the caller's namespace.
 * The current entry is kept (with a reference) in file->private_data so
 * that a later call can resume from it.  Always returns 0.
 */
static int sysfs_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct sysfs_dirent *parent_sd = dentry->d_fsdata;
	struct sysfs_dirent *pos = file->private_data;
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
	mutex_lock(&sysfs_mutex);

	if (parent_sd->s_flags & SYSFS_FLAG_NS)
		ns = sysfs_info(dentry->d_sb)->ns;

	for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
	     pos;
	     pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
		const char *name = pos->s_name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = pos->s_ino;

		/* remember where we are; the reference keeps @pos alive */
		ctx->pos = pos->s_hash;
		file->private_data = pos;
		kernfs_get(pos);

		/* dir_emit() is called with sysfs_mutex dropped */
		mutex_unlock(&sysfs_mutex);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		mutex_lock(&sysfs_mutex);
	}
	mutex_unlock(&sysfs_mutex);
	/* walked off the end: clear the cursor and park at INT_MAX */
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}
1032 | ||
/*
 * ->llseek for directories: generic_file_llseek() performed with the
 * inode's i_mutex held.  Returns whatever generic_file_llseek() returns.
 */
static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t ret;

	mutex_lock(&inode->i_mutex);
	ret = generic_file_llseek(file, offset, whence);
	mutex_unlock(&inode->i_mutex);

	return ret;
}
1044 | ||
/* File operations for open directories. */
const struct file_operations sysfs_dir_operations = {
	.read		= generic_read_dir,
	.iterate	= sysfs_readdir,
	.release	= sysfs_dir_release,
	.llseek		= sysfs_dir_llseek,
};