exportfs: stop retrying once we race with rename/remove
[deliverable/linux.git] / fs / exportfs / expfs.c
1 /*
2 * Copyright (C) Neil Brown 2002
3 * Copyright (C) Christoph Hellwig 2007
4 *
5 * This file contains the code mapping from inodes to NFS file handles,
6 * and for mapping back from file handles to dentries.
7 *
8 * For details on why we do all the strange and hairy things in here
9 * take a look at Documentation/filesystems/nfs/Exporting.
10 */
11 #include <linux/exportfs.h>
12 #include <linux/fs.h>
13 #include <linux/file.h>
14 #include <linux/module.h>
15 #include <linux/mount.h>
16 #include <linux/namei.h>
17 #include <linux/sched.h>
18
/* Debug tracing is compiled out: every dprintk() expands to an empty statement. */
#define dprintk(fmt, args...)	do { } while (0)


/* Generic readdir-based name lookup; the definition appears later in this file. */
static int get_name(const struct path *path, char *name, struct dentry *child);
23
24
25 static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
26 char *name, struct dentry *child)
27 {
28 const struct export_operations *nop = dir->d_sb->s_export_op;
29 struct path path = {.mnt = mnt, .dentry = dir};
30
31 if (nop->get_name)
32 return nop->get_name(dir, name, child);
33 else
34 return get_name(&path, name, child);
35 }
36
37 /*
38 * Check if the dentry or any of it's aliases is acceptable.
39 */
40 static struct dentry *
41 find_acceptable_alias(struct dentry *result,
42 int (*acceptable)(void *context, struct dentry *dentry),
43 void *context)
44 {
45 struct dentry *dentry, *toput = NULL;
46 struct inode *inode;
47
48 if (acceptable(context, result))
49 return result;
50
51 inode = result->d_inode;
52 spin_lock(&inode->i_lock);
53 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
54 dget(dentry);
55 spin_unlock(&inode->i_lock);
56 if (toput)
57 dput(toput);
58 if (dentry != result && acceptable(context, dentry)) {
59 dput(result);
60 return dentry;
61 }
62 spin_lock(&inode->i_lock);
63 toput = dentry;
64 }
65 spin_unlock(&inode->i_lock);
66
67 if (toput)
68 dput(toput);
69 return NULL;
70 }
71
72 /*
73 * Find root of a disconnected subtree and return a reference to it.
74 */
75 static struct dentry *
76 find_disconnected_root(struct dentry *dentry)
77 {
78 dget(dentry);
79 while (!IS_ROOT(dentry)) {
80 struct dentry *parent = dget_parent(dentry);
81
82 if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
83 dput(parent);
84 break;
85 }
86
87 dput(dentry);
88 dentry = parent;
89 }
90 return dentry;
91 }
92
93 static bool dentry_connected(struct dentry *dentry)
94 {
95 dget(dentry);
96 while (dentry->d_flags & DCACHE_DISCONNECTED) {
97 struct dentry *parent = dget_parent(dentry);
98
99 dput(dentry);
100 if (IS_ROOT(dentry)) {
101 dput(parent);
102 return false;
103 }
104 dentry = parent;
105 }
106 dput(dentry);
107 return true;
108 }
109
110 static void clear_disconnected(struct dentry *dentry)
111 {
112 dget(dentry);
113 while (dentry->d_flags & DCACHE_DISCONNECTED) {
114 struct dentry *parent = dget_parent(dentry);
115
116 WARN_ON_ONCE(IS_ROOT(dentry));
117
118 spin_lock(&dentry->d_lock);
119 dentry->d_flags &= ~DCACHE_DISCONNECTED;
120 spin_unlock(&dentry->d_lock);
121
122 dput(dentry);
123 dentry = parent;
124 }
125 dput(dentry);
126 }
127
128 /*
129 * Make sure target_dir is fully connected to the dentry tree.
130 *
131 * On successful return, DCACHE_DISCONNECTED will be cleared on
132 * target_dir, and target_dir->d_parent->...->d_parent will reach the
133 * root of the filesystem.
134 *
135 * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
136 * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
137 * set but already be connected. In that case we'll verify the
138 * connection to root and then clear the flag.
139 *
140 * Note that target_dir could be removed by a concurrent operation. In
141 * that case reconnect_path may still succeed with target_dir fully
142 * connected, but further operations using the filehandle will fail when
143 * necessary (due to S_DEAD being set on the directory).
144 */
145 static int
146 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
147 {
148 int noprogress = 0;
149 int err = -ESTALE;
150
151 /*
152 * It is possible that a confused file system might not let us complete
153 * the path to the root. For example, if get_parent returns a directory
154 * in which we cannot find a name for the child. While this implies a
155 * very sick filesystem we don't want it to cause knfsd to spin. Hence
156 * the noprogress counter. If we go through the loop 10 times (2 is
157 * probably enough) without getting anywhere, we just give up
158 */
159 while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
160 struct dentry *pd = find_disconnected_root(target_dir);
161
162 BUG_ON(pd == mnt->mnt_sb->s_root);
163
164 if (!IS_ROOT(pd)) {
165 /* must have found a connected parent - great */
166 clear_disconnected(target_dir);
167 dput(pd);
168 break;
169 } else {
170 /*
171 * We have hit the top of a disconnected path, try to
172 * find parent and connect.
173 *
174 * Racing with some other process renaming a directory
175 * isn't much of a problem here. If someone renames
176 * the directory, it will end up properly connected,
177 * which is what we want
178 *
179 * Getting the parent can't be supported generically,
180 * the locking is too icky.
181 *
182 * Instead we just return EACCES. If server reboots
183 * or inodes get flushed, you lose
184 */
185 struct dentry *ppd = ERR_PTR(-EACCES);
186 struct dentry *npd;
187
188 mutex_lock(&pd->d_inode->i_mutex);
189 if (mnt->mnt_sb->s_export_op->get_parent)
190 ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
191 mutex_unlock(&pd->d_inode->i_mutex);
192
193 if (IS_ERR(ppd)) {
194 err = PTR_ERR(ppd);
195 dprintk("%s: get_parent of %ld failed, err %d\n",
196 __func__, pd->d_inode->i_ino, err);
197 dput(pd);
198 break;
199 }
200
201 dprintk("%s: find name of %lu in %lu\n", __func__,
202 pd->d_inode->i_ino, ppd->d_inode->i_ino);
203 err = exportfs_get_name(mnt, ppd, nbuf, pd);
204 if (err) {
205 dput(ppd);
206 dput(pd);
207 if (err == -ENOENT)
208 /* some race between get_parent and
209 * get_name?
210 */
211 goto out_reconnected;
212 break;
213 }
214 dprintk("%s: found name: %s\n", __func__, nbuf);
215 mutex_lock(&ppd->d_inode->i_mutex);
216 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
217 mutex_unlock(&ppd->d_inode->i_mutex);
218 if (IS_ERR(npd)) {
219 err = PTR_ERR(npd);
220 dprintk("%s: lookup failed: %d\n",
221 __func__, err);
222 dput(ppd);
223 dput(pd);
224 break;
225 }
226 /* we didn't really want npd, we really wanted
227 * a side-effect of the lookup.
228 * hopefully, npd == pd, though it isn't really
229 * a problem if it isn't
230 */
231 dput(npd);
232 dput(ppd);
233 if (npd == pd)
234 noprogress = 0;
235 else
236 goto out_reconnected;
237 if (IS_ROOT(pd)) {
238 /* something went wrong, we have to give up */
239 dput(pd);
240 break;
241 }
242 }
243 dput(pd);
244 }
245
246 if (target_dir->d_flags & DCACHE_DISCONNECTED) {
247 /* something went wrong - oh-well */
248 if (!err)
249 err = -ESTALE;
250 return err;
251 }
252
253 return 0;
254 out_reconnected:
255 /*
256 * Someone must have renamed our entry into another parent, in
257 * which case it has been reconnected by the rename.
258 *
259 * Or someone removed it entirely, in which case filehandle
260 * lookup will succeed but the directory is now IS_DEAD and
261 * subsequent operations on it will fail.
262 *
263 * Alternatively, maybe there was no race at all, and the
264 * filesystem is just corrupt and gave us a parent that doesn't
265 * actually contain any entry pointing to this inode. So,
266 * double check that this worked and return -ESTALE if not:
267 */
268 if (!dentry_connected(target_dir))
269 return -ESTALE;
270 clear_disconnected(target_dir);
271 return 0;
272 }
273
274 struct getdents_callback {
275 struct dir_context ctx;
276 char *name; /* name that was found. It already points to a
277 buffer NAME_MAX+1 is size */
278 u64 ino; /* the inum we are looking for */
279 int found; /* inode matched? */
280 int sequence; /* sequence counter */
281 };
282
283 /*
284 * A rather strange filldir function to capture
285 * the name matching the specified inode number.
286 */
287 static int filldir_one(void * __buf, const char * name, int len,
288 loff_t pos, u64 ino, unsigned int d_type)
289 {
290 struct getdents_callback *buf = __buf;
291 int result = 0;
292
293 buf->sequence++;
294 if (buf->ino == ino && len <= NAME_MAX) {
295 memcpy(buf->name, name, len);
296 buf->name[len] = '\0';
297 buf->found = 1;
298 result = -1;
299 }
300 return result;
301 }
302
303 /**
304 * get_name - default export_operations->get_name function
305 * @dentry: the directory in which to find a name
306 * @name: a pointer to a %NAME_MAX+1 char buffer to store the name
307 * @child: the dentry for the child directory.
308 *
309 * calls readdir on the parent until it finds an entry with
310 * the same inode number as the child, and returns that.
311 */
312 static int get_name(const struct path *path, char *name, struct dentry *child)
313 {
314 const struct cred *cred = current_cred();
315 struct inode *dir = path->dentry->d_inode;
316 int error;
317 struct file *file;
318 struct kstat stat;
319 struct path child_path = {
320 .mnt = path->mnt,
321 .dentry = child,
322 };
323 struct getdents_callback buffer = {
324 .ctx.actor = filldir_one,
325 .name = name,
326 };
327
328 error = -ENOTDIR;
329 if (!dir || !S_ISDIR(dir->i_mode))
330 goto out;
331 error = -EINVAL;
332 if (!dir->i_fop)
333 goto out;
334 /*
335 * inode->i_ino is unsigned long, kstat->ino is u64, so the
336 * former would be insufficient on 32-bit hosts when the
337 * filesystem supports 64-bit inode numbers. So we need to
338 * actually call ->getattr, not just read i_ino:
339 */
340 error = vfs_getattr_nosec(&child_path, &stat);
341 if (error)
342 return error;
343 buffer.ino = stat.ino;
344 /*
345 * Open the directory ...
346 */
347 file = dentry_open(path, O_RDONLY, cred);
348 error = PTR_ERR(file);
349 if (IS_ERR(file))
350 goto out;
351
352 error = -EINVAL;
353 if (!file->f_op->iterate)
354 goto out_close;
355
356 buffer.sequence = 0;
357 while (1) {
358 int old_seq = buffer.sequence;
359
360 error = iterate_dir(file, &buffer.ctx);
361 if (buffer.found) {
362 error = 0;
363 break;
364 }
365
366 if (error < 0)
367 break;
368
369 error = -ENOENT;
370 if (old_seq == buffer.sequence)
371 break;
372 }
373
374 out_close:
375 fput(file);
376 out:
377 return error;
378 }
379
380 /**
381 * export_encode_fh - default export_operations->encode_fh function
382 * @inode: the object to encode
383 * @fh: where to store the file handle fragment
384 * @max_len: maximum length to store there
385 * @parent: parent directory inode, if wanted
386 *
387 * This default encode_fh function assumes that the 32 inode number
388 * is suitable for locating an inode, and that the generation number
389 * can be used to check that it is still valid. It places them in the
390 * filehandle fragment where export_decode_fh expects to find them.
391 */
392 static int export_encode_fh(struct inode *inode, struct fid *fid,
393 int *max_len, struct inode *parent)
394 {
395 int len = *max_len;
396 int type = FILEID_INO32_GEN;
397
398 if (parent && (len < 4)) {
399 *max_len = 4;
400 return FILEID_INVALID;
401 } else if (len < 2) {
402 *max_len = 2;
403 return FILEID_INVALID;
404 }
405
406 len = 2;
407 fid->i32.ino = inode->i_ino;
408 fid->i32.gen = inode->i_generation;
409 if (parent) {
410 fid->i32.parent_ino = parent->i_ino;
411 fid->i32.parent_gen = parent->i_generation;
412 len = 4;
413 type = FILEID_INO32_GEN_PARENT;
414 }
415 *max_len = len;
416 return type;
417 }
418
419 int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
420 int *max_len, struct inode *parent)
421 {
422 const struct export_operations *nop = inode->i_sb->s_export_op;
423
424 if (nop && nop->encode_fh)
425 return nop->encode_fh(inode, fid->raw, max_len, parent);
426
427 return export_encode_fh(inode, fid, max_len, parent);
428 }
429 EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
430
431 int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
432 int connectable)
433 {
434 int error;
435 struct dentry *p = NULL;
436 struct inode *inode = dentry->d_inode, *parent = NULL;
437
438 if (connectable && !S_ISDIR(inode->i_mode)) {
439 p = dget_parent(dentry);
440 /*
441 * note that while p might've ceased to be our parent already,
442 * it's still pinned by and still positive.
443 */
444 parent = p->d_inode;
445 }
446
447 error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
448 dput(p);
449
450 return error;
451 }
452 EXPORT_SYMBOL_GPL(exportfs_encode_fh);
453
454 struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
455 int fh_len, int fileid_type,
456 int (*acceptable)(void *, struct dentry *), void *context)
457 {
458 const struct export_operations *nop = mnt->mnt_sb->s_export_op;
459 struct dentry *result, *alias;
460 char nbuf[NAME_MAX+1];
461 int err;
462
463 /*
464 * Try to get any dentry for the given file handle from the filesystem.
465 */
466 if (!nop || !nop->fh_to_dentry)
467 return ERR_PTR(-ESTALE);
468 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
469 if (!result)
470 result = ERR_PTR(-ESTALE);
471 if (IS_ERR(result))
472 return result;
473
474 if (S_ISDIR(result->d_inode->i_mode)) {
475 /*
476 * This request is for a directory.
477 *
478 * On the positive side there is only one dentry for each
479 * directory inode. On the negative side this implies that we
480 * to ensure our dentry is connected all the way up to the
481 * filesystem root.
482 */
483 if (result->d_flags & DCACHE_DISCONNECTED) {
484 err = reconnect_path(mnt, result, nbuf);
485 if (err)
486 goto err_result;
487 }
488
489 if (!acceptable(context, result)) {
490 err = -EACCES;
491 goto err_result;
492 }
493
494 return result;
495 } else {
496 /*
497 * It's not a directory. Life is a little more complicated.
498 */
499 struct dentry *target_dir, *nresult;
500
501 /*
502 * See if either the dentry we just got from the filesystem
503 * or any alias for it is acceptable. This is always true
504 * if this filesystem is exported without the subtreecheck
505 * option. If the filesystem is exported with the subtree
506 * check option there's a fair chance we need to look at
507 * the parent directory in the file handle and make sure
508 * it's connected to the filesystem root.
509 */
510 alias = find_acceptable_alias(result, acceptable, context);
511 if (alias)
512 return alias;
513
514 /*
515 * Try to extract a dentry for the parent directory from the
516 * file handle. If this fails we'll have to give up.
517 */
518 err = -ESTALE;
519 if (!nop->fh_to_parent)
520 goto err_result;
521
522 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
523 fh_len, fileid_type);
524 if (!target_dir)
525 goto err_result;
526 err = PTR_ERR(target_dir);
527 if (IS_ERR(target_dir))
528 goto err_result;
529
530 /*
531 * And as usual we need to make sure the parent directory is
532 * connected to the filesystem root. The VFS really doesn't
533 * like disconnected directories..
534 */
535 err = reconnect_path(mnt, target_dir, nbuf);
536 if (err) {
537 dput(target_dir);
538 goto err_result;
539 }
540
541 /*
542 * Now that we've got both a well-connected parent and a
543 * dentry for the inode we're after, make sure that our
544 * inode is actually connected to the parent.
545 */
546 err = exportfs_get_name(mnt, target_dir, nbuf, result);
547 if (!err) {
548 mutex_lock(&target_dir->d_inode->i_mutex);
549 nresult = lookup_one_len(nbuf, target_dir,
550 strlen(nbuf));
551 mutex_unlock(&target_dir->d_inode->i_mutex);
552 if (!IS_ERR(nresult)) {
553 if (nresult->d_inode) {
554 dput(result);
555 result = nresult;
556 } else
557 dput(nresult);
558 }
559 }
560
561 /*
562 * At this point we are done with the parent, but it's pinned
563 * by the child dentry anyway.
564 */
565 dput(target_dir);
566
567 /*
568 * And finally make sure the dentry is actually acceptable
569 * to NFSD.
570 */
571 alias = find_acceptable_alias(result, acceptable, context);
572 if (!alias) {
573 err = -EACCES;
574 goto err_result;
575 }
576
577 return alias;
578 }
579
580 err_result:
581 dput(result);
582 return ERR_PTR(err);
583 }
584 EXPORT_SYMBOL_GPL(exportfs_decode_fh);
585
586 MODULE_LICENSE("GPL");
This page took 0.042055 seconds and 6 git commands to generate.