Merge branch 'linus' into perf/urgent
[deliverable/linux.git] / fs / exportfs / expfs.c
index a235f0016889b557e06d6273f0f3dca7d873a608..48a359dd286e79b08062db77ffdc593be92a2cb6 100644 (file)
@@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result,
        return NULL;
 }
 
-/*
- * Find root of a disconnected subtree and return a reference to it.
- */
-static struct dentry *
-find_disconnected_root(struct dentry *dentry)
+/*
+ * Report whether @dentry is connected: walk up the parent chain for as
+ * long as DCACHE_DISCONNECTED is set, and return true as soon as a
+ * dentry without the flag is reached.  Return false if the walk arrives
+ * at a dentry that is its own parent while still flagged disconnected.
+ *
+ * The function takes and drops its own references while walking; the
+ * caller's reference on @dentry is not consumed.
+ */
+static bool dentry_connected(struct dentry *dentry)
 {
        dget(dentry);
-       while (!IS_ROOT(dentry)) {
+       while (dentry->d_flags & DCACHE_DISCONNECTED) {
                struct dentry *parent = dget_parent(dentry);
 
-               if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+               dput(dentry);
+               /*
+                * Our reference on dentry has just been dropped, so it
+                * must not be dereferenced any more (IS_ROOT() would
+                * read dentry->d_parent).  Comparing the two pointers
+                * answers the same question without touching memory: a
+                * root dentry is its own parent.
+                */
+               if (dentry == parent) {
                        dput(parent);
-                       break;
+                       return false;
                }
+               dentry = parent;
+       }
+       dput(dentry);
+       return true;
+}
+
+/*
+ * Clear DCACHE_DISCONNECTED on @dentry and on each ancestor in turn,
+ * stopping at the first dentry that does not have the flag set.
+ *
+ * The function takes its own reference for the walk and drops it before
+ * returning; the caller's reference on @dentry is not consumed.
+ */
+static void clear_disconnected(struct dentry *dentry)
+{
+       dget(dentry);
+       while (dentry->d_flags & DCACHE_DISCONNECTED) {
+               struct dentry *parent = dget_parent(dentry);
+
+               /* A flagged dentry that is its own parent is unexpected here. */
+               WARN_ON_ONCE(IS_ROOT(dentry));
+
+               /* d_flags is modified under the dentry's d_lock. */
+               spin_lock(&dentry->d_lock);
+               dentry->d_flags &= ~DCACHE_DISCONNECTED;
+               spin_unlock(&dentry->d_lock);
 
                dput(dentry);
                dentry = parent;
        }
-       return dentry;
+       /* Drop the reference held on the dentry where the walk stopped. */
+       dput(dentry);
+}
+
+/*
+ * Reconnect a directory dentry with its parent.
+ *
+ * This can return a dentry, or NULL, or an ERR_PTR()-encoded error.
+ *
+ * In the first case the returned dentry is the parent of the given
+ * dentry, and may itself need to be reconnected to its parent.  The
+ * caller is responsible for the dput() of the returned parent.
+ *
+ * In the NULL case, a concurrent VFS operation has either renamed or
+ * removed this directory.  The concurrent operation has reconnected our
+ * dentry, so we no longer need to.
+ *
+ * In the error case the value is -EACCES when the filesystem has no
+ * ->get_parent method, -ESTALE when the dentry could not be verified as
+ * connected, or otherwise the error reported by ->get_parent,
+ * exportfs_get_name() or lookup_one_len().
+ *
+ * @nbuf is scratch space that receives the name of @dentry within its
+ * parent.
+ */
+static struct dentry *reconnect_one(struct vfsmount *mnt,
+               struct dentry *dentry, char *nbuf)
+{
+       struct dentry *parent;
+       struct dentry *tmp;
+       int err;
+
+       parent = ERR_PTR(-EACCES);
+       mutex_lock(&dentry->d_inode->i_mutex);
+       if (mnt->mnt_sb->s_export_op->get_parent)
+               parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
+       mutex_unlock(&dentry->d_inode->i_mutex);
+
+       if (IS_ERR(parent)) {
+               /* i_ino is unsigned long and PTR_ERR() returns long. */
+               dprintk("%s: get_parent of %lu failed, err %ld\n",
+                       __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
+               return parent;
+       }
+
+       dprintk("%s: find name of %lu in %lu\n", __func__,
+               dentry->d_inode->i_ino, parent->d_inode->i_ino);
+       err = exportfs_get_name(mnt, parent, nbuf, dentry);
+       if (err == -ENOENT)
+               goto out_reconnected;
+       if (err)
+               goto out_err;
+       dprintk("%s: found name: %s\n", __func__, nbuf);
+       mutex_lock(&parent->d_inode->i_mutex);
+       tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
+       mutex_unlock(&parent->d_inode->i_mutex);
+       if (IS_ERR(tmp)) {
+               /*
+                * err is still 0 from the successful exportfs_get_name()
+                * call.  Without this assignment out_err would return
+                * ERR_PTR(0), i.e. NULL, which the caller takes to mean
+                * "already reconnected".
+                */
+               err = PTR_ERR(tmp);
+               dprintk("%s: lookup failed: %d\n", __func__, err);
+               goto out_err;
+       }
+       if (tmp != dentry) {
+               dput(tmp);
+               goto out_reconnected;
+       }
+       dput(tmp);
+       if (IS_ROOT(dentry)) {
+               /* The lookup found our dentry but did not attach it. */
+               err = -ESTALE;
+               goto out_err;
+       }
+       return parent;
+
+out_err:
+       dput(parent);
+       return ERR_PTR(err);
+out_reconnected:
+       dput(parent);
+       /*
+        * Someone must have renamed our entry into another parent, in
+        * which case it has been reconnected by the rename.
+        *
+        * Or someone removed it entirely, in which case filehandle
+        * lookup will succeed but the directory is now marked S_DEAD
+        * and subsequent operations on it will fail.
+        *
+        * Alternatively, maybe there was no race at all, and the
+        * filesystem is just corrupt and gave us a parent that doesn't
+        * actually contain any entry pointing to this inode.  So,
+        * double check that this worked and return -ESTALE if not:
+        */
+       if (!dentry_connected(dentry))
+               return ERR_PTR(-ESTALE);
+       return NULL;
 }
 
 /*
  * Make sure target_dir is fully connected to the dentry tree.
  *
- * It may already be, as the flag isn't always updated when connection happens.
+ * On successful return, DCACHE_DISCONNECTED will be cleared on
+ * target_dir, and target_dir->d_parent->...->d_parent will reach the
+ * root of the filesystem.
+ *
+ * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
+ * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
+ * set but already be connected.  In that case we'll verify the
+ * connection to root and then clear the flag.
+ *
+ * Note that target_dir could be removed by a concurrent operation.  In
+ * that case reconnect_path may still succeed with target_dir fully
+ * connected, but further operations using the filehandle will fail when
+ * necessary (due to S_DEAD being set on the directory).
  */
 static int
 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 {
-       int noprogress = 0;
-       int err = -ESTALE;
+       struct dentry *dentry, *parent;
 
-       /*
-        * It is possible that a confused file system might not let us complete
-        * the path to the root.  For example, if get_parent returns a directory
-        * in which we cannot find a name for the child.  While this implies a
-        * very sick filesystem we don't want it to cause knfsd to spin.  Hence
-        * the noprogress counter.  If we go through the loop 10 times (2 is
-        * probably enough) without getting anywhere, we just give up
-        */
-       while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
-               struct dentry *pd = find_disconnected_root(target_dir);
-
-               if (!IS_ROOT(pd)) {
-                       /* must have found a connected parent - great */
-                       spin_lock(&pd->d_lock);
-                       pd->d_flags &= ~DCACHE_DISCONNECTED;
-                       spin_unlock(&pd->d_lock);
-                       noprogress = 0;
-               } else if (pd == mnt->mnt_sb->s_root) {
-                       printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
-                       spin_lock(&pd->d_lock);
-                       pd->d_flags &= ~DCACHE_DISCONNECTED;
-                       spin_unlock(&pd->d_lock);
-                       noprogress = 0;
-               } else {
-                       /*
-                        * We have hit the top of a disconnected path, try to
-                        * find parent and connect.
-                        *
-                        * Racing with some other process renaming a directory
-                        * isn't much of a problem here.  If someone renames
-                        * the directory, it will end up properly connected,
-                        * which is what we want
-                        *
-                        * Getting the parent can't be supported generically,
-                        * the locking is too icky.
-                        *
-                        * Instead we just return EACCES.  If server reboots
-                        * or inodes get flushed, you lose
-                        */
-                       struct dentry *ppd = ERR_PTR(-EACCES);
-                       struct dentry *npd;
-
-                       mutex_lock(&pd->d_inode->i_mutex);
-                       if (mnt->mnt_sb->s_export_op->get_parent)
-                               ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
-                       mutex_unlock(&pd->d_inode->i_mutex);
-
-                       if (IS_ERR(ppd)) {
-                               err = PTR_ERR(ppd);
-                               dprintk("%s: get_parent of %ld failed, err %d\n",
-                                       __func__, pd->d_inode->i_ino, err);
-                               dput(pd);
-                               break;
-                       }
+       /* Start the upward walk at target_dir, holding our own reference. */
+       dentry = dget(target_dir);
 
-                       dprintk("%s: find name of %lu in %lu\n", __func__,
-                               pd->d_inode->i_ino, ppd->d_inode->i_ino);
-                       err = exportfs_get_name(mnt, ppd, nbuf, pd);
-                       if (err) {
-                               dput(ppd);
-                               dput(pd);
-                               if (err == -ENOENT)
-                                       /* some race between get_parent and
-                                        * get_name?  just try again
-                                        */
-                                       continue;
-                               break;
-                       }
-                       dprintk("%s: found name: %s\n", __func__, nbuf);
-                       mutex_lock(&ppd->d_inode->i_mutex);
-                       npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
-                       mutex_unlock(&ppd->d_inode->i_mutex);
-                       if (IS_ERR(npd)) {
-                               err = PTR_ERR(npd);
-                               dprintk("%s: lookup failed: %d\n",
-                                       __func__, err);
-                               dput(ppd);
-                               dput(pd);
-                               break;
-                       }
-                       /* we didn't really want npd, we really wanted
-                        * a side-effect of the lookup.
-                        * hopefully, npd == pd, though it isn't really
-                        * a problem if it isn't
-                        */
-                       if (npd == pd)
-                               noprogress = 0;
-                       else
-                               printk("%s: npd != pd\n", __func__);
-                       dput(npd);
-                       dput(ppd);
-                       if (IS_ROOT(pd)) {
-                               /* something went wrong, we have to give up */
-                               dput(pd);
-                               break;
-                       }
-               }
-               dput(pd);
-       }
+       while (dentry->d_flags & DCACHE_DISCONNECTED) {
+               /* The filesystem root is never expected to be flagged. */
+               BUG_ON(dentry == mnt->mnt_sb->s_root);
 
-       if (target_dir->d_flags & DCACHE_DISCONNECTED) {
-               /* something went wrong - oh-well */
-               if (!err)
-                       err = -ESTALE;
-               return err;
-       }
+               /*
+                * A flagged dentry that is its own parent has to be
+                * attached via reconnect_one(); otherwise just step up
+                * to the parent it already has.
+                */
+               if (IS_ROOT(dentry))
+                       parent = reconnect_one(mnt, dentry, nbuf);
+               else
+                       parent = dget_parent(dentry);
 
+               /*
+                * NULL from reconnect_one(): the dentry was found to be
+                * connected already, so the walk can stop here.
+                */
+               if (!parent)
+                       break;
+               dput(dentry);
+               /* Propagate reconnect_one()'s error as a negative errno. */
+               if (IS_ERR(parent))
+                       return PTR_ERR(parent);
+               dentry = parent;
+       }
+       /* Drop the walk's last reference, then clear the flags from target_dir up. */
+       dput(dentry);
+       clear_disconnected(target_dir);
        return 0;
 }
 
@@ -215,7 +232,7 @@ struct getdents_callback {
        struct dir_context ctx;
        char *name;             /* name that was found. It already points to a
                                   buffer NAME_MAX+1 is size */
-       unsigned long ino;      /* the inum we are looking for */
+       u64 ino;                /* the inum we are looking for */
        int found;              /* inode matched? */
        int sequence;           /* sequence counter */
 };
@@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
        struct inode *dir = path->dentry->d_inode;
        int error;
        struct file *file;
+       struct kstat stat;
+       struct path child_path = {
+               .mnt = path->mnt,
+               .dentry = child,
+       };
        struct getdents_callback buffer = {
                .ctx.actor = filldir_one,
                .name = name,
-               .ino = child->d_inode->i_ino
        };
 
        error = -ENOTDIR;
@@ -267,6 +288,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
        error = -EINVAL;
        if (!dir->i_fop)
                goto out;
+       /*
+        * inode->i_ino is unsigned long, kstat->ino is u64, so the
+        * former would be insufficient on 32-bit hosts when the
+        * filesystem supports 64-bit inode numbers.  So we need to
+        * actually call ->getattr, not just read i_ino:
+        */
+       error = vfs_getattr_nosec(&child_path, &stat);
+       if (error)
+               return error;
+       buffer.ino = stat.ino;
        /*
         * Open the directory ...
         */
This page took 0.031733 seconds and 5 git commands to generate.