* name indicated by the symlink. The old code always complained that the
* name already exists, due to not following the symlink even if its target
* is nonexistent. The new semantics affects also mknod() and link() when
- * the name is a symlink pointing to a non-existant name.
+ * the name is a symlink pointing to a non-existent name.
*
* I don't know which semantics is the right one, since I have no access
* to standards. But I found by trial that HP-UX 9.0 has the full "new"
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+ if (current_user_ns() != inode_userns(inode))
+ goto other_perms;
+
if (current_fsuid() == inode->i_uid)
mode >>= 6;
else {
mode >>= 3;
}
+other_perms:
/*
* If the DACs are ok we don't need any capability check.
*/
* Executable DACs are overridable if at least one exec bit is set.
*/
if (!(mask & MAY_EXEC) || execute_ok(inode))
- if (capable(CAP_DAC_OVERRIDE))
+ if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
return 0;
/*
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
- if (capable(CAP_DAC_READ_SEARCH))
+ if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
}
EXPORT_SYMBOL(path_put);
-/**
- * nameidata_drop_rcu - drop this nameidata out of rcu-walk
- * @nd: nameidata pathwalk data to drop
- * Returns: 0 on success, -ECHILD on failure
- *
+/*
* Path walking has 2 modes, rcu-walk and ref-walk (see
- * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
- * to drop out of rcu-walk mode and take normal reference counts on dentries
- * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
- * refcounts at the last known good point before rcu-walk got stuck, so
- * ref-walk may continue from there. If this is not successful (eg. a seqcount
- * has changed), then failure is returned and path walk restarts from the
- * beginning in ref-walk mode.
- *
- * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
- * ref-walk. Must be called from rcu-walk context.
+ * Documentation/filesystems/path-lookup.txt). In situations when we can't
+ * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
+ * normal reference counts on dentries and vfsmounts to transition to rcu-walk
+ * mode. Refcounts are grabbed at the last known good point before rcu-walk
+ * got stuck, so ref-walk may continue from there. If this is not successful
+ * (eg. a seqcount has changed), then failure is returned and it's up to caller
+ * to restart the path walk from the beginning in ref-walk mode.
*/
-static int nameidata_drop_rcu(struct nameidata *nd)
-{
- struct fs_struct *fs = current->fs;
- struct dentry *dentry = nd->path.dentry;
- int want_root = 0;
-
- BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
- want_root = 1;
- spin_lock(&fs->lock);
- if (nd->root.mnt != fs->root.mnt ||
- nd->root.dentry != fs->root.dentry)
- goto err_root;
- }
- spin_lock(&dentry->d_lock);
- if (!__d_rcu_to_refcount(dentry, nd->seq))
- goto err;
- BUG_ON(nd->inode != dentry->d_inode);
- spin_unlock(&dentry->d_lock);
- if (want_root) {
- path_get(&nd->root);
- spin_unlock(&fs->lock);
- }
- mntget(nd->path.mnt);
-
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- nd->flags &= ~LOOKUP_RCU;
- return 0;
-err:
- spin_unlock(&dentry->d_lock);
-err_root:
- if (want_root)
- spin_unlock(&fs->lock);
- return -ECHILD;
-}
-
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
-{
- if (nd->flags & LOOKUP_RCU)
- return nameidata_drop_rcu(nd);
- return 0;
-}
/**
- * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
- * @nd: nameidata pathwalk data to drop
- * @dentry: dentry to drop
+ * unlazy_walk - try to switch to ref-walk mode.
+ * @nd: nameidata pathwalk data
+ * @dentry: child of nd->path.dentry or NULL
* Returns: 0 on success, -ECHILD on failure
*
- * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
- * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
- * @nd. Must be called from rcu-walk context.
+ * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
+ * for ref-walk mode. @dentry must be a path found by a do_lookup call on
+ * @nd or NULL. Must be called from rcu-walk context.
*/
-static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
+static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
goto err_root;
}
spin_lock(&parent->d_lock);
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (!__d_rcu_to_refcount(dentry, nd->seq))
- goto err;
- /*
- * If the sequence check on the child dentry passed, then the child has
- * not been removed from its parent. This means the parent dentry must
- * be valid and able to take a reference at this point.
- */
- BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
- BUG_ON(!parent->d_count);
- parent->d_count++;
- spin_unlock(&dentry->d_lock);
+ if (!dentry) {
+ if (!__d_rcu_to_refcount(parent, nd->seq))
+ goto err_parent;
+ BUG_ON(nd->inode != parent->d_inode);
+ } else {
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err_child;
+ /*
+ * If the sequence check on the child dentry passed, then
+ * the child has not been removed from its parent. This
+ * means the parent dentry must be valid and able to take
+ * a reference at this point.
+ */
+ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
+ BUG_ON(!parent->d_count);
+ parent->d_count++;
+ spin_unlock(&dentry->d_lock);
+ }
spin_unlock(&parent->d_lock);
if (want_root) {
path_get(&nd->root);
br_read_unlock(vfsmount_lock);
nd->flags &= ~LOOKUP_RCU;
return 0;
-err:
+
+err_child:
spin_unlock(&dentry->d_lock);
+err_parent:
spin_unlock(&parent->d_lock);
err_root:
if (want_root)
return -ECHILD;
}
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
-{
- if (nd->flags & LOOKUP_RCU) {
- if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
- nd->flags &= ~LOOKUP_RCU;
- if (!(nd->flags & LOOKUP_ROOT))
- nd->root.mnt = NULL;
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- return -ECHILD;
- }
- }
- return 0;
-}
-
-/**
- * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
- * @nd: nameidata pathwalk data to drop
- * Returns: 0 on success, -ECHILD on failure
- *
- * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
- * nd->path should be the final element of the lookup, so nd->root is discarded.
- * Must be called from rcu-walk context.
- */
-static int nameidata_drop_rcu_last(struct nameidata *nd)
-{
- struct dentry *dentry = nd->path.dentry;
-
- BUG_ON(!(nd->flags & LOOKUP_RCU));
- nd->flags &= ~LOOKUP_RCU;
- if (!(nd->flags & LOOKUP_ROOT))
- nd->root.mnt = NULL;
- spin_lock(&dentry->d_lock);
- if (!__d_rcu_to_refcount(dentry, nd->seq))
- goto err_unlock;
- BUG_ON(nd->inode != dentry->d_inode);
- spin_unlock(&dentry->d_lock);
-
- mntget(nd->path.mnt);
-
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
-
- return 0;
-
-err_unlock:
- spin_unlock(&dentry->d_lock);
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- return -ECHILD;
-}
-
/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
return dentry;
}
-/*
- * handle_reval_path - force revalidation of a dentry
- *
- * In some situations the path walking code will trust dentries without
- * revalidating them. This causes problems for filesystems that depend on
- * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
- * (which indicates that it's possible for the dentry to go stale), force
- * a d_revalidate call before proceeding.
+/**
+ * complete_walk - successful completion of path walk
+ * @nd: pointer nameidata
*
- * Returns 0 if the revalidation was successful. If the revalidation fails,
- * either return the error returned by d_revalidate or -ESTALE if the
- * revalidation it just returned 0. If d_revalidate returns 0, we attempt to
- * invalidate the dentry. It's up to the caller to handle putting references
- * to the path if necessary.
+ * If we had been in RCU mode, drop out of it and legitimize nd->path.
+ * Revalidate the final result, unless we'd already done that during
+ * the path walk or the filesystem doesn't ask for it. Return 0 on
+ * success, -error on failure. In case of failure caller does not
+ * need to drop nd->path.
*/
-static inline int handle_reval_path(struct nameidata *nd)
+static int complete_walk(struct nameidata *nd)
{
struct dentry *dentry = nd->path.dentry;
int status;
+ if (nd->flags & LOOKUP_RCU) {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ spin_lock(&dentry->d_lock);
+ if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+ }
+ BUG_ON(nd->inode != dentry->d_inode);
+ spin_unlock(&dentry->d_lock);
+ mntget(nd->path.mnt);
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ }
+
if (likely(!(nd->flags & LOOKUP_JUMPED)))
return 0;
if (!status)
status = -ESTALE;
+ path_put(&nd->path);
return status;
}
static inline int exec_permission(struct inode *inode, unsigned int flags)
{
int ret;
+ struct user_namespace *ns = inode_userns(inode);
if (inode->i_op->permission) {
ret = inode->i_op->permission(inode, MAY_EXEC, flags);
if (ret == -ECHILD)
return ret;
- if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
+ if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
+ ns_capable(ns, CAP_DAC_READ_SEARCH))
goto ok;
return ret;
do {
seq = read_seqcount_begin(&fs->seq);
nd->root = fs->root;
+ nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
} while (read_seqcount_retry(&fs->seq, seq));
}
}
BUG_ON(nd->flags & LOOKUP_RCU);
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+
if (unlikely(current->total_link_count >= 40)) {
*p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
- path_put_conditional(link, nd);
path_put(&nd->path);
return -ELOOP;
}
touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
- if (link->mnt == nd->path.mnt)
- mntget(link->mnt);
-
error = security_inode_follow_link(link->dentry, nd);
if (error) {
*p = ERR_PTR(error); /* no ->put_link(), please */
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
- ret = path->dentry->d_op->d_manage(path->dentry,
- false, false);
+ ret = path->dentry->d_op->d_manage(path->dentry, false);
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
return 0;
}
+static inline bool managed_dentry_might_block(struct dentry *dentry)
+{
+ return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
+ dentry->d_op->d_manage(dentry, true) < 0);
+}
+
/*
* Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
* meet a managed dentry and we're not walking to "..". True is returned to
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode, bool reverse_transit)
{
- while (d_mountpoint(path->dentry)) {
+ for (;;) {
struct vfsmount *mounted;
- if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
- !reverse_transit &&
- path->dentry->d_op->d_manage(path->dentry, false, true) < 0)
+ /*
+ * Don't forget we might have a non-mountpoint managed dentry
+ * that wants to block transit.
+ */
+ *inode = path->dentry->d_inode;
+ if (!reverse_transit &&
+ unlikely(managed_dentry_might_block(path->dentry)))
return false;
+
+ if (!d_mountpoint(path->dentry))
+ break;
+
mounted = __lookup_mnt(path->mnt, path->dentry, 1);
if (!mounted)
break;
path->mnt = mounted;
path->dentry = mounted->mnt_root;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
- *inode = path->dentry->d_inode;
}
if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
* Care must be taken as namespace_sem may be held (indicated by mounting_here
* being true).
*/
-int follow_down(struct path *path, bool mounting_here)
+int follow_down(struct path *path)
{
unsigned managed;
int ret;
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(
- path->dentry, mounting_here, false);
+ path->dentry, false);
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
if (likely(__follow_mount_rcu(nd, path, inode, false)))
return 0;
unlazy:
- if (dentry) {
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return -ECHILD;
- } else {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- }
+ if (unlazy_walk(nd, dentry))
+ return -ECHILD;
} else {
dentry = __d_lookup(parent, name);
}
int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
if (err != -ECHILD)
return err;
- if (nameidata_drop_rcu(nd))
+ if (unlazy_walk(nd, NULL))
return -ECHILD;
}
return exec_permission(nd->inode, 0);
return -ENOENT;
}
if (unlikely(inode->i_op->follow_link) && follow) {
- if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
- return -ECHILD;
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(unlazy_walk(nd, path->dentry))) {
+ terminate_walk(nd);
+ return -ECHILD;
+ }
+ }
BUG_ON(inode != path->dentry->d_inode);
return 1;
}
}
}
- if (nd->flags & LOOKUP_RCU) {
- /* went all way through without dropping RCU */
- BUG_ON(err);
- if (nameidata_drop_rcu_last(nd))
- err = -ECHILD;
- }
-
if (!err)
- err = handle_reval_path(nd);
+ err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) {
if (!nd->inode->i_op->lookup) {
path_put(&nd->path);
- return -ENOTDIR;
+ err = -ENOTDIR;
}
}
if (!(dir->i_mode & S_ISVTX))
return 0;
+ if (current_user_ns() != inode_userns(inode))
+ goto other_userns;
if (inode->i_uid == fsuid)
return 0;
if (dir->i_uid == fsuid)
return 0;
- return !capable(CAP_FOWNER);
+
+other_userns:
+ return !ns_capable(inode_userns(inode), CAP_FOWNER);
}
/*
}
/* O_NOATIME can only be set by the owner or superuser */
- if (flag & O_NOATIME && !is_owner_or_cap(inode))
+ if (flag & O_NOATIME && !inode_owner_or_capable(inode))
return -EPERM;
/*
return ERR_PTR(error);
/* fallthrough */
case LAST_ROOT:
- if (nd->flags & LOOKUP_RCU) {
- if (nameidata_drop_rcu_last(nd))
- return ERR_PTR(-ECHILD);
- }
- error = handle_reval_path(nd);
+ error = complete_walk(nd);
if (error)
- goto exit;
+ return ERR_PTR(error);
audit_inode(pathname, nd->path.dentry);
if (open_flag & O_CREAT) {
error = -EISDIR;
}
goto ok;
case LAST_BIND:
- /* can't be RCU mode here */
- error = handle_reval_path(nd);
+ error = complete_walk(nd);
if (error)
- goto exit;
+ return ERR_PTR(error);
audit_inode(pathname, dir);
goto ok;
}
if (error) /* symlink */
return NULL;
/* sayonara */
- if (nd->flags & LOOKUP_RCU) {
- if (nameidata_drop_rcu_last(nd))
- return ERR_PTR(-ECHILD);
- }
+ error = complete_walk(nd);
+ if (error)
+ return ERR_PTR(-ECHILD);
error = -ENOTDIR;
if (nd->flags & LOOKUP_DIRECTORY) {
}
/* create side of things */
-
- if (nd->flags & LOOKUP_RCU) {
- if (nameidata_drop_rcu_last(nd))
- return ERR_PTR(-ECHILD);
- }
+ error = complete_walk(nd);
+ if (error)
+ return ERR_PTR(error);
audit_inode(pathname, dir);
error = -EISDIR;
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
+ !ns_capable(inode_userns(dir), CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
*/
void dentry_unhash(struct dentry *dentry)
{
- dget(dentry);
shrink_dcache_parent(dentry);
spin_lock(&dentry->d_lock);
- if (dentry->d_count == 2)
+ if (dentry->d_count == 1)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
}
return -EPERM;
mutex_lock(&dentry->d_inode->i_mutex);
- dentry_unhash(dentry);
if (d_mountpoint(dentry))
error = -EBUSY;
else {
if (!error) {
d_delete(dentry);
}
- dput(dentry);
return error;
}
mutex_unlock(&target->i_mutex);
if (d_unhashed(new_dentry))
d_rehash(new_dentry);
- dput(new_dentry);
}
if (!error)
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))