Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 29 Jul 2016 22:54:19 +0000 (15:54 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 29 Jul 2016 22:54:19 +0000 (15:54 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 22:54:19 +0000 (15:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jul 2016 22:54:19 +0000 (15:54 -0700)
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c

index d4cc73bb6e1eaea0baca4f6574fb162ecd225790..542801f04b0d8d61637394897c2c855d3c636411 100644 (file)
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -415,7 +415,7 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
                 return rc;
         }
  
-       rc = posix_acl_valid(acl);
+       rc = posix_acl_valid(&init_user_ns, acl);
         if (rc) {
                 CERROR("validate acl: %d\n", rc);
                 posix_acl_release(acl);
diff --git a/fs/9p/acl.c b/fs/9p/acl.c

index 0576eaeb60b952341f8b739db4e458128aab757c..5b6a1743ea17bdf33f3c76a3929dde92d8051eb6 100644 (file)
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -266,7 +266,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
                 if (IS_ERR(acl))
                         return PTR_ERR(acl);
                 else if (acl) {
-                       retval = posix_acl_valid(acl);
+                       retval = posix_acl_valid(inode->i_sb->s_user_ns, acl);
                         if (retval)
                                 goto err_out;
                 }
diff --git a/fs/attr.c b/fs/attr.c

index 25b24d0f6c8810c86ef79319e091fc65231733d0..42bb42bb3c72c206923294a3f08f0020df6c5a43 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -255,6 +255,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
         if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
                 return 0;
  
+       /*
+        * Verify that uid/gid changes are valid in the target
+        * namespace of the superblock.
+        */
+       if (ia_valid & ATTR_UID &&
+           !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+               return -EOVERFLOW;
+       if (ia_valid & ATTR_GID &&
+           !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+               return -EOVERFLOW;
+
+       /* Don't allow modifications of files with invalid uids or
+        * gids unless those uids & gids are being made valid.
+        */
+       if (!(ia_valid & ATTR_UID) && !uid_valid(inode->i_uid))
+               return -EOVERFLOW;
+       if (!(ia_valid & ATTR_GID) && !gid_valid(inode->i_gid))
+               return -EOVERFLOW;
+
         error = security_inode_setattr(dentry, attr);
         if (error)
                 return error;
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 5cbd5391667eed740f98df7b627903ec3a056d22..ada42cf42d0667a5d41be90daa203766854e3705 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1846,7 +1846,7 @@ struct block_device *lookup_bdev(const char *pathname)
         if (!S_ISBLK(inode->i_mode))
                 goto fail;
         error = -EACCES;
-       if (path.mnt->mnt_flags & MNT_NODEV)
+       if (!may_open_dev(&path))
                 goto fail;
         error = -ENOMEM;
         bdev = bd_acquire(inode);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c

index 37c134a132c7dbaa561cbfe287dec1f6499fa624..d116453b0276634fabb32ee96df9cf96ce9cb25a 100644 (file)
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -396,6 +396,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
  {
         struct inode *inode;
  
+       s->s_iflags &= ~SB_I_NODEV;
         s->s_blocksize = 1024;
         s->s_blocksize_bits = 10;
         s->s_magic = DEVPTS_SUPER_MAGIC;
@@ -480,7 +481,7 @@ static struct file_system_type devpts_fs_type = {
         .name           = "devpts",
         .mount          = devpts_mount,
         .kill_sb        = devpts_kill_sb,
-       .fs_flags       = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
+       .fs_flags       = FS_USERNS_MOUNT,
  };
  
  /*
diff --git a/fs/exec.c b/fs/exec.c

index 887c1c955df8264efc43bd0964f971ac8c107f34..ca239fc86d8d0acd5446d754ff341e2d4d4613cb 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1411,7 +1411,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
         bprm->cred->euid = current_euid();
         bprm->cred->egid = current_egid();
  
-       if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+       if (!mnt_may_suid(bprm->file->f_path.mnt))
                 return;
  
         if (task_no_new_privs(current))
diff --git a/fs/inode.c b/fs/inode.c

index e171f7b5f9e490c6fbcc07cdb5d4d12a80446c90..9cef4e16aedab53b5ff1272a5936de251ccddc58 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1619,6 +1619,13 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
  
         if (inode->i_flags & S_NOATIME)
                 return false;
+
+       /* Atime updates will likely cause i_uid and i_gid to be written
+        * back improperly if their true value is unknown to the vfs.
+        */
+       if (HAS_UNMAPPED_ID(inode))
+               return false;
+
         if (IS_NOATIME(inode))
                 return false;
         if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c

index 63534f5f9073c364bacd619a2a07e75617e4ece2..b3d73ad52b22ae9d22f2cea762fa0d5d34809e3e 100644 (file)
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -152,6 +152,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
         struct dentry *root;
  
         info->sb = sb;
+       /* Userspace would break if executables or devices appear on sysfs */
+       sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
         sb->s_blocksize = PAGE_SIZE;
         sb->s_blocksize_bits = PAGE_SHIFT;
         sb->s_magic = magic;
@@ -241,7 +243,8 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
         info->root = root;
         info->ns = ns;
  
-       sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info);
+       sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
+                        &init_user_ns, info);
         if (IS_ERR(sb) || sb->s_fs_info != info)
                 kfree(info);
         if (IS_ERR(sb))
diff --git a/fs/namei.c b/fs/namei.c

index 68a896c804b77d01e6c025bf52c4beebe7c54afd..c386a329ab203d44bc9a1ee5c49d5681a32fd81d 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -36,6 +36,7 @@
  #include <linux/posix_acl.h>
  #include <linux/hash.h>
  #include <linux/bitops.h>
+#include <linux/init_task.h>
  #include <asm/uaccess.h>
  
  #include "internal.h"
@@ -410,6 +411,14 @@ int __inode_permission(struct inode *inode, int mask)
                  */
                 if (IS_IMMUTABLE(inode))
                         return -EACCES;
+
+               /*
+                * Updating mtime will likely cause i_uid and i_gid to be
+                * written back improperly if their true value is unknown
+                * to the vfs.
+                */
+               if (HAS_UNMAPPED_ID(inode))
+                       return -EACCES;
         }
  
         retval = do_inode_permission(inode, mask);
@@ -901,6 +910,7 @@ static inline int may_follow_link(struct nameidata *nd)
  {
         const struct inode *inode;
         const struct inode *parent;
+       kuid_t puid;
  
         if (!sysctl_protected_symlinks)
                 return 0;
@@ -916,7 +926,8 @@ static inline int may_follow_link(struct nameidata *nd)
                 return 0;
  
         /* Allowed if parent directory and link owner match. */
-       if (uid_eq(parent->i_uid, inode->i_uid))
+       puid = parent->i_uid;
+       if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
                 return 0;
  
         if (nd->flags & LOOKUP_RCU)
@@ -1089,6 +1100,7 @@ static int follow_automount(struct path *path, struct nameidata *nd,
                             bool *need_mntput)
  {
         struct vfsmount *mnt;
+       const struct cred *old_cred;
         int err;
  
         if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
@@ -1110,11 +1122,16 @@ static int follow_automount(struct path *path, struct nameidata *nd,
             path->dentry->d_inode)
                 return -EISDIR;
  
+       if (path->dentry->d_sb->s_user_ns != &init_user_ns)
+               return -EACCES;
+
         nd->total_link_count++;
         if (nd->total_link_count >= 40)
                 return -ELOOP;
  
+       old_cred = override_creds(&init_cred);
         mnt = path->dentry->d_op->d_automount(path);
+       revert_creds(old_cred);
         if (IS_ERR(mnt)) {
                 /*
                  * The filesystem is allowed to return -EISDIR here to indicate
@@ -2741,10 +2758,11 @@ EXPORT_SYMBOL(__check_sticky);
   *     c. have CAP_FOWNER capability
   *  6. If the victim is append-only or immutable we can't do antyhing with
   *     links pointing to it.
- *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
- *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
- *  9. We can't remove a root or mountpoint.
- * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ *  7. If the victim has an unknown uid or gid we can't change the inode.
+ *  8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ *  9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 10. We can't remove a root or mountpoint.
+ * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
   *     nfs_async_unlink().
   */
  static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
@@ -2766,7 +2784,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
                 return -EPERM;
  
         if (check_sticky(dir, inode) || IS_APPEND(inode) ||
-           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
+           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
                 return -EPERM;
         if (isdir) {
                 if (!d_is_dir(victim))
@@ -2787,16 +2805,22 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
   *  1. We can't do it if child already exists (open has special treatment for
   *     this case, but since we are inlined it's OK)
   *  2. We can't do it if dir is read-only (done in permission())
- *  3. We should have write and exec permissions on dir
- *  4. We can't do it if dir is immutable (done in permission())
+ *  3. We can't do it if the fs can't represent the fsuid or fsgid.
+ *  4. We should have write and exec permissions on dir
+ *  5. We can't do it if dir is immutable (done in permission())
   */
  static inline int may_create(struct inode *dir, struct dentry *child)
  {
+       struct user_namespace *s_user_ns;
         audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
         if (child->d_inode)
                 return -EEXIST;
         if (IS_DEADDIR(dir))
                 return -ENOENT;
+       s_user_ns = dir->i_sb->s_user_ns;
+       if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
+           !kgid_has_mapping(s_user_ns, current_fsgid()))
+               return -EOVERFLOW;
         return inode_permission(dir, MAY_WRITE | MAY_EXEC);
  }
  
@@ -2865,6 +2889,12 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  }
  EXPORT_SYMBOL(vfs_create);
  
+bool may_open_dev(const struct path *path)
+{
+       return !(path->mnt->mnt_flags & MNT_NODEV) &&
+               !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
+}
+
  static int may_open(struct path *path, int acc_mode, int flag)
  {
         struct dentry *dentry = path->dentry;
@@ -2883,7 +2913,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
                 break;
         case S_IFBLK:
         case S_IFCHR:
-               if (path->mnt->mnt_flags & MNT_NODEV)
+               if (!may_open_dev(path))
                         return -EACCES;
                 /*FALLTHRU*/
         case S_IFIFO:
@@ -4135,6 +4165,13 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
          */
         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                 return -EPERM;
+       /*
+        * Updating the link count will likely cause i_uid and i_gid to
+        * be written back improperly if their true value is unknown to
+        * the vfs.
+        */
+       if (HAS_UNMAPPED_ID(inode))
+               return -EPERM;
         if (!dir->i_op->link)
                 return -EPERM;
         if (S_ISDIR(inode->i_mode))
diff --git a/fs/namespace.c b/fs/namespace.c

index 419f746d851d1c1745fc0edca4a37459a4b17f35..7bb2cda3bfef50b27f9bb8b3b478cc7aeb3d1049 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2186,13 +2186,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
         }
         if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
             !(mnt_flags & MNT_NODEV)) {
-               /* Was the nodev implicitly added in mount? */
-               if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
-                   !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
-                       mnt_flags |= MNT_NODEV;
-               } else {
-                       return -EPERM;
-               }
+               return -EPERM;
         }
         if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
             !(mnt_flags & MNT_NOSUID)) {
@@ -2376,7 +2370,7 @@ unlock:
         return err;
  }
  
-static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
+static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
  
  /*
   * create a new mount for userspace and request it to be added into the
@@ -2386,7 +2380,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
                         int mnt_flags, const char *name, void *data)
  {
         struct file_system_type *type;
-       struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
         struct vfsmount *mnt;
         int err;
  
@@ -2397,26 +2390,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
         if (!type)
                 return -ENODEV;
  
-       if (user_ns != &init_user_ns) {
-               if (!(type->fs_flags & FS_USERNS_MOUNT)) {
-                       put_filesystem(type);
-                       return -EPERM;
-               }
-               /* Only in special cases allow devices from mounts
-                * created outside the initial user namespace.
-                */
-               if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
-                       flags |= MS_NODEV;
-                       mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
-               }
-               if (type->fs_flags & FS_USERNS_VISIBLE) {
-                       if (!fs_fully_visible(type, &mnt_flags)) {
-                               put_filesystem(type);
-                               return -EPERM;
-                       }
-               }
-       }
-
         mnt = vfs_kern_mount(type, flags, name, data);
         if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
             !mnt->mnt_sb->s_subtype)
@@ -2426,6 +2399,11 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
         if (IS_ERR(mnt))
                 return PTR_ERR(mnt);
  
+       if (mount_too_revealing(mnt, &mnt_flags)) {
+               mntput(mnt);
+               return -EPERM;
+       }
+
         err = do_add_mount(real_mount(mnt), path, mnt_flags);
         if (err)
                 mntput(mnt);
@@ -3217,22 +3195,19 @@ bool current_chrooted(void)
         return chrooted;
  }
  
-static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
+                               int *new_mnt_flags)
  {
-       struct mnt_namespace *ns = current->nsproxy->mnt_ns;
         int new_flags = *new_mnt_flags;
         struct mount *mnt;
         bool visible = false;
  
-       if (unlikely(!ns))
-               return false;
-
         down_read(&namespace_sem);
         list_for_each_entry(mnt, &ns->list, mnt_list) {
                 struct mount *child;
                 int mnt_flags;
  
-               if (mnt->mnt.mnt_sb->s_type != type)
+               if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
                         continue;
  
                 /* This mount is not fully visible if it's root directory
@@ -3241,12 +3216,8 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
                         continue;
  
-               /* Read the mount flags and filter out flags that
-                * may safely be ignored.
-                */
+               /* A local view of the mount flags */
                 mnt_flags = mnt->mnt.mnt_flags;
-               if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
-                       mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
  
                 /* Don't miss readonly hidden in the superblock flags */
                 if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
@@ -3258,15 +3229,6 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                 if ((mnt_flags & MNT_LOCK_READONLY) &&
                     !(new_flags & MNT_READONLY))
                         continue;
-               if ((mnt_flags & MNT_LOCK_NODEV) &&
-                   !(new_flags & MNT_NODEV))
-                       continue;
-               if ((mnt_flags & MNT_LOCK_NOSUID) &&
-                   !(new_flags & MNT_NOSUID))
-                       continue;
-               if ((mnt_flags & MNT_LOCK_NOEXEC) &&
-                   !(new_flags & MNT_NOEXEC))
-                       continue;
                 if ((mnt_flags & MNT_LOCK_ATIME) &&
                     ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
                         continue;
@@ -3286,9 +3248,6 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                 }
                 /* Preserve the locked attributes */
                 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
-                                              MNT_LOCK_NODEV    | \
-                                              MNT_LOCK_NOSUID   | \
-                                              MNT_LOCK_NOEXEC   | \
                                                MNT_LOCK_ATIME);
                 visible = true;
                 goto found;
@@ -3299,6 +3258,42 @@ found:
         return visible;
  }
  
+static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
+{
+       const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
+       struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+       unsigned long s_iflags;
+
+       if (ns->user_ns == &init_user_ns)
+               return false;
+
+       /* Can this filesystem be too revealing? */
+       s_iflags = mnt->mnt_sb->s_iflags;
+       if (!(s_iflags & SB_I_USERNS_VISIBLE))
+               return false;
+
+       if ((s_iflags & required_iflags) != required_iflags) {
+               WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
+                         required_iflags);
+               return true;
+       }
+
+       return !mnt_already_visible(ns, mnt, new_mnt_flags);
+}
+
+bool mnt_may_suid(struct vfsmount *mnt)
+{
+       /*
+        * Foreign mounts (accessed via fchdir or through /proc
+        * symlinks) are always treated as if they are nosuid.  This
+        * prevents namespaces from trusting potentially unsafe
+        * suid/sgid bits, file caps, or security labels that originate
+        * in other namespaces.
+        */
+       return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
+              current_in_userns(mnt->mnt_sb->s_user_ns);
+}
+
  static struct ns_common *mntns_get(struct task_struct *task)
  {
         struct ns_common *ns = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c

index e7787777620e9d236043499d2c7adb871610a035..65ad0165a94f8b0f327861a98a5a46f393b7f452 100644 (file)
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1151,20 +1151,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
  #endif
                 /* last one */ {""}
         };
-       struct net *net = data;
-       int ret;
-
-       ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
-       if (ret)
-               return ret;
-       sb->s_fs_info = get_net(net);
-       return 0;
+       get_net(sb->s_fs_info);
+       return simple_fill_super(sb, 0x6e667364, nfsd_files);
  }
  
  static struct dentry *nfsd_mount(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data)
  {
-       return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super);
+       struct net *net = current->nsproxy->net_ns;
+       return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
  }
  
  static void nfsd_umount(struct super_block *sb)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c

index edc452c2a563a0f86849094a848786a7728ef99e..59d47ab0791af5ce96200c18ecaeee53800cd35a 100644 (file)
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -205,7 +205,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
   * Check if an acl is valid. Returns 0 if it is, or -E... otherwise.
   */
  int
-posix_acl_valid(const struct posix_acl *acl)
+posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl)
  {
         const struct posix_acl_entry *pa, *pe;
         int state = ACL_USER_OBJ;
@@ -225,7 +225,7 @@ posix_acl_valid(const struct posix_acl *acl)
                         case ACL_USER:
                                 if (state != ACL_USER)
                                         return -EINVAL;
-                               if (!uid_valid(pa->e_uid))
+                               if (!kuid_has_mapping(user_ns, pa->e_uid))
                                         return -EINVAL;
                                 needs_mask = 1;
                                 break;
@@ -240,7 +240,7 @@ posix_acl_valid(const struct posix_acl *acl)
                         case ACL_GROUP:
                                 if (state != ACL_GROUP)
                                         return -EINVAL;
-                               if (!gid_valid(pa->e_gid))
+                               if (!kgid_has_mapping(user_ns, pa->e_gid))
                                         return -EINVAL;
                                 needs_mask = 1;
                                 break;
@@ -834,7 +834,7 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
                 return -EPERM;
  
         if (acl) {
-               int ret = posix_acl_valid(acl);
+               int ret = posix_acl_valid(inode->i_sb->s_user_ns, acl);
                 if (ret)
                         return ret;
         }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index 42305ddcbaa00124cfbdaccf6c44b6c33cf7f701..c1b72388e57115747790948a172312738997fb0a 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -457,17 +457,30 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
         return inode;
  }
  
-int proc_fill_super(struct super_block *s)
+int proc_fill_super(struct super_block *s, void *data, int silent)
  {
+       struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
         struct inode *root_inode;
         int ret;
  
+       if (!proc_parse_options(data, ns))
+               return -EINVAL;
+
+       /* User space would break if executables or devices appear on proc */
+       s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
         s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
         s->s_blocksize = 1024;
         s->s_blocksize_bits = 10;
         s->s_magic = PROC_SUPER_MAGIC;
         s->s_op = &proc_sops;
         s->s_time_gran = 1;
+
+       /*
+        * procfs isn't actually a stacking filesystem; however, there is
+        * too much magic going on inside it to permit stacking things on
+        * top of it
+        */
+       s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
         
         pde_get(&proc_root);
         root_inode = proc_get_inode(s, &proc_root);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h

index aa2781095bd15f4d9e98b5bcf58fe77ba5576f09..7931c558c19250ab87fd911b6c09ff359d4d05e1 100644 (file)
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -212,7 +212,7 @@ extern const struct inode_operations proc_pid_link_inode_operations;
  
  extern void proc_init_inodecache(void);
  extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-extern int proc_fill_super(struct super_block *);
+extern int proc_fill_super(struct super_block *, void *data, int flags);
  extern void proc_entry_rundown(struct proc_dir_entry *);
  
  /*
@@ -268,6 +268,7 @@ static inline void proc_tty_init(void) {}
   * root.c
   */
  extern struct proc_dir_entry proc_root;
+extern int proc_parse_options(char *options, struct pid_namespace *pid);
  
  extern void proc_self_init(void);
  extern int proc_remount(struct super_block *, int *, char *);
diff --git a/fs/proc/root.c b/fs/proc/root.c

index 06702783bf40254593f82b76dd9745ad557ac334..8d3e484055a6b3529982135289d693decc4dcb3c 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,21 +23,6 @@
  
  #include "internal.h"
  
-static int proc_test_super(struct super_block *sb, void *data)
-{
-       return sb->s_fs_info == data;
-}
-
-static int proc_set_super(struct super_block *sb, void *data)
-{
-       int err = set_anon_super(sb, NULL);
-       if (!err) {
-               struct pid_namespace *ns = (struct pid_namespace *)data;
-               sb->s_fs_info = get_pid_ns(ns);
-       }
-       return err;
-}
-
  enum {
         Opt_gid, Opt_hidepid, Opt_err,
  };
@@ -48,7 +33,7 @@ static const match_table_t tokens = {
         {Opt_err, NULL},
  };
  
-static int proc_parse_options(char *options, struct pid_namespace *pid)
+int proc_parse_options(char *options, struct pid_namespace *pid)
  {
         char *p;
         substring_t args[MAX_OPT_ARGS];
@@ -100,52 +85,16 @@ int proc_remount(struct super_block *sb, int *flags, char *data)
  static struct dentry *proc_mount(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data)
  {
-       int err;
-       struct super_block *sb;
         struct pid_namespace *ns;
-       char *options;
  
         if (flags & MS_KERNMOUNT) {
-               ns = (struct pid_namespace *)data;
-               options = NULL;
+               ns = data;
+               data = NULL;
         } else {
                 ns = task_active_pid_ns(current);
-               options = data;
-
-               /* Does the mounter have privilege over the pid namespace? */
-               if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
-                       return ERR_PTR(-EPERM);
-       }
-
-       sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
-       if (IS_ERR(sb))
-               return ERR_CAST(sb);
-
-       /*
-        * procfs isn't actually a stacking filesystem; however, there is
-        * too much magic going on inside it to permit stacking things on
-        * top of it
-        */
-       sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
-
-       if (!proc_parse_options(options, ns)) {
-               deactivate_locked_super(sb);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (!sb->s_root) {
-               err = proc_fill_super(sb);
-               if (err) {
-                       deactivate_locked_super(sb);
-                       return ERR_PTR(err);
-               }
-
-               sb->s_flags |= MS_ACTIVE;
-               /* User space would break if executables appear on proc */
-               sb->s_iflags |= SB_I_NOEXEC;
         }
  
-       return dget(sb->s_root);
+       return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super);
  }
  
  static void proc_kill_sb(struct super_block *sb)
@@ -165,7 +114,7 @@ static struct file_system_type proc_fs_type = {
         .name           = "proc",
         .mount          = proc_mount,
         .kill_sb        = proc_kill_sb,
-       .fs_flags       = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+       .fs_flags       = FS_USERNS_MOUNT,
  };
  
  void __init proc_root_init(void)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c

index b1322dd9d1364ce035fa4f762b126dda658ce881..1bfac28b7e7df1febe08012abf2ad51d3189cc54 100644 (file)
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -841,6 +841,9 @@ struct dquot *dqget(struct super_block *sb, struct kqid qid)
         unsigned int hashent = hashfn(sb, qid);
         struct dquot *dquot, *empty = NULL;
  
+       if (!qid_has_mapping(sb->s_user_ns, qid))
+               return ERR_PTR(-EINVAL);
+
          if (!sb_has_quota_active(sb, qid.type))
                 return ERR_PTR(-ESRCH);
  we_slept:
@@ -2268,6 +2271,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
                 error = -EINVAL;
                 goto out_fmt;
         }
+       /* Filesystems outside of init_user_ns not yet supported */
+       if (sb->s_user_ns != &init_user_ns) {
+               error = -EINVAL;
+               goto out_fmt;
+       }
         /* Usage always has to be set... */
         if (!(flags & DQUOT_USAGE_ENABLED)) {
                 error = -EINVAL;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c

index 0f10ee9892ce3a371b5341ebc4f1db2bd5a74a8a..35df08ee9c97da4f489748b674b778bd61f074df 100644 (file)
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -211,7 +211,7 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
         if (!sb->s_qcop->get_dqblk)
                 return -ENOSYS;
         qid = make_kqid(current_user_ns(), type, id);
-       if (!qid_valid(qid))
+       if (!qid_has_mapping(sb->s_user_ns, qid))
                 return -EINVAL;
         ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
         if (ret)
@@ -237,7 +237,7 @@ static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
         if (!sb->s_qcop->get_nextdqblk)
                 return -ENOSYS;
         qid = make_kqid(current_user_ns(), type, id);
-       if (!qid_valid(qid))
+       if (!qid_has_mapping(sb->s_user_ns, qid))
                 return -EINVAL;
         ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
         if (ret)
@@ -288,7 +288,7 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id,
         if (!sb->s_qcop->set_dqblk)
                 return -ENOSYS;
         qid = make_kqid(current_user_ns(), type, id);
-       if (!qid_valid(qid))
+       if (!qid_has_mapping(sb->s_user_ns, qid))
                 return -EINVAL;
         copy_from_if_dqblk(&fdq, &idq);
         return sb->s_qcop->set_dqblk(sb, qid, &fdq);
@@ -581,10 +581,10 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
         if (!sb->s_qcop->set_dqblk)
                 return -ENOSYS;
         qid = make_kqid(current_user_ns(), type, id);
-       if (!qid_valid(qid))
+       if (!qid_has_mapping(sb->s_user_ns, qid))
                 return -EINVAL;
         /* Are we actually setting timer / warning limits for all users? */
-       if (from_kqid(&init_user_ns, qid) == 0 &&
+       if (from_kqid(sb->s_user_ns, qid) == 0 &&
             fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
                 struct qc_info qinfo;
                 int ret;
@@ -642,7 +642,7 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
         if (!sb->s_qcop->get_dqblk)
                 return -ENOSYS;
         qid = make_kqid(current_user_ns(), type, id);
-       if (!qid_valid(qid))
+       if (!qid_has_mapping(sb->s_user_ns, qid))
                 return -EINVAL;
         ret = sb->s_qcop->get_dqblk(sb, qid, &qdq);
         if (ret)
@@ -669,7 +669,7 @@ static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
         if (!sb->s_qcop->get_nextdqblk)
                 return -ENOSYS;
         qid = make_kqid(current_user_ns(), type, id);
-       if (!qid_valid(qid))
+       if (!qid_has_mapping(sb->s_user_ns, qid))
                 return -EINVAL;
         ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
         if (ret)
diff --git a/fs/super.c b/fs/super.c

index 5806ffd455636c68d00edc2ee9ebda10ac12bc78..c2ff475c1711f38aa0b4b5296c68589e68fa1fc2 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,6 +33,7 @@
  #include <linux/cleancache.h>
  #include <linux/fsnotify.h>
  #include <linux/lockdep.h>
+#include <linux/user_namespace.h>
  #include "internal.h"
  
  
@@ -165,6 +166,7 @@ static void destroy_super(struct super_block *s)
         list_lru_destroy(&s->s_inode_lru);
         security_sb_free(s);
         WARN_ON(!list_empty(&s->s_mounts));
+       put_user_ns(s->s_user_ns);
         kfree(s->s_subtype);
         kfree(s->s_options);
         call_rcu(&s->rcu, destroy_super_rcu);
@@ -174,11 +176,13 @@ static void destroy_super(struct super_block *s)
   *     alloc_super     -       create new superblock
   *     @type:  filesystem type superblock should belong to
   *     @flags: the mount flags
+ *     @user_ns: User namespace for the super_block
   *
   *     Allocates and initializes a new &struct super_block.  alloc_super()
   *     returns a pointer new superblock or %NULL if allocation had failed.
   */
-static struct super_block *alloc_super(struct file_system_type *type, int flags)
+static struct super_block *alloc_super(struct file_system_type *type, int flags,
+                                      struct user_namespace *user_ns)
  {
         struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
         static const struct super_operations default_op;
@@ -188,6 +192,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                 return NULL;
  
         INIT_LIST_HEAD(&s->s_mounts);
+       s->s_user_ns = get_user_ns(user_ns);
  
         if (security_sb_alloc(s))
                 goto fail;
@@ -201,6 +206,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
         init_waitqueue_head(&s->s_writers.wait_unfrozen);
         s->s_bdi = &noop_backing_dev_info;
         s->s_flags = flags;
+       if (s->s_user_ns != &init_user_ns)
+               s->s_iflags |= SB_I_NODEV;
         INIT_HLIST_NODE(&s->s_instances);
         INIT_HLIST_BL_HEAD(&s->s_anon);
         mutex_init(&s->s_sync_lock);
@@ -445,29 +452,42 @@ void generic_shutdown_super(struct super_block *sb)
  EXPORT_SYMBOL(generic_shutdown_super);
  
  /**
- *     sget    -       find or create a superblock
+ *     sget_userns -   find or create a superblock
   *     @type:  filesystem type superblock should belong to
   *     @test:  comparison callback
   *     @set:   setup callback
   *     @flags: mount flags
+ *     @user_ns: User namespace for the super_block
   *     @data:  argument to each of them
   */
-struct super_block *sget(struct file_system_type *type,
+struct super_block *sget_userns(struct file_system_type *type,
                         int (*test)(struct super_block *,void *),
                         int (*set)(struct super_block *,void *),
-                       int flags,
+                       int flags, struct user_namespace *user_ns,
                         void *data)
  {
         struct super_block *s = NULL;
         struct super_block *old;
         int err;
  
+       if (!(flags & MS_KERNMOUNT) &&
+           !(type->fs_flags & FS_USERNS_MOUNT) &&
+           !capable(CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
  retry:
         spin_lock(&sb_lock);
         if (test) {
                 hlist_for_each_entry(old, &type->fs_supers, s_instances) {
                         if (!test(old, data))
                                 continue;
+                       if (user_ns != old->s_user_ns) {
+                               spin_unlock(&sb_lock);
+                               if (s) {
+                                       up_write(&s->s_umount);
+                                       destroy_super(s);
+                               }
+                               return ERR_PTR(-EBUSY);
+                       }
                         if (!grab_super(old))
                                 goto retry;
                         if (s) {
@@ -480,7 +500,7 @@ retry:
         }
         if (!s) {
                 spin_unlock(&sb_lock);
-               s = alloc_super(type, flags);
+               s = alloc_super(type, flags, user_ns);
                 if (!s)
                         return ERR_PTR(-ENOMEM);
                 goto retry;
@@ -503,6 +523,31 @@ retry:
         return s;
  }
  
+EXPORT_SYMBOL(sget_userns);
+
+/**
+ *     sget    -       find or create a superblock
+ *     @type:    filesystem type superblock should belong to
+ *     @test:    comparison callback
+ *     @set:     setup callback
+ *     @flags:   mount flags
+ *     @data:    argument to each of them
+ */
+struct super_block *sget(struct file_system_type *type,
+                       int (*test)(struct super_block *,void *),
+                       int (*set)(struct super_block *,void *),
+                       int flags,
+                       void *data)
+{
+       struct user_namespace *user_ns = current_user_ns();
+
+       /* Ensure the requestor has permissions over the target filesystem */
+       if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       return sget_userns(type, test, set, flags, user_ns, data);
+}
+
  EXPORT_SYMBOL(sget);
  
  void drop_super(struct super_block *sb)
@@ -920,12 +965,20 @@ static int ns_set_super(struct super_block *sb, void *data)
         return set_anon_super(sb, NULL);
  }
  
-struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
-       void *data, int (*fill_super)(struct super_block *, void *, int))
+struct dentry *mount_ns(struct file_system_type *fs_type,
+       int flags, void *data, void *ns, struct user_namespace *user_ns,
+       int (*fill_super)(struct super_block *, void *, int))
  {
         struct super_block *sb;
  
-       sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
+       /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+        * over the namespace.
+        */
+       if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
+                        user_ns, ns);
         if (IS_ERR(sb))
                 return ERR_CAST(sb);
  
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c

index f3db82071cfbd5997bdb1393097e755ae730ea96..20b8f82e115b647b9d6f29c0877a1a2e3d6fc44c 100644 (file)
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -41,8 +41,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
         if (IS_ERR(root) || !new_sb)
                 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
         else if (new_sb)
-               /* Userspace would break if executables appear on sysfs */
-               root->d_sb->s_iflags |= SB_I_NOEXEC;
+               root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
  
         return root;
  }
@@ -59,7 +58,7 @@ static struct file_system_type sysfs_fs_type = {
         .name           = "sysfs",
         .mount          = sysfs_mount,
         .kill_sb        = sysfs_kill_sb,
-       .fs_flags       = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+       .fs_flags       = FS_USERNS_MOUNT,
  };
  
  int __init sysfs_init(void)
diff --git a/fs/xattr.c b/fs/xattr.c

index 4beafc43daa58bff015f0839c78b5a65f8b8d2ab..c243905835abd25b52eb8daf061b5f11b3a5708e 100644 (file)
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -38,6 +38,13 @@ xattr_permission(struct inode *inode, const char *name, int mask)
         if (mask & MAY_WRITE) {
                 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                         return -EPERM;
+               /*
+                * Updating an xattr will likely cause i_uid and i_gid
+                * to be written back improperly if their true value is
+                * unknown to the vfs.
+                */
+               if (HAS_UNMAPPED_ID(inode))
+                       return -EPERM;
         }
  
         /*
diff --git a/include/linux/fs.h b/include/linux/fs.h

index f65a6801f60967346f7a47b4c49087966dcb2593..577365a77b4729af5d9fae4a46c224acd9b137f1 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -829,31 +829,6 @@ static inline void i_size_write(struct inode *inode, loff_t i_size)
  #endif
  }
  
-/* Helper functions so that in most cases filesystems will
- * not need to deal directly with kuid_t and kgid_t and can
- * instead deal with the raw numeric values that are stored
- * in the filesystem.
- */
-static inline uid_t i_uid_read(const struct inode *inode)
-{
-       return from_kuid(&init_user_ns, inode->i_uid);
-}
-
-static inline gid_t i_gid_read(const struct inode *inode)
-{
-       return from_kgid(&init_user_ns, inode->i_gid);
-}
-
-static inline void i_uid_write(struct inode *inode, uid_t uid)
-{
-       inode->i_uid = make_kuid(&init_user_ns, uid);
-}
-
-static inline void i_gid_write(struct inode *inode, gid_t gid)
-{
-       inode->i_gid = make_kgid(&init_user_ns, gid);
-}
-
  static inline unsigned iminor(const struct inode *inode)
  {
         return MINOR(inode->i_rdev);
@@ -1320,6 +1295,10 @@ struct mm_struct;
  /* sb->s_iflags */
  #define SB_I_CGROUPWB  0x00000001      /* cgroup-aware writeback enabled */
  #define SB_I_NOEXEC    0x00000002      /* Ignore executables on this fs */
+#define SB_I_NODEV     0x00000004      /* Ignore devices on this fs */
+
+/* sb->s_iflags to limit user namespace mounts */
+#define SB_I_USERNS_VISIBLE            0x00000010 /* fstype already mounted */
  
  /* Possible states of 'frozen' field */
  enum {
@@ -1422,6 +1401,13 @@ struct super_block {
         struct workqueue_struct *s_dio_done_wq;
         struct hlist_head s_pins;
  
+       /*
+        * Owning user namespace and default context in which to
+        * interpret filesystem uids, gids, quotas, device nodes,
+        * xattrs and security labels.
+        */
+       struct user_namespace *s_user_ns;
+
         /*
          * Keep the lru lists last in the structure so they always sit on their
          * own individual cachelines.
@@ -1446,6 +1432,31 @@ struct super_block {
         struct list_head        s_inodes_wb;    /* writeback inodes */
  };
  
+/* Helper functions so that in most cases filesystems will
+ * not need to deal directly with kuid_t and kgid_t and can
+ * instead deal with the raw numeric values that are stored
+ * in the filesystem.
+ */
+static inline uid_t i_uid_read(const struct inode *inode)
+{
+       return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+}
+
+static inline gid_t i_gid_read(const struct inode *inode)
+{
+       return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
+}
+
+static inline void i_uid_write(struct inode *inode, uid_t uid)
+{
+       inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
+}
+
+static inline void i_gid_write(struct inode *inode, gid_t gid)
+{
+       inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
+}
+
  extern struct timespec current_fs_time(struct super_block *sb);
  
  /*
@@ -1588,6 +1599,7 @@ extern int vfs_whiteout(struct inode *, struct dentry *);
   */
  extern void inode_init_owner(struct inode *inode, const struct inode *dir,
                         umode_t mode);
+extern bool may_open_dev(const struct path *path);
  /*
   * VFS FS_IOC_FIEMAP helper definitions.
   */
@@ -1858,6 +1870,11 @@ struct super_operations {
  #define IS_WHITEOUT(inode)     (S_ISCHR(inode->i_mode) && \
                                  (inode)->i_rdev == WHITEOUT_DEV)
  
+static inline bool HAS_UNMAPPED_ID(struct inode *inode)
+{
+       return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
+}
+
  /*
   * Inode state bits.  Protected by inode->i_lock
   *
@@ -2006,8 +2023,6 @@ struct file_system_type {
  #define FS_BINARY_MOUNTDATA    2
  #define FS_HAS_SUBTYPE         4
  #define FS_USERNS_MOUNT                8       /* Can be mounted by userns root */
-#define FS_USERNS_DEV_MOUNT    16 /* A userns mount does not imply MNT_NODEV */
-#define FS_USERNS_VISIBLE      32      /* FS must already be visible */
  #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move() during rename() internally. */
         struct dentry *(*mount) (struct file_system_type *, int,
                        const char *, void *);
@@ -2028,8 +2043,9 @@ struct file_system_type {
  
  #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
  
-extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
-       void *data, int (*fill_super)(struct super_block *, void *, int));
+extern struct dentry *mount_ns(struct file_system_type *fs_type,
+       int flags, void *data, void *ns, struct user_namespace *user_ns,
+       int (*fill_super)(struct super_block *, void *, int));
  extern struct dentry *mount_bdev(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data,
         int (*fill_super)(struct super_block *, void *, int));
@@ -2049,6 +2065,11 @@ void deactivate_locked_super(struct super_block *sb);
  int set_anon_super(struct super_block *s, void *data);
  int get_anon_bdev(dev_t *);
  void free_anon_bdev(dev_t);
+struct super_block *sget_userns(struct file_system_type *type,
+                       int (*test)(struct super_block *,void *),
+                       int (*set)(struct super_block *,void *),
+                       int flags, struct user_namespace *user_ns,
+                       void *data);
  struct super_block *sget(struct file_system_type *type,
                         int (*test)(struct super_block *,void *),
                         int (*set)(struct super_block *,void *),
diff --git a/include/linux/mount.h b/include/linux/mount.h

index f822c3c113777113958418a4cb4fdca4151ad21f..54a594d49733b6954f9f51ce5a7a61924cccb1f1 100644 (file)
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt);
  extern struct vfsmount *mntget(struct vfsmount *mnt);
  extern struct vfsmount *mnt_clone_internal(struct path *path);
  extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool mnt_may_suid(struct vfsmount *mnt);
  
  struct path;
  extern struct vfsmount *clone_private_mount(struct path *path);
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h

index c818772d9f9d13538309226a89894b03a78c1aa4..d5d3d741f02866008c47754f0e586515b1610ce3 100644 (file)
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -79,7 +79,7 @@ posix_acl_release(struct posix_acl *acl)
  
  extern void posix_acl_init(struct posix_acl *, int);
  extern struct posix_acl *posix_acl_alloc(int, gfp_t);
-extern int posix_acl_valid(const struct posix_acl *);
+extern int posix_acl_valid(struct user_namespace *, const struct posix_acl *);
  extern int posix_acl_permission(struct inode *, const struct posix_acl *, int);
  extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
  extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
diff --git a/include/linux/quota.h b/include/linux/quota.h

index 8486d27cf360bca4a33295cc0bfcbf49e0a84cd3..55107a8ff8877f270b02fc67a689eac992f2ea74 100644 (file)
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -179,6 +179,16 @@ static inline struct kqid make_kqid_projid(kprojid_t projid)
         return kqid;
  }
  
+/**
+ *     qid_has_mapping - Report if a qid maps into a user namespace.
+ *     @ns:  The user namespace to see if a value maps into.
+ *     @qid: The kernel internal quota identifier to test.
+ */
+static inline bool qid_has_mapping(struct user_namespace *ns, struct kqid qid)
+{
+       return from_kqid(ns, qid) != (qid_t) -1;
+}
+
  
  extern spinlock_t dq_data_lock;
  
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h

index 03835522dfcb68ab830a9d38768c374a99ec131d..25e9d92163408c38eba7e08d26f508c4335c49de 100644 (file)
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -177,12 +177,12 @@ static inline gid_t from_kgid_munged(struct user_namespace *to, kgid_t kgid)
  
  static inline bool kuid_has_mapping(struct user_namespace *ns, kuid_t uid)
  {
-       return true;
+       return uid_valid(uid);
  }
  
  static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
  {
-       return true;
+       return gid_valid(gid);
  }
  
  #endif /* CONFIG_USER_NS */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h

index 8297e5b341d863aa627a3b44a70d025bc8bc7c30..9217169c64cb80478420e55546aacda1e900d355 100644 (file)
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -72,6 +72,7 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t,
  extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
  extern int proc_setgroups_show(struct seq_file *m, void *v);
  extern bool userns_may_setgroups(const struct user_namespace *ns);
+extern bool current_in_userns(const struct user_namespace *target_ns);
  #else
  
  static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -100,6 +101,11 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns)
  {
         return true;
  }
+
+static inline bool current_in_userns(const struct user_namespace *target_ns)
+{
+       return true;
+}
  #endif
  
  #endif /* _LINUX_USER_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c

index ade739f67f1df67fc17cf631da2994eef0e3d95c..0b13ace266f2d06ee423d8d0d42f9e1946de698a 100644 (file)
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -305,8 +305,9 @@ err:
  static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
  {
         struct inode *inode;
-       struct ipc_namespace *ns = data;
+       struct ipc_namespace *ns = sb->s_fs_info;
  
+       sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
         sb->s_blocksize = PAGE_SIZE;
         sb->s_blocksize_bits = PAGE_SHIFT;
         sb->s_magic = MQUEUE_MAGIC;
@@ -326,17 +327,14 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type,
                          int flags, const char *dev_name,
                          void *data)
  {
-       if (!(flags & MS_KERNMOUNT)) {
-               struct ipc_namespace *ns = current->nsproxy->ipc_ns;
-               /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
-                * over the ipc namespace.
-                */
-               if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
-                       return ERR_PTR(-EPERM);
-
-               data = ns;
+       struct ipc_namespace *ns;
+       if (flags & MS_KERNMOUNT) {
+               ns = data;
+               data = NULL;
+       } else {
+               ns = current->nsproxy->ipc_ns;
         }
-       return mount_ns(fs_type, flags, data, mqueue_fill_super);
+       return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super);
  }
  
  static void init_once(void *foo)
diff --git a/ipc/namespace.c b/ipc/namespace.c

index 068caf18d56509711e46b47b25a363254d681abe..04cb07eb81f1c148c43915e1eecd57b3cfe9699e 100644 (file)
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -34,8 +34,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
         ns->ns.ops = &ipcns_operations;
  
         atomic_set(&ns->count, 1);
+       ns->user_ns = get_user_ns(user_ns);
+
         err = mq_init_ns(ns);
         if (err) {
+               put_user_ns(ns->user_ns);
                 ns_free_inum(&ns->ns);
                 kfree(ns);
                 return ERR_PTR(err);
@@ -46,8 +49,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
         msg_init_ns(ns);
         shm_init_ns(ns);
  
-       ns->user_ns = get_user_ns(user_ns);
-
         return ns;
  }
  
diff --git a/kernel/cred.c b/kernel/cred.c

index 0c0cd8a62285279a521b3f724189fe997da84ddb..5f264fb5737dcd01329fdeff02a4143584ba9c38 100644 (file)
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -689,6 +689,8 @@ EXPORT_SYMBOL(set_security_override_from_ctx);
   */
  int set_create_files_as(struct cred *new, struct inode *inode)
  {
+       if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+               return -EINVAL;
         new->fsuid = inode->i_uid;
         new->fsgid = inode->i_gid;
         return security_kernel_create_files_as(new, inode);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c

index 9bafc211930c79fac444a77f6f075a04ccc7f980..68f5942127590e23b86c375369b12a2973f1c6e7 100644 (file)
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -938,6 +938,20 @@ bool userns_may_setgroups(const struct user_namespace *ns)
         return allowed;
  }
  
+/*
+ * Returns true if current_user_ns() is the same namespace as or a
+ * descendant of @target_ns.
+ */
+bool current_in_userns(const struct user_namespace *target_ns)
+{
+       struct user_namespace *ns;
+       for (ns = current_user_ns(); ns; ns = ns->parent) {
+               if (ns == target_ns)
+                       return true;
+       }
+       return false;
+}
+
  static inline struct user_namespace *to_user_ns(struct ns_common *ns)
  {
         return container_of(ns, struct user_namespace, ns);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c

index fc48eca21fd2edb5a4b7ef9770cd03e106956b10..84f98cbe31c3cf4ce4b19a2ca9e47f6d69783116 100644 (file)
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1386,7 +1386,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
  {
         struct inode *inode;
         struct dentry *root, *gssd_dentry;
-       struct net *net = data;
+       struct net *net = get_net(sb->s_fs_info);
         struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
         int err;
  
@@ -1419,7 +1419,6 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
                                            sb);
         if (err)
                 goto err_depopulate;
-       sb->s_fs_info = get_net(net);
         mutex_unlock(&sn->pipefs_sb_lock);
         return 0;
  
@@ -1448,7 +1447,8 @@ static struct dentry *
  rpc_mount(struct file_system_type *fs_type,
                 int flags, const char *dev_name, void *data)
  {
-       return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super);
+       struct net *net = current->nsproxy->net_ns;
+       return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
  }
  
  static void rpc_kill_sb(struct super_block *sb)
@@ -1468,9 +1468,9 @@ static void rpc_kill_sb(struct super_block *sb)
                                            RPC_PIPEFS_UMOUNT,
                                            sb);
         mutex_unlock(&sn->pipefs_sb_lock);
-       put_net(net);
  out:
         kill_litter_super(sb);
+       put_net(net);
  }
  
  static struct file_system_type rpc_pipe_fs_type = {
diff --git a/security/commoncap.c b/security/commoncap.c

index e7fadde737f41cb48aa1e25f6a486375e4d8f597..14540bd7856182260486d549ccab5fd179363060 100644 (file)
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -453,7 +453,15 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
         if (!file_caps_enabled)
                 return 0;
  
-       if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+       if (!mnt_may_suid(bprm->file->f_path.mnt))
+               return 0;
+
+       /*
+        * This check is redundant with mnt_may_suid() but is kept to make
+        * explicit that capability bits are limited to s_user_ns and its
+        * descendants.
+        */
+       if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns))
                 return 0;
  
         rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c

index 30b6b7d0429fc98f92bbf9e8169f60116c66019a..11c1d30bd705a93450dc2b275581fc63cd3b8c77 100644 (file)
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -151,8 +151,8 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
         memset(&hmac_misc, 0, sizeof(hmac_misc));
         hmac_misc.ino = inode->i_ino;
         hmac_misc.generation = inode->i_generation;
-       hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid);
-       hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
+       hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+       hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
         hmac_misc.mode = inode->i_mode;
         crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
         if (evm_hmac_attrs & EVM_ATTR_FSUUID)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index a86d537eb79b149a7dfe1536a243e180f4b9ec92..19be9d39c7424e2c9d9c8615b00936aa6a014963 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -830,6 +830,28 @@ static int selinux_set_mnt_opts(struct super_block *sb,
                         goto out;
                 }
         }
+
+       /*
+        * If this is a user namespace mount, no contexts are allowed
+        * on the command line and security labels must be ignored.
+        */
+       if (sb->s_user_ns != &init_user_ns) {
+               if (context_sid || fscontext_sid || rootcontext_sid ||
+                   defcontext_sid) {
+                       rc = -EACCES;
+                       goto out;
+               }
+               if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
+                       sbsec->behavior = SECURITY_FS_USE_MNTPOINT;
+                       rc = security_transition_sid(current_sid(), current_sid(),
+                                                    SECCLASS_FILE, NULL,
+                                                    &sbsec->mntpoint_sid);
+                       if (rc)
+                               goto out;
+               }
+               goto out_set_opts;
+       }
+
         /* sets the context of the superblock for the fs being mounted. */
         if (fscontext_sid) {
                 rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred);
@@ -898,6 +920,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
                 sbsec->def_sid = defcontext_sid;
         }
  
+out_set_opts:
         rc = sb_finish_set_opts(sb);
  out:
         mutex_unlock(&sbsec->lock);
@@ -2259,7 +2282,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm,
                             const struct task_security_struct *new_tsec)
  {
         int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS);
-       int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID);
+       int nosuid = !mnt_may_suid(bprm->file->f_path.mnt);
         int rc;
  
         if (!nnp && !nosuid)
diff --git a/security/smack/smack.h b/security/smack/smack.h

index 6c91156ae2256798e59fb54518efa6d056094d7d..26e58f1804b10b686c1c6182a86dba822fcd3283 100644 (file)
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -90,9 +90,15 @@ struct superblock_smack {
         struct smack_known      *smk_floor;
         struct smack_known      *smk_hat;
         struct smack_known      *smk_default;
-       int                     smk_initialized;
+       int                     smk_flags;
  };
  
+/*
+ * Superblock flags
+ */
+#define SMK_SB_INITIALIZED     0x01
+#define SMK_SB_UNTRUSTED       0x02
+
  struct socket_smack {
         struct smack_known      *smk_out;       /* outbound label */
         struct smack_known      *smk_in;        /* inbound label */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c

index 6777295f4b2b75bd4edc385a60a09cdc4fdde0c0..b75634dbf53ba24f4e39be091f9474f697ccb0d9 100644 (file)
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -549,7 +549,7 @@ static int smack_sb_alloc_security(struct super_block *sb)
         sbsp->smk_floor = &smack_known_floor;
         sbsp->smk_hat = &smack_known_hat;
         /*
-        * smk_initialized will be zero from kzalloc.
+        * SMK_SB_INITIALIZED will be zero from kzalloc.
          */
         sb->s_security = sbsp;
  
@@ -766,10 +766,10 @@ static int smack_set_mnt_opts(struct super_block *sb,
         int num_opts = opts->num_mnt_opts;
         int transmute = 0;
  
-       if (sp->smk_initialized)
+       if (sp->smk_flags & SMK_SB_INITIALIZED)
                 return 0;
  
-       sp->smk_initialized = 1;
+       sp->smk_flags |= SMK_SB_INITIALIZED;
  
         for (i = 0; i < num_opts; i++) {
                 switch (opts->mnt_opts_flags[i]) {
@@ -821,6 +821,17 @@ static int smack_set_mnt_opts(struct super_block *sb,
                 skp = smk_of_current();
                 sp->smk_root = skp;
                 sp->smk_default = skp;
+               /*
+                * For a handful of fs types with no user-controlled
+                * backing store it's okay to trust security labels
+                * in the filesystem. The rest are untrusted.
+                */
+               if (sb->s_user_ns != &init_user_ns &&
+                   sb->s_magic != SYSFS_MAGIC && sb->s_magic != TMPFS_MAGIC &&
+                   sb->s_magic != RAMFS_MAGIC) {
+                       transmute = 1;
+                       sp->smk_flags |= SMK_SB_UNTRUSTED;
+               }
         }
  
         /*
@@ -908,6 +919,7 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
         struct inode *inode = file_inode(bprm->file);
         struct task_smack *bsp = bprm->cred->security;
         struct inode_smack *isp;
+       struct superblock_smack *sbsp;
         int rc;
  
         if (bprm->cred_prepared)
@@ -917,6 +929,11 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
         if (isp->smk_task == NULL || isp->smk_task == bsp->smk_task)
                 return 0;
  
+       sbsp = inode->i_sb->s_security;
+       if ((sbsp->smk_flags & SMK_SB_UNTRUSTED) &&
+           isp->smk_task != sbsp->smk_root)
+               return 0;
+
         if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
                 struct task_struct *tracer;
                 rc = 0;
@@ -1203,6 +1220,7 @@ static int smack_inode_rename(struct inode *old_inode,
   */
  static int smack_inode_permission(struct inode *inode, int mask)
  {
+       struct superblock_smack *sbsp = inode->i_sb->s_security;
         struct smk_audit_info ad;
         int no_block = mask & MAY_NOT_BLOCK;
         int rc;
@@ -1214,6 +1232,11 @@ static int smack_inode_permission(struct inode *inode, int mask)
         if (mask == 0)
                 return 0;
  
+       if (sbsp->smk_flags & SMK_SB_UNTRUSTED) {
+               if (smk_of_inode(inode) != sbsp->smk_root)
+                       return -EACCES;
+       }
+
         /* May be droppable after audit */
         if (no_block)
                 return -ECHILD;
@@ -1708,6 +1731,7 @@ static int smack_mmap_file(struct file *file,
         struct task_smack *tsp;
         struct smack_known *okp;
         struct inode_smack *isp;
+       struct superblock_smack *sbsp;
         int may;
         int mmay;
         int tmay;
@@ -1719,6 +1743,10 @@ static int smack_mmap_file(struct file *file,
         isp = file_inode(file)->i_security;
         if (isp->smk_mmap == NULL)
                 return 0;
+       sbsp = file_inode(file)->i_sb->s_security;
+       if (sbsp->smk_flags & SMK_SB_UNTRUSTED &&
+           isp->smk_mmap != sbsp->smk_root)
+               return -EACCES;
         mkp = isp->smk_mmap;
  
         tsp = current_security();
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 29 Jul 2016 22:54:19 +0000 (15:54 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 29 Jul 2016 22:54:19 +0000 (15:54 -0700)
drivers/staging/lustre/lustre/mdc/mdc_request.c		patch \| blob \| blame \| history
fs/9p/acl.c		patch \| blob \| blame \| history
fs/attr.c		patch \| blob \| blame \| history
fs/block_dev.c		patch \| blob \| blame \| history
fs/devpts/inode.c		patch \| blob \| blame \| history
fs/exec.c		patch \| blob \| blame \| history
fs/inode.c		patch \| blob \| blame \| history
fs/kernfs/mount.c		patch \| blob \| blame \| history
fs/namei.c		patch \| blob \| blame \| history
fs/namespace.c		patch \| blob \| blame \| history
fs/nfsd/nfsctl.c		patch \| blob \| blame \| history
fs/posix_acl.c		patch \| blob \| blame \| history
fs/proc/inode.c		patch \| blob \| blame \| history
fs/proc/internal.h		patch \| blob \| blame \| history
fs/proc/root.c		patch \| blob \| blame \| history
fs/quota/dquot.c		patch \| blob \| blame \| history
fs/quota/quota.c		patch \| blob \| blame \| history
fs/super.c		patch \| blob \| blame \| history
fs/sysfs/mount.c		patch \| blob \| blame \| history
fs/xattr.c		patch \| blob \| blame \| history
include/linux/fs.h		patch \| blob \| blame \| history
include/linux/mount.h		patch \| blob \| blame \| history
include/linux/posix_acl.h		patch \| blob \| blame \| history
include/linux/quota.h		patch \| blob \| blame \| history
include/linux/uidgid.h		patch \| blob \| blame \| history
include/linux/user_namespace.h		patch \| blob \| blame \| history
ipc/mqueue.c		patch \| blob \| blame \| history
ipc/namespace.c		patch \| blob \| blame \| history
kernel/cred.c		patch \| blob \| blame \| history
kernel/user_namespace.c		patch \| blob \| blame \| history
net/sunrpc/rpc_pipe.c		patch \| blob \| blame \| history
security/commoncap.c		patch \| blob \| blame \| history
security/integrity/evm/evm_crypto.c		patch \| blob \| blame \| history
security/selinux/hooks.c		patch \| blob \| blame \| history
security/smack/smack.h		patch \| blob \| blame \| history
security/smack/smack_lsm.c		patch \| blob \| blame \| history