[kernel] CVE-2014-8989 userns can bypass group restrictions (rhbz 1170684 1170688)

Josh Boyer jwboyer at fedoraproject.org
Thu Dec 18 23:53:42 UTC 2014


commit 363cd71ff322e841027b4ec4436004e0bad98be0
Author: Josh Boyer <jwboyer at fedoraproject.org>
Date:   Thu Dec 18 16:17:32 2014 -0500

    CVE-2014-8989 userns can bypass group restrictions (rhbz 1170684 1170688)

 ...nsolidate-the-setgroups-permission-checks.patch |   90 +++++++
 kernel.spec                                        |   35 +++
 ...tly-add-MNT_NODEV-on-remount-when-it-was-.patch |   41 +++
 mnt-Update-unprivileged-remount-test.patch         |  280 ++++++++++++++++++++
 umount-Disallow-unprivileged-mount-force.patch     |   33 +++
 ...a-knob-to-disable-setgroups-on-a-per-user.patch |  280 ++++++++++++++++++++
 ...w-setting-gid_maps-without-privilege-when.patch |   40 +++
 ...k-euid-no-fsuid-when-establishing-an-unpr.patch |   39 +++
 ...ment-what-the-invariant-required-for-safe.patch |   48 ++++
 ...t-allow-setgroups-until-a-gid-mapping-has.patch |   98 +++++++
 ...t-allow-unprivileged-creation-of-gid-mapp.patch |   46 ++++
 ...-allow-the-creator-of-the-userns-unprivil.patch |   54 ++++
 ...Rename-id_map_mutex-to-userns_state_mutex.patch |   80 ++++++
 ...ns-Unbreak-the-unprivileged-remount-tests.patch |   91 +++++++
 14 files changed, 1255 insertions(+), 0 deletions(-)
---
diff --git a/groups-Consolidate-the-setgroups-permission-checks.patch b/groups-Consolidate-the-setgroups-permission-checks.patch
new file mode 100644
index 0000000..e65ea26
--- /dev/null
+++ b/groups-Consolidate-the-setgroups-permission-checks.patch
@@ -0,0 +1,90 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 5 Dec 2014 17:19:27 -0600
+Subject: [PATCH] groups: Consolidate the setgroups permission checks
+
+Today there are 3 instances of setgroups and due to an oversight their
+permission checking has diverged.  Add a common function so that
+they may all share the same permission checking code.
+
+This corrects the current oversight in the current permission checks
+and adds a helper to avoid this in the future.
+
+A user namespace security fix will update this new helper, shortly.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ arch/s390/kernel/compat_linux.c | 2 +-
+ include/linux/cred.h            | 1 +
+ kernel/groups.c                 | 9 ++++++++-
+ kernel/uid16.c                  | 2 +-
+ 4 files changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
+index ca38139423ae..437e61159279 100644
+--- a/arch/s390/kernel/compat_linux.c
++++ b/arch/s390/kernel/compat_linux.c
+@@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
+ 	struct group_info *group_info;
+ 	int retval;
+ 
+-	if (!capable(CAP_SETGID))
++	if (!may_setgroups())
+ 		return -EPERM;
+ 	if ((unsigned)gidsetsize > NGROUPS_MAX)
+ 		return -EINVAL;
+diff --git a/include/linux/cred.h b/include/linux/cred.h
+index b2d0820837c4..2fb2ca2127ed 100644
+--- a/include/linux/cred.h
++++ b/include/linux/cred.h
+@@ -68,6 +68,7 @@ extern void groups_free(struct group_info *);
+ extern int set_current_groups(struct group_info *);
+ extern void set_groups(struct cred *, struct group_info *);
+ extern int groups_search(const struct group_info *, kgid_t);
++extern bool may_setgroups(void);
+ 
+ /* access the groups "array" with this macro */
+ #define GROUP_AT(gi, i) \
+diff --git a/kernel/groups.c b/kernel/groups.c
+index 451698f86cfa..02d8a251c476 100644
+--- a/kernel/groups.c
++++ b/kernel/groups.c
+@@ -213,6 +213,13 @@ out:
+ 	return i;
+ }
+ 
++bool may_setgroups(void)
++{
++	struct user_namespace *user_ns = current_user_ns();
++
++	return ns_capable(user_ns, CAP_SETGID);
++}
++
+ /*
+  *	SMP: Our groups are copy-on-write. We can set them safely
+  *	without another task interfering.
+@@ -223,7 +230,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
+ 	struct group_info *group_info;
+ 	int retval;
+ 
+-	if (!ns_capable(current_user_ns(), CAP_SETGID))
++	if (!may_setgroups())
+ 		return -EPERM;
+ 	if ((unsigned)gidsetsize > NGROUPS_MAX)
+ 		return -EINVAL;
+diff --git a/kernel/uid16.c b/kernel/uid16.c
+index 602e5bbbceff..d58cc4d8f0d1 100644
+--- a/kernel/uid16.c
++++ b/kernel/uid16.c
+@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
+ 	struct group_info *group_info;
+ 	int retval;
+ 
+-	if (!ns_capable(current_user_ns(), CAP_SETGID))
++	if (!may_setgroups())
+ 		return -EPERM;
+ 	if ((unsigned)gidsetsize > NGROUPS_MAX)
+ 		return -EINVAL;
+-- 
+2.1.0
+
diff --git a/kernel.spec b/kernel.spec
index 7648694..2647122 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -645,6 +645,23 @@ Patch26106: dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch
 
 Patch26107: uapi-linux-target_core_user.h-fix-headers_install.sh.patch
 
+#mount fixes for stable
+Patch26108: mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch
+Patch26109: mnt-Update-unprivileged-remount-test.patch
+Patch26110: umount-Disallow-unprivileged-mount-force.patch
+
+#CVE-2014-8989 rhbz 1170684 1170688
+Patch26111: groups-Consolidate-the-setgroups-permission-checks.patch
+Patch26112: userns-Document-what-the-invariant-required-for-safe.patch
+Patch26113: userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch
+Patch26114: userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch
+Patch26115: userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch
+Patch26116: userns-Only-allow-the-creator-of-the-userns-unprivil.patch
+Patch26117: userns-Rename-id_map_mutex-to-userns_state_mutex.patch
+Patch26118: userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch
+Patch26119: userns-Allow-setting-gid_maps-without-privilege-when.patch
+Patch26120: userns-Unbreak-the-unprivileged-remount-tests.patch
+
 # git clone ssh://git.fedorahosted.org/git/kernel-arm64.git, git diff master...devel
 Patch30000: kernel-arm64.patch
 
@@ -1398,6 +1415,23 @@ ApplyPatch dm-cache-fix-spurious-cell_defer-when-dealing-with-p.patch
 
 ApplyPatch uapi-linux-target_core_user.h-fix-headers_install.sh.patch
 
+#mount fixes for stable
+ApplyPatch mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch
+ApplyPatch mnt-Update-unprivileged-remount-test.patch
+ApplyPatch umount-Disallow-unprivileged-mount-force.patch
+
+#CVE-2014-8989 rhbz 1170684 1170688
+ApplyPatch groups-Consolidate-the-setgroups-permission-checks.patch
+ApplyPatch userns-Document-what-the-invariant-required-for-safe.patch
+ApplyPatch userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch
+ApplyPatch userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch
+ApplyPatch userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch
+ApplyPatch userns-Only-allow-the-creator-of-the-userns-unprivil.patch
+ApplyPatch userns-Rename-id_map_mutex-to-userns_state_mutex.patch
+ApplyPatch userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch
+ApplyPatch userns-Allow-setting-gid_maps-without-privilege-when.patch
+ApplyPatch userns-Unbreak-the-unprivileged-remount-tests.patch
+
 %if 0%{?aarch64patches}
 ApplyPatch kernel-arm64.patch
 %ifnarch aarch64 # this is stupid, but i want to notice before secondary koji does.
@@ -2267,6 +2301,7 @@ fi
 #                                    ||     ||
 %changelog
 * Thu Dec 18 2014 Josh Boyer <jwboyer at fedoraproject.org>
+- CVE-2014-8989 userns can bypass group restrictions (rhbz 1170684 1170688)
 - Fix from Kyle McMartin for target_core_user uapi issue since it's enabled
 - Fix dm-cache crash (rhbz 1168434)
 - Fix blk-mq crash on CPU hotplug (rhbz 1175261)
diff --git a/mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch b/mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch
new file mode 100644
index 0000000..0fff151
--- /dev/null
+++ b/mnt-Implicitly-add-MNT_NODEV-on-remount-when-it-was-.patch
@@ -0,0 +1,41 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Wed, 13 Aug 2014 01:33:38 -0700
+Subject: [PATCH] mnt: Implicitly add MNT_NODEV on remount when it was
+ implicitly added by mount
+
+Now that remount is properly enforcing the rule that you can't remove
+nodev at least sandstorm.io is breaking when performing a remount.
+
+It turns out that there is an easy intuitive solution: implicitly
+add nodev on remount when nodev was implicitly added on mount.
+
+Tested-by: Cedric Bosdonnat <cbosdonnat at suse.com>
+Tested-by: Richard Weinberger <richard at nod.at>
+Cc: stable at vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ fs/namespace.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 5b66b2b3624d..3a1a87dc33df 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2098,7 +2098,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
+ 	}
+ 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+ 	    !(mnt_flags & MNT_NODEV)) {
+-		return -EPERM;
++		/* Was the nodev implicitly added in mount? */
++		if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
++		    !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
++			mnt_flags |= MNT_NODEV;
++		} else {
++			return -EPERM;
++		}
+ 	}
+ 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+ 	    !(mnt_flags & MNT_NOSUID)) {
+-- 
+2.1.0
+
diff --git a/mnt-Update-unprivileged-remount-test.patch b/mnt-Update-unprivileged-remount-test.patch
new file mode 100644
index 0000000..5913d82
--- /dev/null
+++ b/mnt-Update-unprivileged-remount-test.patch
@@ -0,0 +1,280 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 22 Aug 2014 16:39:03 -0500
+Subject: [PATCH] mnt: Update unprivileged remount test
+
+- MNT_NODEV should be irrelevant except when reading back mount flags,
+  no longer specify MNT_NODEV on remount.
+
+- Test MNT_NODEV on devpts where it is meaningful even for unprivileged mounts.
+
+- Add a test to verify that remount of a preexisting mount with the same flags
+  is allowed and does not change those flags.
+
+- Clean up the definitions of MS_REC, MS_RELATIME, MS_STRICTATIME that are used
+  when the code is built in an environment without them.
+
+- Correct the test error messages when tests fail.  There were not 5 tests
+  that tested MS_RELATIME.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Eric W. Biederman <ebiederm at xmission.com>
+---
+ .../selftests/mount/unprivileged-remount-test.c    | 172 +++++++++++++++++----
+ 1 file changed, 142 insertions(+), 30 deletions(-)
+
+diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
+index 1b3ff2fda4d0..9669d375625a 100644
+--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
++++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
+@@ -6,6 +6,8 @@
+ #include <sys/types.h>
+ #include <sys/mount.h>
+ #include <sys/wait.h>
++#include <sys/vfs.h>
++#include <sys/statvfs.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <fcntl.h>
+@@ -32,11 +34,14 @@
+ # define CLONE_NEWPID 0x20000000
+ #endif
+ 
++#ifndef MS_REC
++# define MS_REC 16384
++#endif
+ #ifndef MS_RELATIME
+-#define MS_RELATIME (1 << 21)
++# define MS_RELATIME (1 << 21)
+ #endif
+ #ifndef MS_STRICTATIME
+-#define MS_STRICTATIME (1 << 24)
++# define MS_STRICTATIME (1 << 24)
+ #endif
+ 
+ static void die(char *fmt, ...)
+@@ -87,6 +92,45 @@ static void write_file(char *filename, char *fmt, ...)
+ 	}
+ }
+ 
++static int read_mnt_flags(const char *path)
++{
++	int ret;
++	struct statvfs stat;
++	int mnt_flags;
++
++	ret = statvfs(path, &stat);
++	if (ret != 0) {
++		die("statvfs of %s failed: %s\n",
++			path, strerror(errno));
++	}
++	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
++			ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
++			ST_SYNCHRONOUS | ST_MANDLOCK)) {
++		die("Unrecognized mount flags\n");
++	}
++	mnt_flags = 0;
++	if (stat.f_flag & ST_RDONLY)
++		mnt_flags |= MS_RDONLY;
++	if (stat.f_flag & ST_NOSUID)
++		mnt_flags |= MS_NOSUID;
++	if (stat.f_flag & ST_NODEV)
++		mnt_flags |= MS_NODEV;
++	if (stat.f_flag & ST_NOEXEC)
++		mnt_flags |= MS_NOEXEC;
++	if (stat.f_flag & ST_NOATIME)
++		mnt_flags |= MS_NOATIME;
++	if (stat.f_flag & ST_NODIRATIME)
++		mnt_flags |= MS_NODIRATIME;
++	if (stat.f_flag & ST_RELATIME)
++		mnt_flags |= MS_RELATIME;
++	if (stat.f_flag & ST_SYNCHRONOUS)
++		mnt_flags |= MS_SYNCHRONOUS;
++	if (stat.f_flag & ST_MANDLOCK)
++		mnt_flags |= ST_MANDLOCK;
++
++	return mnt_flags;
++}
++
+ static void create_and_enter_userns(void)
+ {
+ 	uid_t uid;
+@@ -118,7 +162,8 @@ static void create_and_enter_userns(void)
+ }
+ 
+ static
+-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
++bool test_unpriv_remount(const char *fstype, const char *mount_options,
++			 int mount_flags, int remount_flags, int invalid_flags)
+ {
+ 	pid_t child;
+ 
+@@ -151,9 +196,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+ 			strerror(errno));
+ 	}
+ 
+-	if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
+-		die("mount of /tmp failed: %s\n",
+-			strerror(errno));
++	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
++		die("mount of %s with options '%s' on /tmp failed: %s\n",
++		    fstype,
++		    mount_options? mount_options : "",
++		    strerror(errno));
+ 	}
+ 
+ 	create_and_enter_userns();
+@@ -181,62 +228,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+ 
+ static bool test_unpriv_remount_simple(int mount_flags)
+ {
+-	return test_unpriv_remount(mount_flags, mount_flags, 0);
++	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
+ }
+ 
+ static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+ {
+-	return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
++	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
++				   invalid_flags);
++}
++
++static bool test_priv_mount_unpriv_remount(void)
++{
++	pid_t child;
++	int ret;
++	const char *orig_path = "/dev";
++	const char *dest_path = "/tmp";
++	int orig_mnt_flags, remount_mnt_flags;
++
++	child = fork();
++	if (child == -1) {
++		die("fork failed: %s\n",
++			strerror(errno));
++	}
++	if (child != 0) { /* parent */
++		pid_t pid;
++		int status;
++		pid = waitpid(child, &status, 0);
++		if (pid == -1) {
++			die("waitpid failed: %s\n",
++				strerror(errno));
++		}
++		if (pid != child) {
++			die("waited for %d got %d\n",
++				child, pid);
++		}
++		if (!WIFEXITED(status)) {
++			die("child did not terminate cleanly\n");
++		}
++		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
++	}
++
++	orig_mnt_flags = read_mnt_flags(orig_path);
++
++	create_and_enter_userns();
++	ret = unshare(CLONE_NEWNS);
++	if (ret != 0) {
++		die("unshare(CLONE_NEWNS) failed: %s\n",
++			strerror(errno));
++	}
++
++	ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
++	if (ret != 0) {
++		die("recursive bind mount of %s onto %s failed: %s\n",
++			orig_path, dest_path, strerror(errno));
++	}
++
++	ret = mount(dest_path, dest_path, "none",
++		    MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
++	if (ret != 0) {
++		/* system("cat /proc/self/mounts"); */
++		die("remount of /tmp failed: %s\n",
++		    strerror(errno));
++	}
++
++	remount_mnt_flags = read_mnt_flags(dest_path);
++	if (orig_mnt_flags != remount_mnt_flags) {
++		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
++			dest_path, orig_path);
++	}
++	exit(EXIT_SUCCESS);
+ }
+ 
+ int main(int argc, char **argv)
+ {
+-	if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
++	if (!test_unpriv_remount_simple(MS_RDONLY)) {
+ 		die("MS_RDONLY malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_simple(MS_NODEV)) {
++	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
+ 		die("MS_NODEV malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
++	if (!test_unpriv_remount_simple(MS_NOSUID)) {
+ 		die("MS_NOSUID malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
++	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
+ 		die("MS_NOEXEC malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
+-				       MS_NOATIME|MS_NODEV))
++	if (!test_unpriv_remount_atime(MS_RELATIME,
++				       MS_NOATIME))
+ 	{
+ 		die("MS_RELATIME malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
+-				       MS_NOATIME|MS_NODEV))
++	if (!test_unpriv_remount_atime(MS_STRICTATIME,
++				       MS_NOATIME))
+ 	{
+ 		die("MS_STRICTATIME malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
+-				       MS_STRICTATIME|MS_NODEV))
++	if (!test_unpriv_remount_atime(MS_NOATIME,
++				       MS_STRICTATIME))
+ 	{
+-		die("MS_RELATIME malfunctions\n");
++		die("MS_NOATIME malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
+-				       MS_NOATIME|MS_NODEV))
++	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
++				       MS_NOATIME))
+ 	{
+-		die("MS_RELATIME malfunctions\n");
++		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
+-				       MS_NOATIME|MS_NODEV))
++	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
++				       MS_NOATIME))
+ 	{
+-		die("MS_RELATIME malfunctions\n");
++		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
+-				       MS_STRICTATIME|MS_NODEV))
++	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
++				       MS_STRICTATIME))
+ 	{
+-		die("MS_RELATIME malfunctions\n");
++		die("MS_NOATIME|MS_DIRATIME malfunctions\n");
+ 	}
+-	if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
+-				 MS_NOATIME|MS_NODEV))
++	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
+ 	{
+ 		die("Default atime malfunctions\n");
+ 	}
++	if (!test_priv_mount_unpriv_remount()) {
++		die("Mount flags unexpectedly changed after remount\n");
++	}
+ 	return EXIT_SUCCESS;
+ }
+-- 
+2.1.0
+
diff --git a/umount-Disallow-unprivileged-mount-force.patch b/umount-Disallow-unprivileged-mount-force.patch
new file mode 100644
index 0000000..a57b2c9
--- /dev/null
+++ b/umount-Disallow-unprivileged-mount-force.patch
@@ -0,0 +1,33 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Sat, 4 Oct 2014 14:44:03 -0700
+Subject: [PATCH] umount: Disallow unprivileged mount force
+
+Forced unmount affects not just the mount namespace but the underlying
+superblock as well.  Restrict forced unmount to the global root user
+for now.  Otherwise it becomes possible for a user in a less privileged
+mount namespace to force the shutdown of a superblock of a filesystem
+in a more privileged mount namespace, allowing a DOS attack on root.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ fs/namespace.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 3a1a87dc33df..43b16af8af30 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1544,6 +1544,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+ 		goto dput_and_out;
+ 	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+ 		goto dput_and_out;
++	retval = -EPERM;
++	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
++		goto dput_and_out;
+ 
+ 	retval = do_umount(mnt, flags);
+ dput_and_out:
+-- 
+2.1.0
+
diff --git a/userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch b/userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch
new file mode 100644
index 0000000..a553817
--- /dev/null
+++ b/userns-Add-a-knob-to-disable-setgroups-on-a-per-user.patch
@@ -0,0 +1,280 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Tue, 2 Dec 2014 12:27:26 -0600
+Subject: [PATCH] userns: Add a knob to disable setgroups on a per user
+ namespace basis
+
+- Expose the knob to user space through a proc file /proc/<pid>/setgroups
+
+  A value of "deny" means the setgroups system call is disabled in the
+  current process's user namespace and can not be enabled in the
+  future in this user namespace.
+
+  A value of "allow" means the setgroups system call is enabled.
+
+- Descendant user namespaces inherit the value of setgroups from
+  their parents.
+
+- A proc file is used (instead of a sysctl) as sysctls currently do
+  not allow checking the permissions at open time.
+
+- Writing to the proc file is restricted to before the gid_map
+  for the user namespace is set.
+
+  This ensures that disabling setgroups at a user namespace
+  level will never remove the ability to call setgroups
+  from a process that already has that ability.
+
+  A process may opt in to the setgroups disable for itself by
+  creating, entering and configuring a user namespace or by calling
+  setns on an existing user namespace with setgroups disabled.
+  Processes without privileges already can not call setgroups so this
+  is a noop.  Processes with privilege become processes without
+  privilege when entering a user namespace and as with any other path
+  to dropping privilege they would not have the ability to call
+  setgroups.  So this remains within the bounds of what is possible
+  without a knob to disable setgroups permanently in a user namespace.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ fs/proc/base.c                 | 53 ++++++++++++++++++++++++++
+ include/linux/user_namespace.h |  7 ++++
+ kernel/user.c                  |  1 +
+ kernel/user_namespace.c        | 85 ++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 146 insertions(+)
+
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index 772efa45a452..7dc3ea89ef1a 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
+ 	.llseek		= seq_lseek,
+ 	.release	= proc_id_map_release,
+ };
++
++static int proc_setgroups_open(struct inode *inode, struct file *file)
++{
++	struct user_namespace *ns = NULL;
++	struct task_struct *task;
++	int ret;
++
++	ret = -ESRCH;
++	task = get_proc_task(inode);
++	if (task) {
++		rcu_read_lock();
++		ns = get_user_ns(task_cred_xxx(task, user_ns));
++		rcu_read_unlock();
++		put_task_struct(task);
++	}
++	if (!ns)
++		goto err;
++
++	if (file->f_mode & FMODE_WRITE) {
++		ret = -EACCES;
++		if (!ns_capable(ns, CAP_SYS_ADMIN))
++			goto err_put_ns;
++	}
++
++	ret = single_open(file, &proc_setgroups_show, ns);
++	if (ret)
++		goto err_put_ns;
++
++	return 0;
++err_put_ns:
++	put_user_ns(ns);
++err:
++	return ret;
++}
++
++static int proc_setgroups_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct user_namespace *ns = seq->private;
++	int ret = single_release(inode, file);
++	put_user_ns(ns);
++	return ret;
++}
++
++static const struct file_operations proc_setgroups_operations = {
++	.open		= proc_setgroups_open,
++	.write		= proc_setgroups_write,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= proc_setgroups_release,
++};
+ #endif /* CONFIG_USER_NS */
+ 
+ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
+@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
+ 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
+ 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
+ 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
++	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
+ #endif
+ #ifdef CONFIG_CHECKPOINT_RESTORE
+ 	REG("timers",	  S_IRUGO, proc_timers_operations),
+@@ -2913,6 +2965,7 @@ static const struct pid_entry tid_base_stuff[] = {
+ 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
+ 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
+ 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
++	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
+ #endif
+ };
+ 
+diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
+index 8d493083486a..9f3579ff543d 100644
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -17,6 +17,10 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
+ 	} extent[UID_GID_MAP_MAX_EXTENTS];
+ };
+ 
++#define USERNS_SETGROUPS_ALLOWED 1UL
++
++#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
++
+ struct user_namespace {
+ 	struct uid_gid_map	uid_map;
+ 	struct uid_gid_map	gid_map;
+@@ -27,6 +31,7 @@ struct user_namespace {
+ 	kuid_t			owner;
+ 	kgid_t			group;
+ 	unsigned int		proc_inum;
++	unsigned long		flags;
+ 
+ 	/* Register of per-UID persistent keyrings for this namespace */
+ #ifdef CONFIG_PERSISTENT_KEYRINGS
+@@ -63,6 +68,8 @@ extern const struct seq_operations proc_projid_seq_operations;
+ extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
++extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
++extern int proc_setgroups_show(struct seq_file *m, void *v);
+ extern bool userns_may_setgroups(const struct user_namespace *ns);
+ #else
+ 
+diff --git a/kernel/user.c b/kernel/user.c
+index 4efa39350e44..2d09940c9632 100644
+--- a/kernel/user.c
++++ b/kernel/user.c
+@@ -51,6 +51,7 @@ struct user_namespace init_user_ns = {
+ 	.owner = GLOBAL_ROOT_UID,
+ 	.group = GLOBAL_ROOT_GID,
+ 	.proc_inum = PROC_USER_INIT_INO,
++	.flags = USERNS_INIT_FLAGS,
+ #ifdef CONFIG_PERSISTENT_KEYRINGS
+ 	.persistent_keyring_register_sem =
+ 	__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 44a555ac6104..6e80f4c1322b 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -100,6 +100,11 @@ int create_user_ns(struct cred *new)
+ 	ns->owner = owner;
+ 	ns->group = group;
+ 
++	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
++	mutex_lock(&userns_state_mutex);
++	ns->flags = parent_ns->flags;
++	mutex_unlock(&userns_state_mutex);
++
+ 	set_cred_user_ns(new, ns);
+ 
+ #ifdef CONFIG_PERSISTENT_KEYRINGS
+@@ -839,6 +844,84 @@ static bool new_idmap_permitted(const struct file *file,
+ 	return false;
+ }
+ 
++int proc_setgroups_show(struct seq_file *seq, void *v)
++{
++	struct user_namespace *ns = seq->private;
++	unsigned long userns_flags = ACCESS_ONCE(ns->flags);
++
++	seq_printf(seq, "%s\n",
++		   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
++		   "allow" : "deny");
++	return 0;
++}
++
++ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
++			     size_t count, loff_t *ppos)
++{
++	struct seq_file *seq = file->private_data;
++	struct user_namespace *ns = seq->private;
++	char kbuf[8], *pos;
++	bool setgroups_allowed;
++	ssize_t ret;
++
++	/* Only allow a very narrow range of strings to be written */
++	ret = -EINVAL;
++	if ((*ppos != 0) || (count >= sizeof(kbuf)))
++		goto out;
++
++	/* What was written? */
++	ret = -EFAULT;
++	if (copy_from_user(kbuf, buf, count))
++		goto out;
++	kbuf[count] = '\0';
++	pos = kbuf;
++
++	/* What is being requested? */
++	ret = -EINVAL;
++	if (strncmp(pos, "allow", 5) == 0) {
++		pos += 5;
++		setgroups_allowed = true;
++	}
++	else if (strncmp(pos, "deny", 4) == 0) {
++		pos += 4;
++		setgroups_allowed = false;
++	}
++	else
++		goto out;
++
++	/* Verify there is not trailing junk on the line */
++	pos = skip_spaces(pos);
++	if (*pos != '\0')
++		goto out;
++
++	ret = -EPERM;
++	mutex_lock(&userns_state_mutex);
++	if (setgroups_allowed) {
++		/* Enabling setgroups after setgroups has been disabled
++		 * is not allowed.
++		 */
++		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
++			goto out_unlock;
++	} else {
++		/* Permanently disabling setgroups after setgroups has
++		 * been enabled by writing the gid_map is not allowed.
++		 */
++		if (ns->gid_map.nr_extents != 0)
++			goto out_unlock;
++		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
++	}
++	mutex_unlock(&userns_state_mutex);
++
++	/* Report a successful write */
++	*ppos = count;
++	ret = count;
++out:
++	return ret;
++out_unlock:
++	mutex_unlock(&userns_state_mutex);
++	goto out;
++}
++
+ bool userns_may_setgroups(const struct user_namespace *ns)
+ {
+ 	bool allowed;
+@@ -848,6 +931,8 @@ bool userns_may_setgroups(const struct user_namespace *ns)
+ 	 * the user namespace has been established.
+ 	 */
+ 	allowed = ns->gid_map.nr_extents != 0;
++	/* Is setgroups allowed? */
++	allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
+ 	mutex_unlock(&userns_state_mutex);
+ 
+ 	return allowed;
+-- 
+2.1.0
+
diff --git a/userns-Allow-setting-gid_maps-without-privilege-when.patch b/userns-Allow-setting-gid_maps-without-privilege-when.patch
new file mode 100644
index 0000000..97d3fe6
--- /dev/null
+++ b/userns-Allow-setting-gid_maps-without-privilege-when.patch
@@ -0,0 +1,40 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 5 Dec 2014 19:36:04 -0600
+Subject: [PATCH] userns: Allow setting gid_maps without privilege when
+ setgroups is disabled
+
+Now that setgroups can be disabled and not reenabled, setting gid_map
+without privilege can now be enabled when setgroups is disabled.
+
+This restores most of the functionality that was lost when unprivileged
+setting of gid_map was removed.  Applications that use this functionality
+will need to check to see if they use setgroups or init_groups, and if they
+don't they can be fixed by simply disabling setgroups before writing to
+gid_map.
+
+Cc: stable at vger.kernel.org
+Reviewed-by: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ kernel/user_namespace.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 6e80f4c1322b..a2e37c5d2f63 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -826,6 +826,11 @@ static bool new_idmap_permitted(const struct file *file,
+ 			kuid_t uid = make_kuid(ns->parent, id);
+ 			if (uid_eq(uid, cred->euid))
+ 				return true;
++		} else if (cap_setid == CAP_SETGID) {
++			kgid_t gid = make_kgid(ns->parent, id);
++			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
++			    gid_eq(gid, cred->egid))
++				return true;
+ 		}
+ 	}
+ 
+-- 
+2.1.0
+
diff --git a/userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch b/userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch
new file mode 100644
index 0000000..50830c3
--- /dev/null
+++ b/userns-Check-euid-no-fsuid-when-establishing-an-unpr.patch
@@ -0,0 +1,39 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 5 Dec 2014 18:26:30 -0600
+Subject: [PATCH] userns: Check euid no fsuid when establishing an unprivileged
+ uid mapping
+
+setresuid allows the euid to be set to any of uid, euid, suid, and
+fsuid.  Therefore it is safe to allow an unprivileged user to map
+their euid and use CAP_SETUID privileged with exactly that uid,
+as no new credentials can be obtained.
+
+I can not find a combination of existing system calls that allows setting
+uid, euid, suid, and fsuid from the fsuid making the previous use
+of fsuid for allowing unprivileged mappings a bug.
+
+This is part of a fix for CVE-2014-8989.
+
+Cc: stable at vger.kernel.org
+Reviewed-by: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ kernel/user_namespace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 1ce6d67c07b7..9451b12a9b6c 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -819,7 +819,7 @@ static bool new_idmap_permitted(const struct file *file,
+ 		u32 id = new_map->extent[0].lower_first;
+ 		if (cap_setid == CAP_SETUID) {
+ 			kuid_t uid = make_kuid(ns->parent, id);
+-			if (uid_eq(uid, file->f_cred->fsuid))
++			if (uid_eq(uid, file->f_cred->euid))
+ 				return true;
+ 		}
+ 	}
+-- 
+2.1.0
+
diff --git a/userns-Document-what-the-invariant-required-for-safe.patch b/userns-Document-what-the-invariant-required-for-safe.patch
new file mode 100644
index 0000000..c364b2b
--- /dev/null
+++ b/userns-Document-what-the-invariant-required-for-safe.patch
@@ -0,0 +1,48 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 5 Dec 2014 17:51:47 -0600
+Subject: [PATCH] userns: Document what the invariant required for safe
+ unprivileged mappings.
+
+The rule is simple.  Don't allow anything that wouldn't be allowed
+without unprivileged mappings.
+
+It was previously overlooked that establishing gid mappings would
+allow dropping groups and potentially gaining permission to files and
+directories that had lesser permissions for a specific group than for
+all other users.
+
+This is the rule needed to fix CVE-2014-8989 and prevent any other
+security issues with new_idmap_permitted.
+
+The reason for this rule is that the unix permission model is old and
+there are programs out there somewhere that take advantage of every
+little corner of it.  So allowing a uid or gid mapping to be
+established without privilege that would allow anything that would not
+be allowed without that mapping will result in expectations from some
+code somewhere being violated.  Violated expectations about the
+behavior of the OS is a long way to say a security issue.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ kernel/user_namespace.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index aa312b0dc3ec..b99c862a2e3f 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -812,7 +812,9 @@ static bool new_idmap_permitted(const struct file *file,
+ 				struct user_namespace *ns, int cap_setid,
+ 				struct uid_gid_map *new_map)
+ {
+-	/* Allow mapping to your own filesystem ids */
++	/* Don't allow mappings that would allow anything that wouldn't
++	 * be allowed without the establishment of unprivileged mappings.
++	 */
+ 	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+ 		u32 id = new_map->extent[0].lower_first;
+ 		if (cap_setid == CAP_SETUID) {
+-- 
+2.1.0
+
diff --git a/userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch b/userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch
new file mode 100644
index 0000000..81217d2
--- /dev/null
+++ b/userns-Don-t-allow-setgroups-until-a-gid-mapping-has.patch
@@ -0,0 +1,98 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 5 Dec 2014 18:01:11 -0600
+Subject: [PATCH] userns: Don't allow setgroups until a gid mapping has been
+ established
+
+setgroups is unique in not needing a valid mapping before it can be called,
+in the case of setgroups(0, NULL) which drops all supplemental groups.
+
+The design of the user namespace assumes that CAP_SETGID can not actually
+be used until a gid mapping is established.  Therefore add a helper function
+to see if the user namespace gid mapping has been established and call
+that function in the setgroups permission check.
+
+This is part of the fix for CVE-2014-8989, being able to drop groups
+without privilege using user namespaces.
+
+Cc: stable at vger.kernel.org
+Reviewed-by: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ include/linux/user_namespace.h |  5 +++++
+ kernel/groups.c                |  4 +++-
+ kernel/user_namespace.c        | 14 ++++++++++++++
+ 3 files changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
+index e95372654f09..8d493083486a 100644
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -63,6 +63,7 @@ extern const struct seq_operations proc_projid_seq_operations;
+ extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
++extern bool userns_may_setgroups(const struct user_namespace *ns);
+ #else
+ 
+ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
+@@ -87,6 +88,10 @@ static inline void put_user_ns(struct user_namespace *ns)
+ {
+ }
+ 
++static inline bool userns_may_setgroups(const struct user_namespace *ns)
++{
++	return true;
++}
+ #endif
+ 
+ #endif /* _LINUX_USER_H */
+diff --git a/kernel/groups.c b/kernel/groups.c
+index 02d8a251c476..664411f171b5 100644
+--- a/kernel/groups.c
++++ b/kernel/groups.c
+@@ -6,6 +6,7 @@
+ #include <linux/slab.h>
+ #include <linux/security.h>
+ #include <linux/syscalls.h>
++#include <linux/user_namespace.h>
+ #include <asm/uaccess.h>
+ 
+ /* init to 2 - one for init_task, one to ensure it is never freed */
+@@ -217,7 +218,8 @@ bool may_setgroups(void)
+ {
+ 	struct user_namespace *user_ns = current_user_ns();
+ 
+-	return ns_capable(user_ns, CAP_SETGID);
++	return ns_capable(user_ns, CAP_SETGID) &&
++		userns_may_setgroups(user_ns);
+ }
+ 
+ /*
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index b99c862a2e3f..27c8dab48c07 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -843,6 +843,20 @@ static bool new_idmap_permitted(const struct file *file,
+ 	return false;
+ }
+ 
++bool userns_may_setgroups(const struct user_namespace *ns)
++{
++	bool allowed;
++
++	mutex_lock(&id_map_mutex);
++	/* It is not safe to use setgroups until a gid mapping in
++	 * the user namespace has been established.
++	 */
++	allowed = ns->gid_map.nr_extents != 0;
++	mutex_unlock(&id_map_mutex);
++
++	return allowed;
++}
++
+ static void *userns_get(struct task_struct *task)
+ {
+ 	struct user_namespace *user_ns;
+-- 
+2.1.0
+
diff --git a/userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch b/userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch
new file mode 100644
index 0000000..b1d5382
--- /dev/null
+++ b/userns-Don-t-allow-unprivileged-creation-of-gid-mapp.patch
@@ -0,0 +1,46 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Fri, 5 Dec 2014 18:14:19 -0600
+Subject: [PATCH] userns: Don't allow unprivileged creation of gid mappings
+
+As any gid mapping will allow, and must allow for backwards
+compatibility, dropping groups, don't allow any gid mappings to be
+established without CAP_SETGID in the parent user namespace.
+
+For a small class of applications this change breaks userspace
+and removes useful functionality.  This small class of applications
+includes tools/testing/selftests/mount/unprivileged-remount-test.c
+
+Most of the removed functionality will be added back with the addition
+of a one way knob to disable setgroups.  Once setgroups is disabled
+setting the gid_map becomes as safe as setting the uid_map.
+
+For more common applications that set the uid_map and the gid_map
+with privilege this change will have no effect.
+
+This is part of a fix for CVE-2014-8989.
+
+Cc: stable at vger.kernel.org
+Reviewed-by: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ kernel/user_namespace.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 27c8dab48c07..1ce6d67c07b7 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -821,10 +821,6 @@ static bool new_idmap_permitted(const struct file *file,
+ 			kuid_t uid = make_kuid(ns->parent, id);
+ 			if (uid_eq(uid, file->f_cred->fsuid))
+ 				return true;
+-		} else if (cap_setid == CAP_SETGID) {
+-			kgid_t gid = make_kgid(ns->parent, id);
+-			if (gid_eq(gid, file->f_cred->fsgid))
+-				return true;
+ 		}
+ 	}
+ 
+-- 
+2.1.0
+
diff --git a/userns-Only-allow-the-creator-of-the-userns-unprivil.patch b/userns-Only-allow-the-creator-of-the-userns-unprivil.patch
new file mode 100644
index 0000000..8381b14
--- /dev/null
+++ b/userns-Only-allow-the-creator-of-the-userns-unprivil.patch
@@ -0,0 +1,54 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Wed, 26 Nov 2014 23:22:14 -0600
+Subject: [PATCH] userns: Only allow the creator of the userns unprivileged
+ mappings
+
+If you did not create the user namespace and are allowed
+to write to uid_map or gid_map you should already have the necessary
+privilege in the parent user namespace to establish any mapping
+you want so this will not affect userspace in practice.
+
+Limiting unprivileged uid mapping establishment to the creator of the
+user namespace makes it easier to verify all credentials obtained with
+the uid mapping can be obtained without the uid mapping without
+privilege.
+
+Limiting unprivileged gid mapping establishment (which is temporarily
+absent) to the creator of the user namespace also ensures that the
+combination of uid and gid can already be obtained without privilege.
+
+This is part of the fix for CVE-2014-8989.
+
+Cc: stable at vger.kernel.org
+Reviewed-by: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ kernel/user_namespace.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 9451b12a9b6c..1e34de2fbd60 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -812,14 +812,16 @@ static bool new_idmap_permitted(const struct file *file,
+ 				struct user_namespace *ns, int cap_setid,
+ 				struct uid_gid_map *new_map)
+ {
++	const struct cred *cred = file->f_cred;
+ 	/* Don't allow mappings that would allow anything that wouldn't
+ 	 * be allowed without the establishment of unprivileged mappings.
+ 	 */
+-	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
++	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
++	    uid_eq(ns->owner, cred->euid)) {
+ 		u32 id = new_map->extent[0].lower_first;
+ 		if (cap_setid == CAP_SETUID) {
+ 			kuid_t uid = make_kuid(ns->parent, id);
+-			if (uid_eq(uid, file->f_cred->euid))
++			if (uid_eq(uid, cred->euid))
+ 				return true;
+ 		}
+ 	}
+-- 
+2.1.0
+
diff --git a/userns-Rename-id_map_mutex-to-userns_state_mutex.patch b/userns-Rename-id_map_mutex-to-userns_state_mutex.patch
new file mode 100644
index 0000000..ce6288a
--- /dev/null
+++ b/userns-Rename-id_map_mutex-to-userns_state_mutex.patch
@@ -0,0 +1,80 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Tue, 9 Dec 2014 14:03:14 -0600
+Subject: [PATCH] userns: Rename id_map_mutex to userns_state_mutex
+
+Generalize id_map_mutex so it can be used for more state of a user namespace.
+
+Cc: stable at vger.kernel.org
+Reviewed-by: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ kernel/user_namespace.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 1e34de2fbd60..44a555ac6104 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -24,6 +24,7 @@
+ #include <linux/fs_struct.h>
+ 
+ static struct kmem_cache *user_ns_cachep __read_mostly;
++static DEFINE_MUTEX(userns_state_mutex);
+ 
+ static bool new_idmap_permitted(const struct file *file,
+ 				struct user_namespace *ns, int cap_setid,
+@@ -583,9 +584,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
+ 	return false;
+ }
+ 
+-
+-static DEFINE_MUTEX(id_map_mutex);
+-
+ static ssize_t map_write(struct file *file, const char __user *buf,
+ 			 size_t count, loff_t *ppos,
+ 			 int cap_setid,
+@@ -602,7 +600,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
+ 	ssize_t ret = -EINVAL;
+ 
+ 	/*
+-	 * The id_map_mutex serializes all writes to any given map.
++	 * The userns_state_mutex serializes all writes to any given map.
+ 	 *
+ 	 * Any map is only ever written once.
+ 	 *
+@@ -620,7 +618,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
+ 	 * order and smp_rmb() is guaranteed that we don't have crazy
+ 	 * architectures returning stale data.
+ 	 */
+-	mutex_lock(&id_map_mutex);
++	mutex_lock(&userns_state_mutex);
+ 
+ 	ret = -EPERM;
+ 	/* Only allow one successful write to the map */
+@@ -750,7 +748,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
+ 	*ppos = count;
+ 	ret = count;
+ out:
+-	mutex_unlock(&id_map_mutex);
++	mutex_unlock(&userns_state_mutex);
+ 	if (page)
+ 		free_page(page);
+ 	return ret;
+@@ -845,12 +843,12 @@ bool userns_may_setgroups(const struct user_namespace *ns)
+ {
+ 	bool allowed;
+ 
+-	mutex_lock(&id_map_mutex);
++	mutex_lock(&userns_state_mutex);
+ 	/* It is not safe to use setgroups until a gid mapping in
+ 	 * the user namespace has been established.
+ 	 */
+ 	allowed = ns->gid_map.nr_extents != 0;
+-	mutex_unlock(&id_map_mutex);
++	mutex_unlock(&userns_state_mutex);
+ 
+ 	return allowed;
+ }
+-- 
+2.1.0
+
diff --git a/userns-Unbreak-the-unprivileged-remount-tests.patch b/userns-Unbreak-the-unprivileged-remount-tests.patch
new file mode 100644
index 0000000..69edd2e
--- /dev/null
+++ b/userns-Unbreak-the-unprivileged-remount-tests.patch
@@ -0,0 +1,91 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Tue, 2 Dec 2014 13:56:30 -0600
+Subject: [PATCH] userns: Unbreak the unprivileged remount tests
+
+A security fix caused the way the unprivileged remount tests were
+using user namespaces to break.  Tweak the way user namespaces are
+being used so the test works again.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+---
+ .../selftests/mount/unprivileged-remount-test.c    | 32 ++++++++++++++++------
+ 1 file changed, 24 insertions(+), 8 deletions(-)
+
+diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
+index 9669d375625a..517785052f1c 100644
+--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
++++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
+@@ -53,17 +53,14 @@ static void die(char *fmt, ...)
+ 	exit(EXIT_FAILURE);
+ }
+ 
+-static void write_file(char *filename, char *fmt, ...)
++static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+ {
+ 	char buf[4096];
+ 	int fd;
+ 	ssize_t written;
+ 	int buf_len;
+-	va_list ap;
+ 
+-	va_start(ap, fmt);
+ 	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+-	va_end(ap);
+ 	if (buf_len < 0) {
+ 		die("vsnprintf failed: %s\n",
+ 		    strerror(errno));
+@@ -74,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...)
+ 
+ 	fd = open(filename, O_WRONLY);
+ 	if (fd < 0) {
++		if ((errno == ENOENT) && enoent_ok)
++			return;
+ 		die("open of %s failed: %s\n",
+ 		    filename, strerror(errno));
+ 	}
+@@ -92,6 +91,26 @@ static void write_file(char *filename, char *fmt, ...)
+ 	}
+ }
+ 
++static void maybe_write_file(char *filename, char *fmt, ...)
++{
++	va_list ap;
++
++	va_start(ap, fmt);
++	vmaybe_write_file(true, filename, fmt, ap);
++	va_end(ap);
++
++}
++
++static void write_file(char *filename, char *fmt, ...)
++{
++	va_list ap;
++
++	va_start(ap, fmt);
++	vmaybe_write_file(false, filename, fmt, ap);
++	va_end(ap);
++
++}
++
+ static int read_mnt_flags(const char *path)
+ {
+ 	int ret;
+@@ -144,13 +163,10 @@ static void create_and_enter_userns(void)
+ 			strerror(errno));
+ 	}
+ 
++	maybe_write_file("/proc/self/setgroups", "deny");
+ 	write_file("/proc/self/uid_map", "0 %d 1", uid);
+ 	write_file("/proc/self/gid_map", "0 %d 1", gid);
+ 
+-	if (setgroups(0, NULL) != 0) {
+-		die("setgroups failed: %s\n",
+-			strerror(errno));
+-	}
+ 	if (setgid(0) != 0) {
+ 		die ("setgid(0) failed %s\n",
+ 			strerror(errno));
+-- 
+2.1.0
+


More information about the scm-commits mailing list