commit a08d1c038c8457cda1b5d85c4d628595157812c1
Author: Serge Hallyn <serge.hallyn@ubuntu.com>
Date:   Tue Feb 10 21:18:45 2015 -0600

    startup: pivot into a mostly-empty new root
    
    And umount everything but /proc and /run/cgmanager/fs in our new root.
    
    That prevents us from pinning them if the host wants to unmount them.
    
    Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>

Index: cgmanager-0.33/configure.ac
===================================================================
--- cgmanager-0.33.orig/configure.ac
+++ cgmanager-0.33/configure.ac
@@ -23,6 +23,8 @@ AC_PROG_CC
 
 AC_PROG_CC_C99
 
+AC_CHECK_FUNCS([setns pivot_root])
+
 AC_PATH_PROG([NIH_DBUS_TOOL], [nih-dbus-tool])
 
 PKG_CHECK_MODULES([NIH], [libnih >= 1.0.2])
Index: cgmanager-0.33/fs.c
===================================================================
--- cgmanager-0.33.orig/fs.c
+++ cgmanager-0.33/fs.c
@@ -56,6 +56,21 @@
 #define AGENT SBINDIR "/cgm-release-agent"
 #define AGENT_LINK_PATH "/run/cgmanager/agents"
 
+/* Define pivot_root() if missing from the C library */
+#ifndef HAVE_PIVOT_ROOT
+static int pivot_root(const char * new_root, const char * put_old)
+{
+#ifdef __NR_pivot_root
+return syscall(__NR_pivot_root, new_root, put_old);
+#else
+errno = ENOSYS;
+return -1;
+#endif
+}
+#else
+extern int pivot_root(const char * new_root, const char * put_old);
+#endif
+
 char *all_controllers;
 
 struct controller_mounts {
@@ -124,6 +139,10 @@ bool setup_base_run_path(void)
 		nih_fatal("%s: failed to create /run/cgmanager/fs", __func__);
 		return false;
 	}
+	if (mount("cgmfs", "/run/cgmanager/fs", "tmpfs", 0, "size=100000,mode=0755") < 0) {
+		nih_fatal("%s: failed to mount tmpfs onto /run/cgmanager/fs", __func__);
+		return false;
+	}
 	if (mkdir(AGENT_LINK_PATH, 0755) < 0 && errno != EEXIST) {
 		nih_fatal("%s: failed to create %s", __func__, AGENT_LINK_PATH);
 		return false;
@@ -768,6 +787,116 @@ int collect_subsystems(char *extra_mount
 	return 0;
 }
 
+#define NEWROOT "/run/cgmanager/root"
+
+static int do_pivot(void) {
+	int oldroot = -1, newroot = -1;
+
+	oldroot = open("/", O_DIRECTORY | O_RDONLY);
+	if (oldroot < 0) {
+		nih_fatal("%s: Error opening old-/ for fchdir", __func__);
+		return -1;
+	}
+	newroot = open(NEWROOT, O_DIRECTORY | O_RDONLY);
+	if (newroot < 0) {
+		nih_fatal("%s: Error opening new-/ for fchdir", __func__);
+		goto fail;
+	}
+
+	/* change into new root fs */
+	if (fchdir(newroot)) {
+		nih_fatal("%s: can't chdir to new rootfs '%s'", __func__, NEWROOT);
+		goto fail;
+	}
+
+	/* pivot_root into our new root fs */
+	if (pivot_root(".", ".")) {
+		nih_fatal("%s: pivot_root syscall failed: %s",
+				__func__, strerror(errno));
+		goto fail;
+	}
+
+	/*
+	 * at this point the old-root is mounted on top of our new-root
+	 * To unmounted it we must not be chdir'd into it, so escape back
+	 * to old-root
+	 */
+	if (fchdir(oldroot) < 0) {
+		nih_fatal("%s: Error entering oldroot", __func__);
+		goto fail;
+	}
+	if (umount2(".", MNT_DETACH) < 0) {
+		nih_fatal("%s: Error detaching old root", __func__);
+		goto fail;
+	}
+
+	if (fchdir(newroot) < 0) {
+		nih_fatal("%s: Error re-entering newroot", __func__);
+		goto fail;
+	}
+
+	close(oldroot);
+	close(newroot);
+
+	return 0;
+
+fail:
+	if (oldroot != -1)
+		close(oldroot);
+	if (newroot != -1)
+		close(newroot);
+	return -1;
+}
+
+static int pivot_into_new_root(void) {
+	int i, ret;
+	char *createdirs[] = {NEWROOT "/proc", NEWROOT "/run",
+		NEWROOT "/run/cgmanager", NEWROOT "/run/cgmanager/fs", NULL};
+	char path[100];
+
+	/* Mount tmpfs for new root */
+	if (mkdir(NEWROOT, 0755) < 0 && errno != EEXIST) {
+		nih_fatal("%s: Failed to create directory for new root\n", __func__);
+		return -1;
+	}
+	ret = mount("root", NEWROOT, "tmpfs", 0, "size=10000,mode=0755");
+	if (ret < 0) {
+		nih_fatal("%s: Failed to mount tmpfs for root", __func__);
+		return -1;
+	}
+
+	/* create /proc and /run/cgmanager/fs, and move-mount those */
+	for (i = 0; createdirs[i]; i++) {
+		if (mkdir(createdirs[i], 0755) < 0) {
+			nih_fatal("%s: failed to created %s\n", __func__, createdirs[i]);
+			return -1;
+		}
+	}
+
+	ret = snprintf(path, 100, NEWROOT "/proc");
+	if (ret < 0 || ret > 100)
+		return -1;
+	if (mount("/proc", path, NULL, MS_REC|MS_MOVE, 0) < 0) {
+		nih_fatal("%s: failed to move /proc into new root: %s",
+			__func__, strerror(errno));
+		return -1;
+	}
+	ret = snprintf(path, 100, NEWROOT "/run/cgmanager/fs");
+	if (ret < 0 || ret > 100)
+		return -1;
+	if (mount("/run/cgmanager/fs", path, NULL, MS_REC|MS_MOVE, 0) < 0) {
+		nih_fatal("%s: failed to move /run/cgmanager/fs into new root: %s",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	/* Pivot into new root */
+	if (do_pivot() < 0)
+		return -1;
+
+	return 0;
+}
+
 /**
  * Mount the cgroup filesystems and record the information.
  * This should take configuration data from /etc.  For now,
@@ -789,8 +918,10 @@ int setup_cgroup_mounts(void)
 		return 0;
 	}
 
-	if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, 0) < 0)
-		nih_warn("Failed to re-mount / non-shared");
+	if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, 0) < 0) {
+		nih_warn("Failed to re-mount / private");
+		return -1;
+	}
 
 	for (i=0; i<num_controllers; i++) {
 		if (!do_mount_subsys(i)) {
@@ -799,6 +930,12 @@ int setup_cgroup_mounts(void)
 		}
 	}
 
+	/* Now pivot into a new root */
+	if (pivot_into_new_root() < 0) {
+		nih_fatal("Failed pivoting into new root");
+		return -1;
+	}
+
 	return 0;
 }
 
