summaryrefslogtreecommitdiff
path: root/fs/nullfs.c
diff options
context:
space:
mode:
author: Christian Brauner <brauner@kernel.org> 2026-01-12 16:47:10 +0100
committer: Christian Brauner <brauner@kernel.org> 2026-01-12 16:52:09 +0100
commit: 576ee5dfd459abe8e29bee8b204cd259e60b4e18 (patch)
tree: 466c8e1db55175c6aadf7d29f5e9f2fb814f69b0 /fs/nullfs.c
parent: 3c1b73fc6a4d7bc5469ab2679ef954f7b754d34b (diff)
fs: add immutable rootfs
Currently pivot_root() doesn't work on the real rootfs because it cannot be unmounted. Userspace has to do a recursive removal of the initramfs contents manually before continuing the boot. Really all we want from the real rootfs is to serve as the parent mount for anything that is actually useful such as the tmpfs or ramfs for initramfs unpacking or the rootfs itself. There's no need for the real rootfs to actually be anything meaningful or useful. Add an immutable rootfs called "nullfs" that can be selected via the "nullfs_rootfs" kernel command line option. The kernel will mount a tmpfs/ramfs on top of it, unpack the initramfs and fire up userspace which mounts the rootfs and can then just do: chdir(rootfs); pivot_root(".", "."); umount2(".", MNT_DETACH); and be done with it. (Ofc, userspace can also choose to retain the initramfs contents by using something like pivot_root(".", "/initramfs") without unmounting it.) Technically this also means that the rootfs mount in unprivileged namespaces doesn't need to become MNT_LOCKED anymore as it's guaranteed that the immutable rootfs remains permanently empty so there cannot be anything revealed by unmounting the covering mount. In the future this will also allow us to create completely empty mount namespaces without risking leaking anything. systemd already handles this all correctly as it tries to pivot_root() first and falls back to MS_MOVE only when that fails. This goes back to various discussions in previous years and an LPC 2024 presentation about this very topic. Link: https://patch.msgid.link/20260112-work-immutable-rootfs-v2-3-88dd1c34a204@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/nullfs.c')
-rw-r--r-- fs/nullfs.c 70
1 files changed, 70 insertions, 0 deletions
diff --git a/fs/nullfs.c b/fs/nullfs.c
new file mode 100644
index 000000000000..fdbd3e5d3d71
--- /dev/null
+++ b/fs/nullfs.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+#include <linux/fs/super_types.h>
+#include <linux/fs_context.h>
+#include <linux/magic.h>
+
+static const struct super_operations nullfs_super_operations = {
+ .statfs = simple_statfs, /* the only super operation nullfs sets */
+};
+
+static int nullfs_fs_fill_super(struct super_block *s, struct fs_context *fc)
+{
+ struct inode *inode; /* will become the root directory; @fc is not used in this function */
+
+ s->s_maxbytes = MAX_LFS_FILESIZE;
+ s->s_blocksize = PAGE_SIZE;
+ s->s_blocksize_bits = PAGE_SHIFT;
+ s->s_magic = NULL_FS_MAGIC;
+ s->s_op = &nullfs_super_operations;
+ s->s_export_op = NULL; /* no export operations */
+ s->s_xattr = NULL; /* no xattr handlers */
+ s->s_time_gran = 1;
+ s->s_d_flags = 0;
+
+ inode = new_inode(s);
+ if (!inode)
+ return -ENOMEM;
+
+ /* nullfs is permanently empty... */
+ make_empty_dir_inode(inode);
+ simple_inode_init_ts(inode);
+ inode->i_ino = 1;
+ /* ... and immutable. */
+ inode->i_flags |= S_IMMUTABLE;
+
+ s->s_root = d_make_root(inode); /* the inode set up above becomes the superblock root */
+ if (!s->s_root)
+ return -ENOMEM; /* NOTE(review): inode is not released here; presumably d_make_root() drops it on failure - confirm */
+
+ return 0; /* success: @s now has its root dentry set */
+}
+
+/*
+ * Obtain the nullfs tree through get_tree_single(), passing
+ * nullfs_fs_fill_super() as the superblock fill callback, and hand
+ * back its return value unchanged.
+ *
+ * For now this is a single global instance. If needed we can make it
+ * mountable by userspace at which point we will need to make it
+ * multi-instance.
+ */
+static int nullfs_fs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, nullfs_fs_fill_super);
+}
+
+static const struct fs_context_operations nullfs_fs_context_ops = {
+ .get_tree = nullfs_fs_get_tree, /* the only context operation nullfs provides */
+};
+
+static int nullfs_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &nullfs_fs_context_ops; /* routes get_tree to nullfs_fs_get_tree() */
+ fc->global = true;
+ fc->sb_flags = SB_NOUSER; /* NOTE(review): presumably what keeps nullfs from being mounted by userspace - confirm */
+ fc->s_iflags = SB_I_NOEXEC | SB_I_NODEV; /* NOTE(review): plain assignment, so any earlier s_iflags bits are overwritten */
+ return 0; /* this function has no failure path */
+}
+
+struct file_system_type nullfs_fs_type = { /* not static: the symbol has external linkage */
+ .name = "nullfs",
+ .init_fs_context = nullfs_init_fs_context, /* installs nullfs_fs_context_ops on the context */
+ .kill_sb = kill_anon_super,
+};