mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-24 14:07:52 -04:00
Last cycle we've already made the interaction with idmapped mounts more robust and type safe by introducing the vfs{g,u}id_t type. This cycle we concluded the conversion and removed the legacy helpers. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate filesystem and mount namespaces and what different roles they have to play. Especially for filesystem developers without much experience in this area this is an easy source for bugs. Instead of passing the plain namespace we introduce a dedicated type struct mnt_idmap and replace the pointer with a pointer to a struct mnt_idmap. There are no semantic or size changes for the mount struct caused by this. We then start converting all places aware of idmapped mounts to rely on struct mnt_idmap. Once the conversion is done all helpers down to the really low-level make_vfs{g,u}id() and from_vfs{g,u}id() will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two, removing and thus eliminating the possibility of any bugs. Fwiw, I fixed some issues in that area a while ago in ntfs3 and ksmbd in the past. Afterwards, only low-level code can ultimately use the associated namespace for any permission checks. Even most of the vfs can be ultimately completely oblivious about this and filesystems will never interact with it directly in any form in the future. A struct mnt_idmap currently encompasses a simple refcount and a pointer to the relevant namespace the mount is idmapped to. If a mount isn't idmapped then it will point to a static nop_mnt_idmap. If it is an idmapped mount it will point to a new struct mnt_idmap. As usual there are no allocations or anything happening for non-idmapped mounts. Everthing is carefully written to be a nop for non-idmapped mounts as has always been the case. If an idmapped mount or mount tree is created a new struct mnt_idmap is allocated and a reference taken on the relevant namespace. For each mount in a mount tree that gets idmapped or a mount that inherits the idmap when it is cloned the reference count on the associated struct mnt_idmap is bumped. Just a reminder that we only allow a mount to change it's idmapping a single time and only if it hasn't already been attached to the filesystems and has no active writers. The actual changes are fairly straightforward. This will have huge benefits for maintenance and security in the long run even if it causes some churn. I'm aware that there's some cost for all of you. And I'll commit to doing this work and make this as painless as I can. Note that this also makes it possible to extend struct mount_idmap in the future. For example, it would be possible to place the namespace pointer in an anonymous union together with an idmapping struct. This would allow us to expose an api to userspace that would let it specify idmappings directly instead of having to go through the detour of setting up namespaces at all. This just adds the infrastructure and doesn't do any conversions. Reviewed-by: Seth Forshee (DigitalOcean) <sforshee@kernel.org> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
128 lines
4.4 KiB
C
128 lines
4.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
*
|
|
* Definitions for mount interface. This describes the in the kernel build
|
|
* linkedlist with mounted filesystems.
|
|
*
|
|
* Author: Marco van Wieringen <mvw@planets.elm.net>
|
|
*
|
|
*/
|
|
#ifndef _LINUX_MOUNT_H
|
|
#define _LINUX_MOUNT_H
|
|
|
|
#include <linux/types.h>
|
|
#include <asm/barrier.h>
|
|
|
|
struct super_block;
|
|
struct dentry;
|
|
struct user_namespace;
|
|
struct mnt_idmap;
|
|
struct file_system_type;
|
|
struct fs_context;
|
|
struct file;
|
|
struct path;
|
|
|
|
#define MNT_NOSUID 0x01
|
|
#define MNT_NODEV 0x02
|
|
#define MNT_NOEXEC 0x04
|
|
#define MNT_NOATIME 0x08
|
|
#define MNT_NODIRATIME 0x10
|
|
#define MNT_RELATIME 0x20
|
|
#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
|
|
#define MNT_NOSYMFOLLOW 0x80
|
|
|
|
#define MNT_SHRINKABLE 0x100
|
|
#define MNT_WRITE_HOLD 0x200
|
|
|
|
#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
|
|
#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
|
|
/*
|
|
* MNT_SHARED_MASK is the set of flags that should be cleared when a
|
|
* mount becomes shared. Currently, this is only the flag that says a
|
|
* mount cannot be bind mounted, since this is how we create a mount
|
|
* that shares events with another mount. If you add a new MNT_*
|
|
* flag, consider how it interacts with shared mounts.
|
|
*/
|
|
#define MNT_SHARED_MASK (MNT_UNBINDABLE)
|
|
#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
|
|
| MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
|
|
| MNT_READONLY | MNT_NOSYMFOLLOW)
|
|
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
|
|
|
|
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
|
|
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
|
|
MNT_CURSOR)
|
|
|
|
#define MNT_INTERNAL 0x4000
|
|
|
|
#define MNT_LOCK_ATIME 0x040000
|
|
#define MNT_LOCK_NOEXEC 0x080000
|
|
#define MNT_LOCK_NOSUID 0x100000
|
|
#define MNT_LOCK_NODEV 0x200000
|
|
#define MNT_LOCK_READONLY 0x400000
|
|
#define MNT_LOCKED 0x800000
|
|
#define MNT_DOOMED 0x1000000
|
|
#define MNT_SYNC_UMOUNT 0x2000000
|
|
#define MNT_MARKED 0x4000000
|
|
#define MNT_UMOUNT 0x8000000
|
|
#define MNT_CURSOR 0x10000000
|
|
|
|
struct vfsmount {
|
|
struct dentry *mnt_root; /* root of the mounted tree */
|
|
struct super_block *mnt_sb; /* pointer to superblock */
|
|
int mnt_flags;
|
|
struct mnt_idmap *mnt_idmap;
|
|
} __randomize_layout;
|
|
|
|
struct user_namespace *mnt_user_ns(const struct vfsmount *mnt);
|
|
struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap);
|
|
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
|
|
{
|
|
/* Pairs with smp_store_release() in do_idmap_mount(). */
|
|
return smp_load_acquire(&mnt->mnt_idmap);
|
|
}
|
|
|
|
extern int mnt_want_write(struct vfsmount *mnt);
|
|
extern int mnt_want_write_file(struct file *file);
|
|
extern void mnt_drop_write(struct vfsmount *mnt);
|
|
extern void mnt_drop_write_file(struct file *file);
|
|
extern void mntput(struct vfsmount *mnt);
|
|
extern struct vfsmount *mntget(struct vfsmount *mnt);
|
|
extern struct vfsmount *mnt_clone_internal(const struct path *path);
|
|
extern bool __mnt_is_readonly(struct vfsmount *mnt);
|
|
extern bool mnt_may_suid(struct vfsmount *mnt);
|
|
|
|
extern struct vfsmount *clone_private_mount(const struct path *path);
|
|
extern int __mnt_want_write(struct vfsmount *);
|
|
extern void __mnt_drop_write(struct vfsmount *);
|
|
|
|
extern struct vfsmount *fc_mount(struct fs_context *fc);
|
|
extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
|
|
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
|
|
int flags, const char *name,
|
|
void *data);
|
|
extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
|
|
struct file_system_type *type,
|
|
const char *name, void *data);
|
|
|
|
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
|
|
extern void mark_mounts_for_expiry(struct list_head *mounts);
|
|
|
|
extern dev_t name_to_dev_t(const char *name);
|
|
extern bool path_is_mountpoint(const struct path *path);
|
|
|
|
extern bool our_mnt(struct vfsmount *mnt);
|
|
|
|
extern struct vfsmount *kern_mount(struct file_system_type *);
|
|
extern void kern_unmount(struct vfsmount *mnt);
|
|
extern int may_umount_tree(struct vfsmount *);
|
|
extern int may_umount(struct vfsmount *);
|
|
extern long do_mount(const char *, const char __user *,
|
|
const char *, unsigned long, void *);
|
|
extern struct vfsmount *collect_mounts(const struct path *);
|
|
extern void drop_collected_mounts(struct vfsmount *);
|
|
extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
|
|
struct vfsmount *);
|
|
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
|
|
|
|
#endif /* _LINUX_MOUNT_H */
|