mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-07-24 01:54:03 -04:00
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCYCegywAKCRCRxhvAZXjc ouJ6AQDlf+7jCQlQdeKKoN9QDFfMzG1ooemat36EpRRTONaGuAD8D9A4sUsG4+5f 4IU5Lj9oY4DEmF8HenbWK2ZHsesL2Qg= =yPaw -----END PGP SIGNATURE----- Merge tag 'idmapped-mounts-v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux Pull idmapped mounts from Christian Brauner: "This introduces idmapped mounts which has been in the making for some time. Simply put, different mounts can expose the same file or directory with different ownership. This initial implementation comes with ports for fat, ext4 and with Christoph's port for xfs with more filesystems being actively worked on by independent people and maintainers. Idmapping mounts handle a wide range of long standing use-cases. Here are just a few: - Idmapped mounts make it possible to easily share files between multiple users or multiple machines especially in complex scenarios. For example, idmapped mounts will be used in the implementation of portable home directories in systemd-homed.service(8) where they allow users to move their home directory to an external storage device and use it on multiple computers where they are assigned different uids and gids. This effectively makes it possible to assign random uids and gids at login time. - It is possible to share files from the host with unprivileged containers without having to change ownership permanently through chown(2). - It is possible to idmap a container's rootfs and without having to mangle every file. For example, Chromebooks use it to share the user's Download folder with their unprivileged containers in their Linux subsystem. - It is possible to share files between containers with non-overlapping idmappings. - Filesystem that lack a proper concept of ownership such as fat can use idmapped mounts to implement discretionary access (DAC) permission checking. - They allow users to efficiently changing ownership on a per-mount basis without having to (recursively) chown(2) all files. In contrast to chown (2) changing ownership of large sets of files is instantenous with idmapped mounts. This is especially useful when ownership of a whole root filesystem of a virtual machine or container is changed. With idmapped mounts a single syscall mount_setattr syscall will be sufficient to change the ownership of all files. - Idmapped mounts always take the current ownership into account as idmappings specify what a given uid or gid is supposed to be mapped to. This contrasts with the chown(2) syscall which cannot by itself take the current ownership of the files it changes into account. It simply changes the ownership to the specified uid and gid. This is especially problematic when recursively chown(2)ing a large set of files which is commong with the aforementioned portable home directory and container and vm scenario. - Idmapped mounts allow to change ownership locally, restricting it to specific mounts, and temporarily as the ownership changes only apply as long as the mount exists. Several userspace projects have either already put up patches and pull-requests for this feature or will do so should you decide to pull this: - systemd: In a wide variety of scenarios but especially right away in their implementation of portable home directories. https://systemd.io/HOME_DIRECTORY/ - container runtimes: containerd, runC, LXD:To share data between host and unprivileged containers, unprivileged and privileged containers, etc. The pull request for idmapped mounts support in containerd, the default Kubernetes runtime is already up for quite a while now: https://github.com/containerd/containerd/pull/4734 - The virtio-fs developers and several users have expressed interest in using this feature with virtual machines once virtio-fs is ported. - ChromeOS: Sharing host-directories with unprivileged containers. I've tightly synced with all those projects and all of those listed here have also expressed their need/desire for this feature on the mailing list. For more info on how people use this there's a bunch of talks about this too. Here's just two recent ones: https://www.cncf.io/wp-content/uploads/2020/12/Rootless-Containers-in-Gitpod.pdf https://fosdem.org/2021/schedule/event/containers_idmap/ This comes with an extensive xfstests suite covering both ext4 and xfs: https://git.kernel.org/brauner/xfstests-dev/h/idmapped_mounts It covers truncation, creation, opening, xattrs, vfscaps, setid execution, setgid inheritance and more both with idmapped and non-idmapped mounts. It already helped to discover an unrelated xfs setgid inheritance bug which has since been fixed in mainline. It will be sent for inclusion with the xfstests project should you decide to merge this. In order to support per-mount idmappings vfsmounts are marked with user namespaces. The idmapping of the user namespace will be used to map the ids of vfs objects when they are accessed through that mount. By default all vfsmounts are marked with the initial user namespace. The initial user namespace is used to indicate that a mount is not idmapped. All operations behave as before and this is verified in the testsuite. Based on prior discussions we want to attach the whole user namespace and not just a dedicated idmapping struct. This allows us to reuse all the helpers that already exist for dealing with idmappings instead of introducing a whole new range of helpers. In addition, if we decide in the future that we are confident enough to enable unprivileged users to setup idmapped mounts the permission checking can take into account whether the caller is privileged in the user namespace the mount is currently marked with. The user namespace the mount will be marked with can be specified by passing a file descriptor refering to the user namespace as an argument to the new mount_setattr() syscall together with the new MOUNT_ATTR_IDMAP flag. The system call follows the openat2() pattern of extensibility. The following conditions must be met in order to create an idmapped mount: - The caller must currently have the CAP_SYS_ADMIN capability in the user namespace the underlying filesystem has been mounted in. - The underlying filesystem must support idmapped mounts. - The mount must not already be idmapped. This also implies that the idmapping of a mount cannot be altered once it has been idmapped. - The mount must be a detached/anonymous mount, i.e. it must have been created by calling open_tree() with the OPEN_TREE_CLONE flag and it must not already have been visible in the filesystem. The last two points guarantee easier semantics for userspace and the kernel and make the implementation significantly simpler. By default vfsmounts are marked with the initial user namespace and no behavioral or performance changes are observed. The manpage with a detailed description can be found here: https://git.kernel.org/brauner/man-pages/c/1d7b902e2875a1ff342e036a9f866a995640aea8 In order to support idmapped mounts, filesystems need to be changed and mark themselves with the FS_ALLOW_IDMAP flag in fs_flags. The patches to convert individual filesystem are not very large or complicated overall as can be seen from the included fat, ext4, and xfs ports. Patches for other filesystems are actively worked on and will be sent out separately. The xfstestsuite can be used to verify that port has been done correctly. The mount_setattr() syscall is motivated independent of the idmapped mounts patches and it's been around since July 2019. One of the most valuable features of the new mount api is the ability to perform mounts based on file descriptors only. Together with the lookup restrictions available in the openat2() RESOLVE_* flag namespace which we added in v5.6 this is the first time we are close to hardened and race-free (e.g. symlinks) mounting and path resolution. While userspace has started porting to the new mount api to mount proper filesystems and create new bind-mounts it is currently not possible to change mount options of an already existing bind mount in the new mount api since the mount_setattr() syscall is missing. With the addition of the mount_setattr() syscall we remove this last restriction and userspace can now fully port to the new mount api, covering every use-case the old mount api could. We also add the crucial ability to recursively change mount options for a whole mount tree, both removing and adding mount options at the same time. This syscall has been requested multiple times by various people and projects. There is a simple tool available at https://github.com/brauner/mount-idmapped that allows to create idmapped mounts so people can play with this patch series. I'll add support for the regular mount binary should you decide to pull this in the following weeks: Here's an example to a simple idmapped mount of another user's home directory: u1001@f2-vm:/$ sudo ./mount --idmap both:1000:1001:1 /home/ubuntu/ /mnt u1001@f2-vm:/$ ls -al /home/ubuntu/ total 28 drwxr-xr-x 2 ubuntu ubuntu 4096 Oct 28 22:07 . drwxr-xr-x 4 root root 4096 Oct 28 04:00 .. -rw------- 1 ubuntu ubuntu 3154 Oct 28 22:12 .bash_history -rw-r--r-- 1 ubuntu ubuntu 220 Feb 25 2020 .bash_logout -rw-r--r-- 1 ubuntu ubuntu 3771 Feb 25 2020 .bashrc -rw-r--r-- 1 ubuntu ubuntu 807 Feb 25 2020 .profile -rw-r--r-- 1 ubuntu ubuntu 0 Oct 16 16:11 .sudo_as_admin_successful -rw------- 1 ubuntu ubuntu 1144 Oct 28 00:43 .viminfo u1001@f2-vm:/$ ls -al /mnt/ total 28 drwxr-xr-x 2 u1001 u1001 4096 Oct 28 22:07 . drwxr-xr-x 29 root root 4096 Oct 28 22:01 .. -rw------- 1 u1001 u1001 3154 Oct 28 22:12 .bash_history -rw-r--r-- 1 u1001 u1001 220 Feb 25 2020 .bash_logout -rw-r--r-- 1 u1001 u1001 3771 Feb 25 2020 .bashrc -rw-r--r-- 1 u1001 u1001 807 Feb 25 2020 .profile -rw-r--r-- 1 u1001 u1001 0 Oct 16 16:11 .sudo_as_admin_successful -rw------- 1 u1001 u1001 1144 Oct 28 00:43 .viminfo u1001@f2-vm:/$ touch /mnt/my-file u1001@f2-vm:/$ setfacl -m u:1001:rwx /mnt/my-file u1001@f2-vm:/$ sudo setcap -n 1001 cap_net_raw+ep /mnt/my-file u1001@f2-vm:/$ ls -al /mnt/my-file -rw-rwxr--+ 1 u1001 u1001 0 Oct 28 22:14 /mnt/my-file u1001@f2-vm:/$ ls -al /home/ubuntu/my-file -rw-rwxr--+ 1 ubuntu ubuntu 0 Oct 28 22:14 /home/ubuntu/my-file u1001@f2-vm:/$ getfacl /mnt/my-file getfacl: Removing leading '/' from absolute path names # file: mnt/my-file # owner: u1001 # group: u1001 user::rw- user:u1001:rwx group::rw- mask::rwx other::r-- u1001@f2-vm:/$ getfacl /home/ubuntu/my-file getfacl: Removing leading '/' from absolute path names # file: home/ubuntu/my-file # owner: ubuntu # group: ubuntu user::rw- user:ubuntu:rwx group::rw- mask::rwx other::r--" * tag 'idmapped-mounts-v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: (41 commits) xfs: remove the possibly unused mp variable in xfs_file_compat_ioctl xfs: support idmapped mounts ext4: support idmapped mounts fat: handle idmapped mounts tests: add mount_setattr() selftests fs: introduce MOUNT_ATTR_IDMAP fs: add mount_setattr() fs: add attr_flags_to_mnt_flags helper fs: split out functions to hold writers namespace: only take read lock in do_reconfigure_mnt() mount: make {lock,unlock}_mount_hash() static namespace: take lock_mount_hash() directly when changing flags nfs: do not export idmapped mounts overlayfs: do not mount on top of idmapped mounts ecryptfs: do not mount on top of idmapped mounts ima: handle idmapped mounts apparmor: handle idmapped mounts fs: make helpers idmap mount aware exec: handle idmapped mounts would_dump: handle idmapped mounts ...
740 lines
19 KiB
C
740 lines
19 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* NFS server file handle treatment.
|
|
*
|
|
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
|
|
* Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org>
|
|
* Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999
|
|
* ... and again Southern-Winter 2001 to support export_operations
|
|
*/
|
|
|
|
#include <linux/exportfs.h>
|
|
|
|
#include <linux/sunrpc/svcauth_gss.h>
|
|
#include "nfsd.h"
|
|
#include "vfs.h"
|
|
#include "auth.h"
|
|
#include "trace.h"
|
|
|
|
#define NFSDDBG_FACILITY NFSDDBG_FH
|
|
|
|
|
|
/*
|
|
* our acceptability function.
|
|
* if NOSUBTREECHECK, accept anything
|
|
* if not, require that we can walk up to exp->ex_dentry
|
|
* doing some checks on the 'x' bits
|
|
*/
|
|
static int nfsd_acceptable(void *expv, struct dentry *dentry)
|
|
{
|
|
struct svc_export *exp = expv;
|
|
int rv;
|
|
struct dentry *tdentry;
|
|
struct dentry *parent;
|
|
|
|
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
|
|
return 1;
|
|
|
|
tdentry = dget(dentry);
|
|
while (tdentry != exp->ex_path.dentry && !IS_ROOT(tdentry)) {
|
|
/* make sure parents give x permission to user */
|
|
int err;
|
|
parent = dget_parent(tdentry);
|
|
err = inode_permission(&init_user_ns,
|
|
d_inode(parent), MAY_EXEC);
|
|
if (err < 0) {
|
|
dput(parent);
|
|
break;
|
|
}
|
|
dput(tdentry);
|
|
tdentry = parent;
|
|
}
|
|
if (tdentry != exp->ex_path.dentry)
|
|
dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);
|
|
rv = (tdentry == exp->ex_path.dentry);
|
|
dput(tdentry);
|
|
return rv;
|
|
}
|
|
|
|
/* Type check. The correct error return for type mismatches does not seem to be
|
|
* generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a
|
|
* comment in the NFSv3 spec says this is incorrect (implementation notes for
|
|
* the write call).
|
|
*/
|
|
static inline __be32
|
|
nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
|
|
umode_t requested)
|
|
{
|
|
umode_t mode = d_inode(dentry)->i_mode & S_IFMT;
|
|
|
|
if (requested == 0) /* the caller doesn't care */
|
|
return nfs_ok;
|
|
if (mode == requested) {
|
|
if (mode == S_IFDIR && !d_can_lookup(dentry)) {
|
|
WARN_ON_ONCE(1);
|
|
return nfserr_notdir;
|
|
}
|
|
return nfs_ok;
|
|
}
|
|
/*
|
|
* v4 has an error more specific than err_notdir which we should
|
|
* return in preference to err_notdir:
|
|
*/
|
|
if (rqstp->rq_vers == 4 && mode == S_IFLNK)
|
|
return nfserr_symlink;
|
|
if (requested == S_IFDIR)
|
|
return nfserr_notdir;
|
|
if (mode == S_IFDIR)
|
|
return nfserr_isdir;
|
|
return nfserr_inval;
|
|
}
|
|
|
|
static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
|
|
{
|
|
if (flags & NFSEXP_INSECURE_PORT)
|
|
return true;
|
|
/* We don't require gss requests to use low ports: */
|
|
if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS)
|
|
return true;
|
|
return test_bit(RQ_SECURE, &rqstp->rq_flags);
|
|
}
|
|
|
|
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
|
|
struct svc_export *exp)
|
|
{
|
|
int flags = nfsexp_flags(rqstp, exp);
|
|
|
|
/* Check if the request originated from a secure port. */
|
|
if (!nfsd_originating_port_ok(rqstp, flags)) {
|
|
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
|
|
dprintk("nfsd: request from insecure port %s!\n",
|
|
svc_print_addr(rqstp, buf, sizeof(buf)));
|
|
return nfserr_perm;
|
|
}
|
|
|
|
/* Set user creds for this exportpoint */
|
|
return nfserrno(nfsd_setuser(rqstp, exp));
|
|
}
|
|
|
|
static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
|
|
struct dentry *dentry, struct svc_export *exp)
|
|
{
|
|
if (!(exp->ex_flags & NFSEXP_V4ROOT))
|
|
return nfs_ok;
|
|
/*
|
|
* v2/v3 clients have no need for the V4ROOT export--they use
|
|
* the mount protocl instead; also, further V4ROOT checks may be
|
|
* in v4-specific code, in which case v2/v3 clients could bypass
|
|
* them.
|
|
*/
|
|
if (!nfsd_v4client(rqstp))
|
|
return nfserr_stale;
|
|
/*
|
|
* We're exposing only the directories and symlinks that have to be
|
|
* traversed on the way to real exports:
|
|
*/
|
|
if (unlikely(!d_is_dir(dentry) &&
|
|
!d_is_symlink(dentry)))
|
|
return nfserr_stale;
|
|
/*
|
|
* A pseudoroot export gives permission to access only one
|
|
* single directory; the kernel has to make another upcall
|
|
* before granting access to anything else under it:
|
|
*/
|
|
if (unlikely(dentry != exp->ex_path.dentry))
|
|
return nfserr_stale;
|
|
return nfs_ok;
|
|
}
|
|
|
|
/*
|
|
* Use the given filehandle to look up the corresponding export and
|
|
* dentry. On success, the results are used to set fh_export and
|
|
* fh_dentry.
|
|
*/
|
|
static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
|
|
{
|
|
struct knfsd_fh *fh = &fhp->fh_handle;
|
|
struct fid *fid = NULL, sfid;
|
|
struct svc_export *exp;
|
|
struct dentry *dentry;
|
|
int fileid_type;
|
|
int data_left = fh->fh_size/4;
|
|
__be32 error;
|
|
|
|
error = nfserr_stale;
|
|
if (rqstp->rq_vers > 2)
|
|
error = nfserr_badhandle;
|
|
if (rqstp->rq_vers == 4 && fh->fh_size == 0)
|
|
return nfserr_nofilehandle;
|
|
|
|
if (fh->fh_version == 1) {
|
|
int len;
|
|
|
|
if (--data_left < 0)
|
|
return error;
|
|
if (fh->fh_auth_type != 0)
|
|
return error;
|
|
len = key_len(fh->fh_fsid_type) / 4;
|
|
if (len == 0)
|
|
return error;
|
|
if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
|
|
/* deprecated, convert to type 3 */
|
|
len = key_len(FSID_ENCODE_DEV)/4;
|
|
fh->fh_fsid_type = FSID_ENCODE_DEV;
|
|
/*
|
|
* struct knfsd_fh uses host-endian fields, which are
|
|
* sometimes used to hold net-endian values. This
|
|
* confuses sparse, so we must use __force here to
|
|
* keep it from complaining.
|
|
*/
|
|
fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
|
|
ntohl((__force __be32)fh->fh_fsid[1])));
|
|
fh->fh_fsid[1] = fh->fh_fsid[2];
|
|
}
|
|
data_left -= len;
|
|
if (data_left < 0)
|
|
return error;
|
|
exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
|
|
fid = (struct fid *)(fh->fh_fsid + len);
|
|
} else {
|
|
__u32 tfh[2];
|
|
dev_t xdev;
|
|
ino_t xino;
|
|
|
|
if (fh->fh_size != NFS_FHSIZE)
|
|
return error;
|
|
/* assume old filehandle format */
|
|
xdev = old_decode_dev(fh->ofh_xdev);
|
|
xino = u32_to_ino_t(fh->ofh_xino);
|
|
mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
|
|
exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
|
|
}
|
|
|
|
error = nfserr_stale;
|
|
if (IS_ERR(exp)) {
|
|
trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp));
|
|
|
|
if (PTR_ERR(exp) == -ENOENT)
|
|
return error;
|
|
|
|
return nfserrno(PTR_ERR(exp));
|
|
}
|
|
|
|
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
|
|
/* Elevate privileges so that the lack of 'r' or 'x'
|
|
* permission on some parent directory will
|
|
* not stop exportfs_decode_fh from being able
|
|
* to reconnect a directory into the dentry cache.
|
|
* The same problem can affect "SUBTREECHECK" exports,
|
|
* but as nfsd_acceptable depends on correct
|
|
* access control settings being in effect, we cannot
|
|
* fix that case easily.
|
|
*/
|
|
struct cred *new = prepare_creds();
|
|
if (!new) {
|
|
error = nfserrno(-ENOMEM);
|
|
goto out;
|
|
}
|
|
new->cap_effective =
|
|
cap_raise_nfsd_set(new->cap_effective,
|
|
new->cap_permitted);
|
|
put_cred(override_creds(new));
|
|
put_cred(new);
|
|
} else {
|
|
error = nfsd_setuser_and_check_port(rqstp, exp);
|
|
if (error)
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Look up the dentry using the NFS file handle.
|
|
*/
|
|
error = nfserr_stale;
|
|
if (rqstp->rq_vers > 2)
|
|
error = nfserr_badhandle;
|
|
|
|
if (fh->fh_version != 1) {
|
|
sfid.i32.ino = fh->ofh_ino;
|
|
sfid.i32.gen = fh->ofh_generation;
|
|
sfid.i32.parent_ino = fh->ofh_dirino;
|
|
fid = &sfid;
|
|
data_left = 3;
|
|
if (fh->ofh_dirino == 0)
|
|
fileid_type = FILEID_INO32_GEN;
|
|
else
|
|
fileid_type = FILEID_INO32_GEN_PARENT;
|
|
} else
|
|
fileid_type = fh->fh_fileid_type;
|
|
|
|
if (fileid_type == FILEID_ROOT)
|
|
dentry = dget(exp->ex_path.dentry);
|
|
else {
|
|
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
|
|
data_left, fileid_type,
|
|
nfsd_acceptable, exp);
|
|
if (IS_ERR_OR_NULL(dentry)) {
|
|
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
|
|
dentry ? PTR_ERR(dentry) : -ESTALE);
|
|
switch (PTR_ERR(dentry)) {
|
|
case -ENOMEM:
|
|
case -ETIMEDOUT:
|
|
break;
|
|
default:
|
|
dentry = ERR_PTR(-ESTALE);
|
|
}
|
|
}
|
|
}
|
|
if (dentry == NULL)
|
|
goto out;
|
|
if (IS_ERR(dentry)) {
|
|
if (PTR_ERR(dentry) != -EINVAL)
|
|
error = nfserrno(PTR_ERR(dentry));
|
|
goto out;
|
|
}
|
|
|
|
if (d_is_dir(dentry) &&
|
|
(dentry->d_flags & DCACHE_DISCONNECTED)) {
|
|
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n",
|
|
dentry);
|
|
}
|
|
|
|
fhp->fh_dentry = dentry;
|
|
fhp->fh_export = exp;
|
|
|
|
switch (rqstp->rq_vers) {
|
|
case 4:
|
|
if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
|
|
fhp->fh_no_atomic_attr = true;
|
|
break;
|
|
case 3:
|
|
if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC)
|
|
fhp->fh_no_wcc = true;
|
|
break;
|
|
case 2:
|
|
fhp->fh_no_wcc = true;
|
|
}
|
|
|
|
return 0;
|
|
out:
|
|
exp_put(exp);
|
|
return error;
|
|
}
|
|
|
|
/**
|
|
* fh_verify - filehandle lookup and access checking
|
|
* @rqstp: pointer to current rpc request
|
|
* @fhp: filehandle to be verified
|
|
* @type: expected type of object pointed to by filehandle
|
|
* @access: type of access needed to object
|
|
*
|
|
* Look up a dentry from the on-the-wire filehandle, check the client's
|
|
* access to the export, and set the current task's credentials.
|
|
*
|
|
* Regardless of success or failure of fh_verify(), fh_put() should be
|
|
* called on @fhp when the caller is finished with the filehandle.
|
|
*
|
|
* fh_verify() may be called multiple times on a given filehandle, for
|
|
* example, when processing an NFSv4 compound. The first call will look
|
|
* up a dentry using the on-the-wire filehandle. Subsequent calls will
|
|
* skip the lookup and just perform the other checks and possibly change
|
|
* the current task's credentials.
|
|
*
|
|
* @type specifies the type of object expected using one of the S_IF*
|
|
* constants defined in include/linux/stat.h. The caller may use zero
|
|
* to indicate that it doesn't care, or a negative integer to indicate
|
|
* that it expects something not of the given type.
|
|
*
|
|
* @access is formed from the NFSD_MAY_* constants defined in
|
|
* fs/nfsd/vfs.h.
|
|
*/
|
|
__be32
|
|
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
|
|
{
|
|
struct svc_export *exp = NULL;
|
|
struct dentry *dentry;
|
|
__be32 error;
|
|
|
|
dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
|
|
|
|
if (!fhp->fh_dentry) {
|
|
error = nfsd_set_fh_dentry(rqstp, fhp);
|
|
if (error)
|
|
goto out;
|
|
}
|
|
dentry = fhp->fh_dentry;
|
|
exp = fhp->fh_export;
|
|
/*
|
|
* We still have to do all these permission checks, even when
|
|
* fh_dentry is already set:
|
|
* - fh_verify may be called multiple times with different
|
|
* "access" arguments (e.g. nfsd_proc_create calls
|
|
* fh_verify(...,NFSD_MAY_EXEC) first, then later (in
|
|
* nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
|
|
* - in the NFSv4 case, the filehandle may have been filled
|
|
* in by fh_compose, and given a dentry, but further
|
|
* compound operations performed with that filehandle
|
|
* still need permissions checks. In the worst case, a
|
|
* mountpoint crossing may have changed the export
|
|
* options, and we may now need to use a different uid
|
|
* (for example, if different id-squashing options are in
|
|
* effect on the new filesystem).
|
|
*/
|
|
error = check_pseudo_root(rqstp, dentry, exp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = nfsd_setuser_and_check_port(rqstp, exp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = nfsd_mode_check(rqstp, dentry, type);
|
|
if (error)
|
|
goto out;
|
|
|
|
/*
|
|
* pseudoflavor restrictions are not enforced on NLM,
|
|
* which clients virtually always use auth_sys for,
|
|
* even while using RPCSEC_GSS for NFS.
|
|
*/
|
|
if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
|
|
goto skip_pseudoflavor_check;
|
|
/*
|
|
* Clients may expect to be able to use auth_sys during mount,
|
|
* even if they use gss for everything else; see section 2.3.2
|
|
* of rfc 2623.
|
|
*/
|
|
if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
|
|
&& exp->ex_path.dentry == dentry)
|
|
goto skip_pseudoflavor_check;
|
|
|
|
error = check_nfsd_access(exp, rqstp);
|
|
if (error)
|
|
goto out;
|
|
|
|
skip_pseudoflavor_check:
|
|
/* Finally, check access permissions. */
|
|
error = nfsd_permission(rqstp, exp, dentry, access);
|
|
|
|
if (error) {
|
|
dprintk("fh_verify: %pd2 permission failure, "
|
|
"acc=%x, error=%d\n",
|
|
dentry,
|
|
access, ntohl(error));
|
|
}
|
|
out:
|
|
if (error == nfserr_stale)
|
|
nfsd_stats_fh_stale_inc(exp);
|
|
return error;
|
|
}
|
|
|
|
|
|
/*
|
|
* Compose a file handle for an NFS reply.
|
|
*
|
|
* Note that when first composed, the dentry may not yet have
|
|
* an inode. In this case a call to fh_update should be made
|
|
* before the fh goes out on the wire ...
|
|
*/
|
|
static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
|
|
struct dentry *dentry)
|
|
{
|
|
if (dentry != exp->ex_path.dentry) {
|
|
struct fid *fid = (struct fid *)
|
|
(fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
|
|
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
|
|
int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
|
|
|
|
fhp->fh_handle.fh_fileid_type =
|
|
exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck);
|
|
fhp->fh_handle.fh_size += maxsize * 4;
|
|
} else {
|
|
fhp->fh_handle.fh_fileid_type = FILEID_ROOT;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* for composing old style file handles
|
|
*/
|
|
static inline void _fh_update_old(struct dentry *dentry,
|
|
struct svc_export *exp,
|
|
struct knfsd_fh *fh)
|
|
{
|
|
fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
|
|
fh->ofh_generation = d_inode(dentry)->i_generation;
|
|
if (d_is_dir(dentry) ||
|
|
(exp->ex_flags & NFSEXP_NOSUBTREECHECK))
|
|
fh->ofh_dirino = 0;
|
|
}
|
|
|
|
static bool is_root_export(struct svc_export *exp)
|
|
{
|
|
return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
|
|
}
|
|
|
|
static struct super_block *exp_sb(struct svc_export *exp)
|
|
{
|
|
return exp->ex_path.dentry->d_sb;
|
|
}
|
|
|
|
static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
|
|
{
|
|
switch (fsid_type) {
|
|
case FSID_DEV:
|
|
if (!old_valid_dev(exp_sb(exp)->s_dev))
|
|
return false;
|
|
fallthrough;
|
|
case FSID_MAJOR_MINOR:
|
|
case FSID_ENCODE_DEV:
|
|
return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
|
|
case FSID_NUM:
|
|
return exp->ex_flags & NFSEXP_FSID;
|
|
case FSID_UUID8:
|
|
case FSID_UUID16:
|
|
if (!is_root_export(exp))
|
|
return false;
|
|
fallthrough;
|
|
case FSID_UUID4_INUM:
|
|
case FSID_UUID16_INUM:
|
|
return exp->ex_uuid != NULL;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
|
|
{
|
|
u8 version;
|
|
u8 fsid_type;
|
|
retry:
|
|
version = 1;
|
|
if (ref_fh && ref_fh->fh_export == exp) {
|
|
version = ref_fh->fh_handle.fh_version;
|
|
fsid_type = ref_fh->fh_handle.fh_fsid_type;
|
|
|
|
ref_fh = NULL;
|
|
|
|
switch (version) {
|
|
case 0xca:
|
|
fsid_type = FSID_DEV;
|
|
break;
|
|
case 1:
|
|
break;
|
|
default:
|
|
goto retry;
|
|
}
|
|
|
|
/*
|
|
* As the fsid -> filesystem mapping was guided by
|
|
* user-space, there is no guarantee that the filesystem
|
|
* actually supports that fsid type. If it doesn't we
|
|
* loop around again without ref_fh set.
|
|
*/
|
|
if (!fsid_type_ok_for_exp(fsid_type, exp))
|
|
goto retry;
|
|
} else if (exp->ex_flags & NFSEXP_FSID) {
|
|
fsid_type = FSID_NUM;
|
|
} else if (exp->ex_uuid) {
|
|
if (fhp->fh_maxsize >= 64) {
|
|
if (is_root_export(exp))
|
|
fsid_type = FSID_UUID16;
|
|
else
|
|
fsid_type = FSID_UUID16_INUM;
|
|
} else {
|
|
if (is_root_export(exp))
|
|
fsid_type = FSID_UUID8;
|
|
else
|
|
fsid_type = FSID_UUID4_INUM;
|
|
}
|
|
} else if (!old_valid_dev(exp_sb(exp)->s_dev))
|
|
/* for newer device numbers, we must use a newer fsid format */
|
|
fsid_type = FSID_ENCODE_DEV;
|
|
else
|
|
fsid_type = FSID_DEV;
|
|
fhp->fh_handle.fh_version = version;
|
|
if (version)
|
|
fhp->fh_handle.fh_fsid_type = fsid_type;
|
|
}
|
|
|
|
__be32
|
|
fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
|
|
struct svc_fh *ref_fh)
|
|
{
|
|
/* ref_fh is a reference file handle.
|
|
* if it is non-null and for the same filesystem, then we should compose
|
|
* a filehandle which is of the same version, where possible.
|
|
* Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
|
|
* Then create a 32byte filehandle using nfs_fhbase_old
|
|
*
|
|
*/
|
|
|
|
struct inode * inode = d_inode(dentry);
|
|
dev_t ex_dev = exp_sb(exp)->s_dev;
|
|
|
|
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
|
|
MAJOR(ex_dev), MINOR(ex_dev),
|
|
(long) d_inode(exp->ex_path.dentry)->i_ino,
|
|
dentry,
|
|
(inode ? inode->i_ino : 0));
|
|
|
|
/* Choose filehandle version and fsid type based on
|
|
* the reference filehandle (if it is in the same export)
|
|
* or the export options.
|
|
*/
|
|
set_version_and_fsid_type(fhp, exp, ref_fh);
|
|
|
|
/* If we have a ref_fh, then copy the fh_no_wcc setting from it. */
|
|
fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false;
|
|
|
|
if (ref_fh == fhp)
|
|
fh_put(ref_fh);
|
|
|
|
if (fhp->fh_locked || fhp->fh_dentry) {
|
|
printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
|
|
dentry);
|
|
}
|
|
if (fhp->fh_maxsize < NFS_FHSIZE)
|
|
printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n",
|
|
fhp->fh_maxsize,
|
|
dentry);
|
|
|
|
fhp->fh_dentry = dget(dentry); /* our internal copy */
|
|
fhp->fh_export = exp_get(exp);
|
|
|
|
if (fhp->fh_handle.fh_version == 0xca) {
|
|
/* old style filehandle please */
|
|
memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
|
|
fhp->fh_handle.fh_size = NFS_FHSIZE;
|
|
fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
|
|
fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
|
|
fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
|
|
fhp->fh_handle.ofh_xino =
|
|
ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
|
|
fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
|
|
if (inode)
|
|
_fh_update_old(dentry, exp, &fhp->fh_handle);
|
|
} else {
|
|
fhp->fh_handle.fh_size =
|
|
key_len(fhp->fh_handle.fh_fsid_type) + 4;
|
|
fhp->fh_handle.fh_auth_type = 0;
|
|
|
|
mk_fsid(fhp->fh_handle.fh_fsid_type,
|
|
fhp->fh_handle.fh_fsid,
|
|
ex_dev,
|
|
d_inode(exp->ex_path.dentry)->i_ino,
|
|
exp->ex_fsid, exp->ex_uuid);
|
|
|
|
if (inode)
|
|
_fh_update(fhp, exp, dentry);
|
|
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
|
|
fh_put(fhp);
|
|
return nfserr_opnotsupp;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Update file handle information after changing a dentry.
|
|
* This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create
|
|
*/
|
|
__be32
|
|
fh_update(struct svc_fh *fhp)
|
|
{
|
|
struct dentry *dentry;
|
|
|
|
if (!fhp->fh_dentry)
|
|
goto out_bad;
|
|
|
|
dentry = fhp->fh_dentry;
|
|
if (d_really_is_negative(dentry))
|
|
goto out_negative;
|
|
if (fhp->fh_handle.fh_version != 1) {
|
|
_fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
|
|
} else {
|
|
if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
|
|
return 0;
|
|
|
|
_fh_update(fhp, fhp->fh_export, dentry);
|
|
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
|
|
return nfserr_opnotsupp;
|
|
}
|
|
return 0;
|
|
out_bad:
|
|
printk(KERN_ERR "fh_update: fh not verified!\n");
|
|
return nfserr_serverfault;
|
|
out_negative:
|
|
printk(KERN_ERR "fh_update: %pd2 still negative!\n",
|
|
dentry);
|
|
return nfserr_serverfault;
|
|
}
|
|
|
|
/*
|
|
* Release a file handle.
|
|
*/
|
|
void
|
|
fh_put(struct svc_fh *fhp)
|
|
{
|
|
struct dentry * dentry = fhp->fh_dentry;
|
|
struct svc_export * exp = fhp->fh_export;
|
|
if (dentry) {
|
|
fh_unlock(fhp);
|
|
fhp->fh_dentry = NULL;
|
|
dput(dentry);
|
|
fh_clear_wcc(fhp);
|
|
}
|
|
fh_drop_write(fhp);
|
|
if (exp) {
|
|
exp_put(exp);
|
|
fhp->fh_export = NULL;
|
|
}
|
|
fhp->fh_no_wcc = false;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Shorthand for dprintk()'s
|
|
*/
|
|
char * SVCFH_fmt(struct svc_fh *fhp)
|
|
{
|
|
struct knfsd_fh *fh = &fhp->fh_handle;
|
|
|
|
static char buf[80];
|
|
sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
|
|
fh->fh_size,
|
|
fh->fh_base.fh_pad[0],
|
|
fh->fh_base.fh_pad[1],
|
|
fh->fh_base.fh_pad[2],
|
|
fh->fh_base.fh_pad[3],
|
|
fh->fh_base.fh_pad[4],
|
|
fh->fh_base.fh_pad[5]);
|
|
return buf;
|
|
}
|
|
|
|
enum fsid_source fsid_source(struct svc_fh *fhp)
|
|
{
|
|
if (fhp->fh_handle.fh_version != 1)
|
|
return FSIDSOURCE_DEV;
|
|
switch(fhp->fh_handle.fh_fsid_type) {
|
|
case FSID_DEV:
|
|
case FSID_ENCODE_DEV:
|
|
case FSID_MAJOR_MINOR:
|
|
if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
|
|
return FSIDSOURCE_DEV;
|
|
break;
|
|
case FSID_NUM:
|
|
if (fhp->fh_export->ex_flags & NFSEXP_FSID)
|
|
return FSIDSOURCE_FSID;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
/* either a UUID type filehandle, or the filehandle doesn't
|
|
* match the export.
|
|
*/
|
|
if (fhp->fh_export->ex_flags & NFSEXP_FSID)
|
|
return FSIDSOURCE_FSID;
|
|
if (fhp->fh_export->ex_uuid)
|
|
return FSIDSOURCE_UUID;
|
|
return FSIDSOURCE_DEV;
|
|
}
|