mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-24 14:07:52 -04:00
bpf: Parameterize task iterators.
Allow creating an iterator that loops through the resources of a single thread or process. Previously, people could only create iterators that loop through the files, VMAs, and tasks of every task in the system, even when they were interested only in the resources of one specific task or process. By passing the additional parameters, people can now create an iterator that goes through either all resources or only the resources of one task. Signed-off-by: Kui-Feng Lee <kuifeng@fb.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Yonghong Song <yhs@fb.com> Acked-by: Martin KaFai Lau <martin.lau@kernel.org> Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
This commit is contained in:
parent
87dbdc230d
commit
f0d74c4da1
4 changed files with 203 additions and 22 deletions
|
@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
|
||||||
extern int bpf_iter_ ## target(args); \
|
extern int bpf_iter_ ## target(args); \
|
||||||
int __init bpf_iter_ ## target(args) { return 0; }
|
int __init bpf_iter_ ## target(args) { return 0; }
|
||||||
|
|
||||||
|
/*
 * Selects which tasks a BPF task iterator walks.
 *
 * A task iterator can be given parameters that narrow the walk to a
 * subset of the tasks; the enumerators below name the possible scopes.
 */
enum bpf_iter_task_type {
	/* Default: iterate over the resources of every task. */
	BPF_TASK_ITER_ALL = 0,
	/* Iterate over the resources of a single task, chosen by tid. */
	BPF_TASK_ITER_TID,
	/* Iterate over the resources of every task of one process
	 * (task group).
	 */
	BPF_TASK_ITER_TGID,
};
|
||||||
|
|
||||||
struct bpf_iter_aux_info {
|
struct bpf_iter_aux_info {
|
||||||
/* for map_elem iter */
|
/* for map_elem iter */
|
||||||
struct bpf_map *map;
|
struct bpf_map *map;
|
||||||
|
@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info {
|
||||||
struct cgroup *start; /* starting cgroup */
|
struct cgroup *start; /* starting cgroup */
|
||||||
enum bpf_cgroup_iter_order order;
|
enum bpf_cgroup_iter_order order;
|
||||||
} cgroup;
|
} cgroup;
|
||||||
|
struct {
|
||||||
|
enum bpf_iter_task_type type;
|
||||||
|
u32 pid;
|
||||||
|
} task;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
|
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
|
||||||
|
|
|
@ -110,6 +110,12 @@ union bpf_iter_link_info {
|
||||||
__u32 cgroup_fd;
|
__u32 cgroup_fd;
|
||||||
__u64 cgroup_id;
|
__u64 cgroup_id;
|
||||||
} cgroup;
|
} cgroup;
|
||||||
|
/* Parameters of task iterators. */
|
||||||
|
struct {
|
||||||
|
__u32 tid;
|
||||||
|
__u32 pid;
|
||||||
|
__u32 pid_fd;
|
||||||
|
} task;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
||||||
|
|
|
@ -12,6 +12,9 @@
|
||||||
|
|
||||||
struct bpf_iter_seq_task_common {
|
struct bpf_iter_seq_task_common {
|
||||||
struct pid_namespace *ns;
|
struct pid_namespace *ns;
|
||||||
|
enum bpf_iter_task_type type;
|
||||||
|
u32 pid;
|
||||||
|
u32 pid_visiting;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bpf_iter_seq_task_info {
|
struct bpf_iter_seq_task_info {
|
||||||
|
@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info {
|
||||||
u32 tid;
|
u32 tid;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
|
static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
|
||||||
|
u32 *tid,
|
||||||
|
bool skip_if_dup_files)
|
||||||
|
{
|
||||||
|
struct task_struct *task, *next_task;
|
||||||
|
struct pid *pid;
|
||||||
|
u32 saved_tid;
|
||||||
|
|
||||||
|
if (!*tid) {
|
||||||
|
/* The first time, the iterator calls this function. */
|
||||||
|
pid = find_pid_ns(common->pid, common->ns);
|
||||||
|
if (!pid)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
task = get_pid_task(pid, PIDTYPE_TGID);
|
||||||
|
if (!task)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
*tid = common->pid;
|
||||||
|
common->pid_visiting = common->pid;
|
||||||
|
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the control returns to user space and comes back to the
|
||||||
|
* kernel again, *tid and common->pid_visiting should be the
|
||||||
|
* same for task_seq_start() to pick up the correct task.
|
||||||
|
*/
|
||||||
|
if (*tid == common->pid_visiting) {
|
||||||
|
pid = find_pid_ns(common->pid_visiting, common->ns);
|
||||||
|
task = get_pid_task(pid, PIDTYPE_PID);
|
||||||
|
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
|
||||||
|
pid = find_pid_ns(common->pid_visiting, common->ns);
|
||||||
|
if (!pid)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
task = get_pid_task(pid, PIDTYPE_PID);
|
||||||
|
if (!task)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
retry:
|
||||||
|
if (!pid_alive(task)) {
|
||||||
|
put_task_struct(task);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
next_task = next_thread(task);
|
||||||
|
put_task_struct(task);
|
||||||
|
if (!next_task)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
saved_tid = *tid;
|
||||||
|
*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
|
||||||
|
if (!*tid || *tid == common->pid) {
|
||||||
|
/* Run out of tasks of a process. The tasks of a
|
||||||
|
* thread_group are linked as circular linked list.
|
||||||
|
*/
|
||||||
|
*tid = saved_tid;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
get_task_struct(next_task);
|
||||||
|
common->pid_visiting = *tid;
|
||||||
|
|
||||||
|
if (skip_if_dup_files && task->files == task->group_leader->files) {
|
||||||
|
task = next_task;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
return next_task;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
|
||||||
u32 *tid,
|
u32 *tid,
|
||||||
bool skip_if_dup_files)
|
bool skip_if_dup_files)
|
||||||
{
|
{
|
||||||
struct task_struct *task = NULL;
|
struct task_struct *task = NULL;
|
||||||
struct pid *pid;
|
struct pid *pid;
|
||||||
|
|
||||||
|
if (common->type == BPF_TASK_ITER_TID) {
|
||||||
|
if (*tid && *tid != common->pid)
|
||||||
|
return NULL;
|
||||||
|
rcu_read_lock();
|
||||||
|
pid = find_pid_ns(common->pid, common->ns);
|
||||||
|
if (pid) {
|
||||||
|
task = get_pid_task(pid, PIDTYPE_TGID);
|
||||||
|
*tid = common->pid;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (common->type == BPF_TASK_ITER_TGID) {
|
||||||
|
rcu_read_lock();
|
||||||
|
task = task_group_seq_get_next(common, tid, skip_if_dup_files);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
retry:
|
retry:
|
||||||
pid = find_ge_pid(*tid, ns);
|
pid = find_ge_pid(*tid, common->ns);
|
||||||
if (pid) {
|
if (pid) {
|
||||||
*tid = pid_nr_ns(pid, ns);
|
*tid = pid_nr_ns(pid, common->ns);
|
||||||
task = get_pid_task(pid, PIDTYPE_PID);
|
task = get_pid_task(pid, PIDTYPE_PID);
|
||||||
if (!task) {
|
if (!task) {
|
||||||
++*tid;
|
++*tid;
|
||||||
|
@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
|
||||||
struct bpf_iter_seq_task_info *info = seq->private;
|
struct bpf_iter_seq_task_info *info = seq->private;
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
|
||||||
task = task_seq_get_next(info->common.ns, &info->tid, false);
|
task = task_seq_get_next(&info->common, &info->tid, false);
|
||||||
if (!task)
|
if (!task)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||||
++*pos;
|
++*pos;
|
||||||
++info->tid;
|
++info->tid;
|
||||||
put_task_struct((struct task_struct *)v);
|
put_task_struct((struct task_struct *)v);
|
||||||
task = task_seq_get_next(info->common.ns, &info->tid, false);
|
task = task_seq_get_next(&info->common, &info->tid, false);
|
||||||
if (!task)
|
if (!task)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v)
|
||||||
put_task_struct((struct task_struct *)v);
|
put_task_struct((struct task_struct *)v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bpf_iter_attach_task(struct bpf_prog *prog,
|
||||||
|
union bpf_iter_link_info *linfo,
|
||||||
|
struct bpf_iter_aux_info *aux)
|
||||||
|
{
|
||||||
|
unsigned int flags;
|
||||||
|
struct pid *pid;
|
||||||
|
pid_t tgid;
|
||||||
|
|
||||||
|
if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
aux->task.type = BPF_TASK_ITER_ALL;
|
||||||
|
if (linfo->task.tid != 0) {
|
||||||
|
aux->task.type = BPF_TASK_ITER_TID;
|
||||||
|
aux->task.pid = linfo->task.tid;
|
||||||
|
}
|
||||||
|
if (linfo->task.pid != 0) {
|
||||||
|
aux->task.type = BPF_TASK_ITER_TGID;
|
||||||
|
aux->task.pid = linfo->task.pid;
|
||||||
|
}
|
||||||
|
if (linfo->task.pid_fd != 0) {
|
||||||
|
aux->task.type = BPF_TASK_ITER_TGID;
|
||||||
|
|
||||||
|
pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
|
||||||
|
if (IS_ERR(pid))
|
||||||
|
return PTR_ERR(pid);
|
||||||
|
|
||||||
|
tgid = pid_nr_ns(pid, task_active_pid_ns(current));
|
||||||
|
aux->task.pid = tgid;
|
||||||
|
put_pid(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static const struct seq_operations task_seq_ops = {
|
static const struct seq_operations task_seq_ops = {
|
||||||
.start = task_seq_start,
|
.start = task_seq_start,
|
||||||
.next = task_seq_next,
|
.next = task_seq_next,
|
||||||
|
@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info {
|
||||||
static struct file *
|
static struct file *
|
||||||
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
|
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
|
||||||
{
|
{
|
||||||
struct pid_namespace *ns = info->common.ns;
|
u32 saved_tid = info->tid;
|
||||||
u32 curr_tid = info->tid;
|
|
||||||
struct task_struct *curr_task;
|
struct task_struct *curr_task;
|
||||||
unsigned int curr_fd = info->fd;
|
unsigned int curr_fd = info->fd;
|
||||||
|
|
||||||
|
@ -151,21 +285,18 @@ again:
|
||||||
curr_task = info->task;
|
curr_task = info->task;
|
||||||
curr_fd = info->fd;
|
curr_fd = info->fd;
|
||||||
} else {
|
} else {
|
||||||
curr_task = task_seq_get_next(ns, &curr_tid, true);
|
curr_task = task_seq_get_next(&info->common, &info->tid, true);
|
||||||
if (!curr_task) {
|
if (!curr_task) {
|
||||||
info->task = NULL;
|
info->task = NULL;
|
||||||
info->tid = curr_tid;
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set info->task and info->tid */
|
/* set info->task */
|
||||||
info->task = curr_task;
|
info->task = curr_task;
|
||||||
if (curr_tid == info->tid) {
|
if (saved_tid == info->tid)
|
||||||
curr_fd = info->fd;
|
curr_fd = info->fd;
|
||||||
} else {
|
else
|
||||||
info->tid = curr_tid;
|
|
||||||
curr_fd = 0;
|
curr_fd = 0;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -186,9 +317,15 @@ again:
|
||||||
/* the current task is done, go to the next task */
|
/* the current task is done, go to the next task */
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
put_task_struct(curr_task);
|
put_task_struct(curr_task);
|
||||||
|
|
||||||
|
if (info->common.type == BPF_TASK_ITER_TID) {
|
||||||
|
info->task = NULL;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
info->task = NULL;
|
info->task = NULL;
|
||||||
info->fd = 0;
|
info->fd = 0;
|
||||||
curr_tid = ++(info->tid);
|
saved_tid = ++(info->tid);
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
|
||||||
struct bpf_iter_seq_task_common *common = priv_data;
|
struct bpf_iter_seq_task_common *common = priv_data;
|
||||||
|
|
||||||
common->ns = get_pid_ns(task_active_pid_ns(current));
|
common->ns = get_pid_ns(task_active_pid_ns(current));
|
||||||
|
common->type = aux->task.type;
|
||||||
|
common->pid = aux->task.pid;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op {
|
||||||
static struct vm_area_struct *
|
static struct vm_area_struct *
|
||||||
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
|
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
|
||||||
{
|
{
|
||||||
struct pid_namespace *ns = info->common.ns;
|
|
||||||
enum bpf_task_vma_iter_find_op op;
|
enum bpf_task_vma_iter_find_op op;
|
||||||
struct vm_area_struct *curr_vma;
|
struct vm_area_struct *curr_vma;
|
||||||
struct task_struct *curr_task;
|
struct task_struct *curr_task;
|
||||||
u32 curr_tid = info->tid;
|
u32 saved_tid = info->tid;
|
||||||
|
|
||||||
/* If this function returns a non-NULL vma, it holds a reference to
|
/* If this function returns a non-NULL vma, it holds a reference to
|
||||||
* the task_struct, and holds read lock on vma->mm->mmap_lock.
|
* the task_struct, and holds read lock on vma->mm->mmap_lock.
|
||||||
|
@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
again:
|
again:
|
||||||
curr_task = task_seq_get_next(ns, &curr_tid, true);
|
curr_task = task_seq_get_next(&info->common, &info->tid, true);
|
||||||
if (!curr_task) {
|
if (!curr_task) {
|
||||||
info->tid = curr_tid + 1;
|
info->tid++;
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (curr_tid != info->tid) {
|
if (saved_tid != info->tid) {
|
||||||
info->tid = curr_tid;
|
|
||||||
/* new task, process the first vma */
|
/* new task, process the first vma */
|
||||||
op = task_vma_iter_first_vma;
|
op = task_vma_iter_first_vma;
|
||||||
} else {
|
} else {
|
||||||
|
@ -430,9 +568,12 @@ again:
|
||||||
return curr_vma;
|
return curr_vma;
|
||||||
|
|
||||||
next_task:
|
next_task:
|
||||||
|
if (info->common.type == BPF_TASK_ITER_TID)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
put_task_struct(curr_task);
|
put_task_struct(curr_task);
|
||||||
info->task = NULL;
|
info->task = NULL;
|
||||||
curr_tid++;
|
info->tid++;
|
||||||
goto again;
|
goto again;
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
|
@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
|
||||||
|
|
||||||
static struct bpf_iter_reg task_reg_info = {
|
static struct bpf_iter_reg task_reg_info = {
|
||||||
.target = "task",
|
.target = "task",
|
||||||
|
.attach_target = bpf_iter_attach_task,
|
||||||
.feature = BPF_ITER_RESCHED,
|
.feature = BPF_ITER_RESCHED,
|
||||||
.ctx_arg_info_size = 1,
|
.ctx_arg_info_size = 1,
|
||||||
.ctx_arg_info = {
|
.ctx_arg_info = {
|
||||||
|
@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
|
||||||
|
|
||||||
static struct bpf_iter_reg task_file_reg_info = {
|
static struct bpf_iter_reg task_file_reg_info = {
|
||||||
.target = "task_file",
|
.target = "task_file",
|
||||||
|
.attach_target = bpf_iter_attach_task,
|
||||||
.feature = BPF_ITER_RESCHED,
|
.feature = BPF_ITER_RESCHED,
|
||||||
.ctx_arg_info_size = 2,
|
.ctx_arg_info_size = 2,
|
||||||
.ctx_arg_info = {
|
.ctx_arg_info = {
|
||||||
|
@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
|
||||||
|
|
||||||
static struct bpf_iter_reg task_vma_reg_info = {
|
static struct bpf_iter_reg task_vma_reg_info = {
|
||||||
.target = "task_vma",
|
.target = "task_vma",
|
||||||
|
.attach_target = bpf_iter_attach_task,
|
||||||
.feature = BPF_ITER_RESCHED,
|
.feature = BPF_ITER_RESCHED,
|
||||||
.ctx_arg_info_size = 2,
|
.ctx_arg_info_size = 2,
|
||||||
.ctx_arg_info = {
|
.ctx_arg_info = {
|
||||||
|
|
|
@ -110,6 +110,12 @@ union bpf_iter_link_info {
|
||||||
__u32 cgroup_fd;
|
__u32 cgroup_fd;
|
||||||
__u64 cgroup_id;
|
__u64 cgroup_id;
|
||||||
} cgroup;
|
} cgroup;
|
||||||
|
/* Parameters of task iterators. */
|
||||||
|
struct {
|
||||||
|
__u32 tid;
|
||||||
|
__u32 pid;
|
||||||
|
__u32 pid_fd;
|
||||||
|
} task;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue