bianbu-linux-6.6/include/linux/sched/task.h
Elena Reshetova ec1d281923 sched/core: Convert task_struct.usage to refcount_t
atomic_t variables are currently used to implement reference
counters with the following properties:

 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable task_struct.usage is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

** Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts.

The full comparison can be seen in
https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon
in state to be merged to the documentation tree.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.

Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the task_struct.usage it might make a difference
in following places:

 - put_task_struct(): decrement in refcount_dec_and_test() only
   provides RELEASE ordering and control dependency on success
   vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: viro@zeniv.linux.org.uk
Link: https://lkml.kernel.org/r/1547814450-18902-5-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2019-02-04 08:53:55 +01:00

155 lines
4.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_TASK_H
#define _LINUX_SCHED_TASK_H
/*
* Interface between the scheduler and various task lifetime (fork()/exit())
* functionality:
*/
#include <linux/sched.h>
struct task_struct;
struct rusage;
union thread_union;
/*
* This serializes "schedule()" and also protects
* the run-queue from deletions/modifications (but
* _adding_ to the beginning of the run-queue has
* a separate lock).
*/
extern rwlock_t tasklist_lock;
extern spinlock_t mmlist_lock;
extern union thread_union init_thread_union;
extern struct task_struct init_task;
#ifdef CONFIG_PROVE_RCU
extern int lockdep_tasklist_lock_is_held(void);
#endif /* #ifdef CONFIG_PROVE_RCU */
extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
extern void sched_dead(struct task_struct *p);
void __noreturn do_task_dead(void);
extern void proc_caches_init(void);
extern void fork_init(void);
extern void release_task(struct task_struct * p);
#ifdef CONFIG_HAVE_COPY_THREAD_TLS
extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
struct task_struct *, unsigned long);
#else
extern int copy_thread(unsigned long, unsigned long, unsigned long,
struct task_struct *);
/* Architectures that haven't opted into copy_thread_tls get the tls argument
* via pt_regs, so ignore the tls argument passed via C. */
static inline int copy_thread_tls(
unsigned long clone_flags, unsigned long sp, unsigned long arg,
struct task_struct *p, unsigned long tls)
{
return copy_thread(clone_flags, sp, arg, p);
}
#endif
extern void flush_thread(void);
#ifdef CONFIG_HAVE_EXIT_THREAD
extern void exit_thread(struct task_struct *tsk);
#else
static inline void exit_thread(struct task_struct *tsk)
{
}
#endif
extern void do_group_exit(int);
extern void exit_files(struct task_struct *);
extern void exit_itimers(struct signal_struct *);
extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
extern void free_task(struct task_struct *tsk);
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
extern void sched_exec(void);
#else
#define sched_exec() {}
#endif
#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0)
extern void __put_task_struct(struct task_struct *t);
static inline void put_task_struct(struct task_struct *t)
{
if (refcount_dec_and_test(&t->usage))
__put_task_struct(t);
}
struct task_struct *task_rcu_dereference(struct task_struct **ptask);
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
extern int arch_task_struct_size __read_mostly;
#else
# define arch_task_struct_size (sizeof(struct task_struct))
#endif
#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST
/*
* If an architecture has not declared a thread_struct whitelist we
* must assume something there may need to be copied to userspace.
*/
static inline void arch_thread_struct_whitelist(unsigned long *offset,
unsigned long *size)
{
*offset = 0;
/* Handle dynamically sized thread_struct. */
*size = arch_task_struct_size - offsetof(struct task_struct, thread);
}
#endif
#ifdef CONFIG_VMAP_STACK
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
{
return t->stack_vm_area;
}
#else
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
{
return NULL;
}
#endif
/*
* Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
* subscriptions and synchronises with wait4(). Also used in procfs. Also
* pins the final release of task.io_context. Also protects ->cpuset and
* ->cgroup.subsys[]. And ->vfork_done.
*
* Nests both inside and outside of read_lock(&tasklist_lock).
* It must not be nested with write_lock_irq(&tasklist_lock),
* neither inside nor outside.
*/
static inline void task_lock(struct task_struct *p)
{
spin_lock(&p->alloc_lock);
}
static inline void task_unlock(struct task_struct *p)
{
spin_unlock(&p->alloc_lock);
}
#endif /* _LINUX_SCHED_TASK_H */