mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-24 14:07:52 -04:00
commit fa2690af573dfefb47ba6eef888797a64b6b5f3c upstream. The below bug was reported on a non-SMP kernel: [ 275.267158][ T4335] ------------[ cut here ]------------ [ 275.267949][ T4335] kernel BUG at include/linux/page_ref.h:275! [ 275.268526][ T4335] invalid opcode: 0000 [#1] KASAN PTI [ 275.269001][ T4335] CPU: 0 PID: 4335 Comm: trinity-c3 Not tainted 6.7.0-rc4-00061-gefa7df3e3bb5 #1 [ 275.269787][ T4335] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 275.270679][ T4335] RIP: 0010:try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.272813][ T4335] RSP: 0018:ffffc90005dcf650 EFLAGS: 00010202 [ 275.273346][ T4335] RAX: 0000000000000246 RBX: ffffea00066e0000 RCX: 0000000000000000 [ 275.274032][ T4335] RDX: fffff94000cdc007 RSI: 0000000000000004 RDI: ffffea00066e0034 [ 275.274719][ T4335] RBP: ffffea00066e0000 R08: 0000000000000000 R09: fffff94000cdc006 [ 275.275404][ T4335] R10: ffffea00066e0037 R11: 0000000000000000 R12: 0000000000000136 [ 275.276106][ T4335] R13: ffffea00066e0034 R14: dffffc0000000000 R15: ffffea00066e0008 [ 275.276790][ T4335] FS: 00007fa2f9b61740(0000) GS:ffffffff89d0d000(0000) knlGS:0000000000000000 [ 275.277570][ T4335] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 275.278143][ T4335] CR2: 00007fa2f6c00000 CR3: 0000000134b04000 CR4: 00000000000406f0 [ 275.278833][ T4335] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 275.279521][ T4335] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 275.280201][ T4335] Call Trace: [ 275.280499][ T4335] <TASK> [ 275.280751][ T4335] ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447) [ 275.281087][ T4335] ? do_trap (arch/x86/kernel/traps.c:112 arch/x86/kernel/traps.c:153) [ 275.281463][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.281884][ T4335] ? 
try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.282300][ T4335] ? do_error_trap (arch/x86/kernel/traps.c:174) [ 275.282711][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.283129][ T4335] ? handle_invalid_op (arch/x86/kernel/traps.c:212) [ 275.283561][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.283990][ T4335] ? exc_invalid_op (arch/x86/kernel/traps.c:264) [ 275.284415][ T4335] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:568) [ 275.284859][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.285278][ T4335] try_grab_folio (mm/gup.c:148) [ 275.285684][ T4335] __get_user_pages (mm/gup.c:1297 (discriminator 1)) [ 275.286111][ T4335] ? __pfx___get_user_pages (mm/gup.c:1188) [ 275.286579][ T4335] ? __pfx_validate_chain (kernel/locking/lockdep.c:3825) [ 275.287034][ T4335] ? mark_lock (kernel/locking/lockdep.c:4656 (discriminator 1)) [ 275.287416][ T4335] __gup_longterm_locked (mm/gup.c:1509 mm/gup.c:2209) [ 275.288192][ T4335] ? __pfx___gup_longterm_locked (mm/gup.c:2204) [ 275.288697][ T4335] ? __pfx_lock_acquire (kernel/locking/lockdep.c:5722) [ 275.289135][ T4335] ? __pfx___might_resched (kernel/sched/core.c:10106) [ 275.289595][ T4335] pin_user_pages_remote (mm/gup.c:3350) [ 275.290041][ T4335] ? __pfx_pin_user_pages_remote (mm/gup.c:3350) [ 275.290545][ T4335] ? find_held_lock (kernel/locking/lockdep.c:5244 (discriminator 1)) [ 275.290961][ T4335] ? mm_access (kernel/fork.c:1573) [ 275.291353][ T4335] process_vm_rw_single_vec+0x142/0x360 [ 275.291900][ T4335] ? __pfx_process_vm_rw_single_vec+0x10/0x10 [ 275.292471][ T4335] ? mm_access (kernel/fork.c:1573) [ 275.292859][ T4335] process_vm_rw_core+0x272/0x4e0 [ 275.293384][ T4335] ? 
hlock_class (arch/x86/include/asm/bitops.h:227 arch/x86/include/asm/bitops.h:239 include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/locking/lockdep.c:228) [ 275.293780][ T4335] ? __pfx_process_vm_rw_core+0x10/0x10 [ 275.294350][ T4335] process_vm_rw (mm/process_vm_access.c:284) [ 275.294748][ T4335] ? __pfx_process_vm_rw (mm/process_vm_access.c:259) [ 275.295197][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1)) [ 275.295634][ T4335] __x64_sys_process_vm_readv (mm/process_vm_access.c:291) [ 275.296139][ T4335] ? syscall_enter_from_user_mode (kernel/entry/common.c:94 kernel/entry/common.c:112) [ 275.296642][ T4335] do_syscall_64 (arch/x86/entry/common.c:51 (discriminator 1) arch/x86/entry/common.c:82 (discriminator 1)) [ 275.297032][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1)) [ 275.297470][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359) [ 275.297988][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.298389][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359) [ 275.298906][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.299304][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.299703][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.300115][ T4335] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) This BUG is the VM_BUG_ON(!in_atomic() && !irqs_disabled()) assertion in folio_ref_try_add_rcu() for non-SMP kernel. The process_vm_readv() calls GUP to pin the THP. An optimization for pinning THP introduced by commit 57edfcfd34
("mm/gup: accelerate thp gup even for "pages != NULL"") calls try_grab_folio() to pin the THP, but try_grab_folio() is supposed to be called in atomic context for non-SMP kernel, for example, irq disabled or preemption disabled, due to the optimization introduced by commit e286781d5f
("mm: speculative page references"). The commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") is not actually the root cause although the bisection pointed to it. It just makes the problem more likely to be exposed. The follow-up discussion suggested the optimization for non-SMP kernel may be outdated and not worth it anymore [1]. So remove the optimization to silence the BUG. However calling try_grab_folio() in GUP slow path actually is unnecessary, so the following patch will clean this up. [1] https://lore.kernel.org/linux-mm/821cf1d6-92b9-4ac4-bacc-d8f2364ac14f@paulmck-laptop/ Link: https://lkml.kernel.org/r/20240625205350.1777481-1-yang@os.amperecomputing.com Fixes: 57edfcfd34
("mm/gup: accelerate thp gup even for "pages != NULL"") Signed-off-by: Yang Shi <yang@os.amperecomputing.com> Reported-by: kernel test robot <oliver.sang@intel.com> Tested-by: Oliver Sang <oliver.sang@intel.com> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Christoph Lameter <cl@linux.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Rik van Riel <riel@surriel.com> Cc: Vivek Kasireddy <vivek.kasireddy@intel.com> Cc: <stable@vger.kernel.org> [6.6+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
299 lines
7.5 KiB
C
299 lines
7.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_PAGE_REF_H
|
|
#define _LINUX_PAGE_REF_H
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/mm_types.h>
|
|
#include <linux/page-flags.h>
|
|
#include <linux/tracepoint-defs.h>
|
|
|
|
/*
 * Declarations of the page_ref tracepoints that the helpers in this
 * header test through page_ref_tracepoint_active().
 */
DECLARE_TRACEPOINT(page_ref_set);
DECLARE_TRACEPOINT(page_ref_mod);
DECLARE_TRACEPOINT(page_ref_mod_and_test);
DECLARE_TRACEPOINT(page_ref_mod_and_return);
DECLARE_TRACEPOINT(page_ref_mod_unless);
DECLARE_TRACEPOINT(page_ref_freeze);
DECLARE_TRACEPOINT(page_ref_unfreeze);
|
|
|
|
#ifdef CONFIG_DEBUG_PAGE_REF
|
|
|
|
/*
 * The trace_<tracepoint>_enabled() helpers would be the natural way to
 * ask whether a tracepoint is live, but header include issues make them
 * unusable from this file. The static key check is therefore open coded.
 *
 * Compare trace_##name##_enabled(void) in include/linux/tracepoint.h.
 */
#define page_ref_tracepoint_active(tp) tracepoint_enabled(tp)
|
|
|
|
/*
 * Tracing hooks declared (not defined) here when CONFIG_DEBUG_PAGE_REF
 * is set. Function prototypes have external linkage by default, so the
 * "extern" keyword is left out.
 */
void __page_ref_set(struct page *page, int v);
void __page_ref_mod(struct page *page, int v);
void __page_ref_mod_and_test(struct page *page, int v, int ret);
void __page_ref_mod_and_return(struct page *page, int v, int ret);
void __page_ref_mod_unless(struct page *page, int v, int u);
void __page_ref_freeze(struct page *page, int v, int ret);
void __page_ref_unfreeze(struct page *page, int v);
|
|
|
|
#else
|
|
|
|
/* Without CONFIG_DEBUG_PAGE_REF every tracepoint check evaluates to false. */
#define page_ref_tracepoint_active(tp) false
|
|
|
|
/* Empty stand-in for the set hook when CONFIG_DEBUG_PAGE_REF is not defined. */
static inline void __page_ref_set(struct page *page, int v)
{
	/* intentionally empty */
}
|
|
/* Empty stand-in for the mod hook when CONFIG_DEBUG_PAGE_REF is not defined. */
static inline void __page_ref_mod(struct page *page, int v)
{
	/* intentionally empty */
}
|
|
/*
 * Empty stand-in for the mod-and-test hook when CONFIG_DEBUG_PAGE_REF is
 * not defined.
 */
static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
{
	/* intentionally empty */
}
|
|
/*
 * Empty stand-in for the mod-and-return hook when CONFIG_DEBUG_PAGE_REF is
 * not defined.
 */
static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
{
	/* intentionally empty */
}
|
|
/*
 * Empty stand-in for the mod-unless hook when CONFIG_DEBUG_PAGE_REF is
 * not defined.
 */
static inline void __page_ref_mod_unless(struct page *page, int v, int u)
{
	/* intentionally empty */
}
|
|
/* Empty stand-in for the freeze hook when CONFIG_DEBUG_PAGE_REF is not defined. */
static inline void __page_ref_freeze(struct page *page, int v, int ret)
{
	/* intentionally empty */
}
|
|
/*
 * Empty stand-in for the unfreeze hook when CONFIG_DEBUG_PAGE_REF is not
 * defined.
 */
static inline void __page_ref_unfreeze(struct page *page, int v)
{
	/* intentionally empty */
}
|
|
|
|
#endif
|
|
|
|
static inline int page_ref_count(const struct page *page)
|
|
{
|
|
return atomic_read(&page->_refcount);
|
|
}
|
|
|
|
/**
|
|
* folio_ref_count - The reference count on this folio.
|
|
* @folio: The folio.
|
|
*
|
|
* The refcount is usually incremented by calls to folio_get() and
|
|
* decremented by calls to folio_put(). Some typical users of the
|
|
* folio refcount:
|
|
*
|
|
* - Each reference from a page table
|
|
* - The page cache
|
|
* - Filesystem private data
|
|
* - The LRU list
|
|
* - Pipes
|
|
* - Direct IO which references this page in the process address space
|
|
*
|
|
* Return: The number of references to this folio.
|
|
*/
|
|
static inline int folio_ref_count(const struct folio *folio)
|
|
{
|
|
return page_ref_count(&folio->page);
|
|
}
|
|
|
|
/* Reference count of the folio that page_folio() returns for @page. */
static inline int page_count(const struct page *page)
{
	int refs = folio_ref_count(page_folio(page));

	return refs;
}
|
|
|
|
static inline void set_page_count(struct page *page, int v)
|
|
{
|
|
atomic_set(&page->_refcount, v);
|
|
if (page_ref_tracepoint_active(page_ref_set))
|
|
__page_ref_set(page, v);
|
|
}
|
|
|
|
static inline void folio_set_count(struct folio *folio, int v)
|
|
{
|
|
set_page_count(&folio->page, v);
|
|
}
|
|
|
|
/*
 * Give a page its initial reference count of one before it is released
 * to the page allocator for the first time (at boot or on memory hotplug).
 */
static inline void init_page_count(struct page *page)
{
	const int initial_refs = 1;

	set_page_count(page, initial_refs);
}
|
|
|
|
static inline void page_ref_add(struct page *page, int nr)
|
|
{
|
|
atomic_add(nr, &page->_refcount);
|
|
if (page_ref_tracepoint_active(page_ref_mod))
|
|
__page_ref_mod(page, nr);
|
|
}
|
|
|
|
static inline void folio_ref_add(struct folio *folio, int nr)
|
|
{
|
|
page_ref_add(&folio->page, nr);
|
|
}
|
|
|
|
static inline void page_ref_sub(struct page *page, int nr)
|
|
{
|
|
atomic_sub(nr, &page->_refcount);
|
|
if (page_ref_tracepoint_active(page_ref_mod))
|
|
__page_ref_mod(page, -nr);
|
|
}
|
|
|
|
static inline void folio_ref_sub(struct folio *folio, int nr)
|
|
{
|
|
page_ref_sub(&folio->page, nr);
|
|
}
|
|
|
|
static inline int page_ref_sub_return(struct page *page, int nr)
|
|
{
|
|
int ret = atomic_sub_return(nr, &page->_refcount);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_mod_and_return))
|
|
__page_ref_mod_and_return(page, -nr, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline int folio_ref_sub_return(struct folio *folio, int nr)
|
|
{
|
|
return page_ref_sub_return(&folio->page, nr);
|
|
}
|
|
|
|
static inline void page_ref_inc(struct page *page)
|
|
{
|
|
atomic_inc(&page->_refcount);
|
|
if (page_ref_tracepoint_active(page_ref_mod))
|
|
__page_ref_mod(page, 1);
|
|
}
|
|
|
|
static inline void folio_ref_inc(struct folio *folio)
|
|
{
|
|
page_ref_inc(&folio->page);
|
|
}
|
|
|
|
static inline void page_ref_dec(struct page *page)
|
|
{
|
|
atomic_dec(&page->_refcount);
|
|
if (page_ref_tracepoint_active(page_ref_mod))
|
|
__page_ref_mod(page, -1);
|
|
}
|
|
|
|
static inline void folio_ref_dec(struct folio *folio)
|
|
{
|
|
page_ref_dec(&folio->page);
|
|
}
|
|
|
|
static inline int page_ref_sub_and_test(struct page *page, int nr)
|
|
{
|
|
int ret = atomic_sub_and_test(nr, &page->_refcount);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_mod_and_test))
|
|
__page_ref_mod_and_test(page, -nr, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
|
|
{
|
|
return page_ref_sub_and_test(&folio->page, nr);
|
|
}
|
|
|
|
static inline int page_ref_inc_return(struct page *page)
|
|
{
|
|
int ret = atomic_inc_return(&page->_refcount);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_mod_and_return))
|
|
__page_ref_mod_and_return(page, 1, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline int folio_ref_inc_return(struct folio *folio)
|
|
{
|
|
return page_ref_inc_return(&folio->page);
|
|
}
|
|
|
|
static inline int page_ref_dec_and_test(struct page *page)
|
|
{
|
|
int ret = atomic_dec_and_test(&page->_refcount);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_mod_and_test))
|
|
__page_ref_mod_and_test(page, -1, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline int folio_ref_dec_and_test(struct folio *folio)
|
|
{
|
|
return page_ref_dec_and_test(&folio->page);
|
|
}
|
|
|
|
static inline int page_ref_dec_return(struct page *page)
|
|
{
|
|
int ret = atomic_dec_return(&page->_refcount);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_mod_and_return))
|
|
__page_ref_mod_and_return(page, -1, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline int folio_ref_dec_return(struct folio *folio)
|
|
{
|
|
return page_ref_dec_return(&folio->page);
|
|
}
|
|
|
|
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
|
|
{
|
|
bool ret = atomic_add_unless(&page->_refcount, nr, u);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_mod_unless))
|
|
__page_ref_mod_unless(page, nr, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
|
|
{
|
|
return page_ref_add_unless(&folio->page, nr, u);
|
|
}
|
|
|
|
/**
 * folio_try_get - Attempt to increase the refcount on a folio.
 * @folio: The folio.
 *
 * Use this to try to obtain a reference when you do not hold one yet.
 * The attempt can fail, for instance when the folio was freed after you
 * found a pointer to it, or when it is frozen for the purposes of
 * splitting or migration.
 *
 * Return: True if the reference count was successfully incremented.
 */
static inline bool folio_try_get(struct folio *folio)
{
	bool got_ref = folio_ref_add_unless(folio, 1, 0);

	return got_ref;
}
|
|
|
|
/*
 * Try to add @count references to @folio by calling
 * folio_ref_add_unless() with 0 as the "unless" value.
 *
 * Return: the result of folio_ref_add_unless().
 */
static inline bool folio_ref_try_add(struct folio *folio, int count)
{
	bool added = folio_ref_add_unless(folio, count, 0);

	return added;
}
|
|
|
|
static inline int page_ref_freeze(struct page *page, int count)
|
|
{
|
|
int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
|
|
|
|
if (page_ref_tracepoint_active(page_ref_freeze))
|
|
__page_ref_freeze(page, count, ret);
|
|
return ret;
|
|
}
|
|
|
|
static inline int folio_ref_freeze(struct folio *folio, int count)
|
|
{
|
|
return page_ref_freeze(&folio->page, count);
|
|
}
|
|
|
|
static inline void page_ref_unfreeze(struct page *page, int count)
|
|
{
|
|
VM_BUG_ON_PAGE(page_count(page) != 0, page);
|
|
VM_BUG_ON(count == 0);
|
|
|
|
atomic_set_release(&page->_refcount, count);
|
|
if (page_ref_tracepoint_active(page_ref_unfreeze))
|
|
__page_ref_unfreeze(page, count);
|
|
}
|
|
|
|
static inline void folio_ref_unfreeze(struct folio *folio, int count)
|
|
{
|
|
page_ref_unfreeze(&folio->page, count);
|
|
}
|
|
#endif
|