mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-24 14:07:52 -04:00
LRU pagevec holds a refcount on pages until the pagevec is drained. It could prevent migration since the refcount of the page is greater than the expectation in migration logic. To mitigate the issue, callers of migrate_pages drain the LRU pagevec via migrate_prep or lru_add_drain_all before the migrate_pages call. However, it's not enough because pages coming into the pagevec after the draining call could still stay in the pagevec, so they could keep preventing page migration. Since some callers of migrate_pages have retry logic with LRU draining, the page would migrate at the next trial, but it is still fragile in that it doesn't close the fundamental race between upcoming LRU pages entering the pagevec and migration, so the migration failure could cause contiguous memory allocation failure in the end. To close the race, this patch disables LRU caches (i.e., pagevec) during ongoing migration until migration is done. Since it's really hard to reproduce, I measured how many times migrate_pages retried with force mode (it is about a fallback to a sync migration) with the debug code below. int migrate_pages(struct list_head *from, new_page_t get_new_page, .. .. if (rc && reason == MR_CONTIG_RANGE && pass > 2) { printk(KERN_ERR, "pfn 0x%lx reason %d", page_to_pfn(page), rc); dump_page(page, "fail to migrate"); } The test repeatedly launched Android apps with a CMA allocation in the background every five seconds. The total CMA allocation count was about 500 during the testing. With this patch, the dump_page count was reduced from 400 to 30. The new interface is also useful for memory hotplug, which currently drains LRU pcp caches after each migration failure. This is rather suboptimal as it has to disrupt others running during the operation. With the new interface the operation happens only once. This is also in line with the pcp allocator caches, which are disabled for the offlining as well.
Link: https://lkml.kernel.org/r/20210319175127.886124-1-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Chris Goldsworthy <cgoldswo@codeaurora.org> Acked-by: Michal Hocko <mhocko@suse.com> Cc: John Dias <joaodias@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: David Hildenbrand <david@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Oliver Sang <oliver.sang@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
203 lines
6.1 KiB
C
203 lines
6.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_MIGRATE_H
|
|
#define _LINUX_MIGRATE_H
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/migrate_mode.h>
|
|
#include <linux/hugetlb.h>
|
|
|
|
/*
 * new_page_t - function type of the @new callback taken by migrate_pages().
 * It receives a page and an opaque @private value and returns a struct page
 * pointer (presumably the migration target -- confirm in mm/migrate.c).
 */
typedef struct page *new_page_t(struct page *page, unsigned long private);
|
|
/*
 * free_page_t - function type of the @free callback taken by migrate_pages().
 * It receives a page and an opaque @private value and returns nothing
 * (presumably it releases an unused target page -- confirm in mm/migrate.c).
 */
typedef void free_page_t(struct page *page, unsigned long private);
|
|
|
|
/* Forward declaration only; this header does not define the structure. */
struct migration_target_control;
|
|
|
|
/*
|
|
* Return values from address_space_operations.migratepage():
|
|
* - negative errno on page migration failure;
|
|
* - zero on page migration success;
|
|
*/
|
|
#define MIGRATEPAGE_SUCCESS 0
|
|
|
|
/*
 * Why a migration was requested; passed as the @reason argument of
 * migrate_pages().
 *
 * Enumerators are numbered implicitly from 0.  MR_TYPES is not a reason
 * itself: it is the count of reasons and sizes migrate_reason_names[], so
 * new reasons must be inserted before it.
 */
enum migrate_reason {
	MR_COMPACTION,
	MR_MEMORY_FAILURE,
	MR_MEMORY_HOTPLUG,
	MR_SYSCALL,		/* also applies to cpusets */
	MR_MEMPOLICY_MBIND,
	MR_NUMA_MISPLACED,
	MR_CONTIG_RANGE,
	MR_TYPES
};
|
|
|
|
/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
|
|
/* MR_TYPES entries, i.e. one slot per enum migrate_reason value. */
extern const char *migrate_reason_names[MR_TYPES];
|
|
|
|
#ifdef CONFIG_MIGRATION
|
|
|
|
extern void putback_movable_pages(struct list_head *l);
|
|
extern int migrate_page(struct address_space *mapping,
|
|
struct page *newpage, struct page *page,
|
|
enum migrate_mode mode);
|
|
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
|
|
unsigned long private, enum migrate_mode mode, int reason);
|
|
extern struct page *alloc_migration_target(struct page *page, unsigned long private);
|
|
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
|
|
extern void putback_movable_page(struct page *page);
|
|
|
|
/*
 * Preparation/teardown hooks and low-level copy helpers.
 *
 * NOTE(review): the change description for this header says LRU caches
 * (pagevecs) are disabled while a migration is in progress; migrate_prep()
 * and migrate_finish() look like the begin/end pair for that -- confirm
 * against mm/migrate.c before relying on it.
 *
 * None of the prep/finish hooks return a value, so callers must not test
 * a result.
 */
extern void migrate_prep(void);
extern void migrate_finish(void);
extern void migrate_prep_local(void);
extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
				  struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page, int extra_count);
|
|
#else
|
|
|
|
static inline void putback_movable_pages(struct list_head *l) {}
|
|
static inline int migrate_pages(struct list_head *l, new_page_t new,
|
|
free_page_t free, unsigned long private, enum migrate_mode mode,
|
|
int reason)
|
|
{ return -ENOSYS; }
|
|
static inline struct page *alloc_migration_target(struct page *page,
|
|
unsigned long private)
|
|
{ return NULL; }
|
|
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
|
|
{ return -EBUSY; }
|
|
|
|
/*
 * No-op stand-ins for kernels built without CONFIG_MIGRATION.
 *
 * The real functions are declared as returning void in the CONFIG_MIGRATION
 * branch of this header, so the stubs must return void as well; otherwise a
 * caller's validity would depend on the configuration.
 */
static inline void migrate_prep(void) {}
static inline void migrate_finish(void) {}
static inline void migrate_prep_local(void) {}
|
|
|
|
/*
 * !CONFIG_MIGRATION stubs for the page state/content copy helpers: both do
 * nothing.
 */
static inline void migrate_page_states(struct page *newpage, struct page *page)
{
}

static inline void migrate_page_copy(struct page *newpage,
				     struct page *page)
{
}

/* Always fails with -ENOSYS: migration support is not built in. */
static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
				  struct page *newpage, struct page *page)
{
	return -ENOSYS;
}
|
|
|
|
#endif /* CONFIG_MIGRATION */
|
|
|
|
#ifdef CONFIG_COMPACTION
|
|
/*
 * Movable-page helpers, implemented out of line when CONFIG_COMPACTION is
 * set.  PageMovable() returns an int used as a truth value; the two
 * double-underscore helpers set and clear that state for a page
 * (NOTE(review): exact semantics live in the implementation -- confirm).
 */
extern int PageMovable(struct page *page);
extern void __SetPageMovable(struct page *page, struct address_space *mapping);
extern void __ClearPageMovable(struct page *page);
|
|
#else
|
|
/*
 * !CONFIG_COMPACTION stubs: no page is ever reported as movable, and the
 * set/clear helpers do nothing.
 */
static inline int PageMovable(struct page *page)
{
	return 0;
}

static inline void __SetPageMovable(struct page *page,
				struct address_space *mapping)
{
}

static inline void __ClearPageMovable(struct page *page)
{
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
/*
 * NUMA-balancing hooks, implemented out of line when CONFIG_NUMA_BALANCING
 * is set.  migrate_misplaced_page() is given the page, the VMA it was found
 * in and a node id (presumably the destination node -- confirm in
 * mm/migrate.c).
 */
extern bool pmd_trans_migrating(pmd_t pmd);
extern int migrate_misplaced_page(struct page *page,
				  struct vm_area_struct *vma, int node);
|
|
#else
|
|
/* !CONFIG_NUMA_BALANCING stub: always reports false for any pmd. */
static inline bool pmd_trans_migrating(pmd_t pmd)
{
	return false;
}
|
|
/*
 * !CONFIG_NUMA_BALANCING stub: never migrates anything and always returns
 * -EAGAIN.
 */
static inline int migrate_misplaced_page(struct page *page,
					 struct vm_area_struct *vma, int node)
{
	return -EAGAIN; /* can't migrate now */
}
|
|
#endif /* CONFIG_NUMA_BALANCING */
|
|
|
|
#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
|
|
/*
 * Transparent-huge-page variant of migrate_misplaced_page(); only built when
 * both CONFIG_NUMA_BALANCING and CONFIG_TRANSPARENT_HUGEPAGE are set.
 * Besides the page, VMA and node it takes the owning mm, the pmd slot and
 * its entry value, and the address involved.
 */
extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
			struct vm_area_struct *vma,
			pmd_t *pmd, pmd_t entry,
			unsigned long address,
			struct page *page, int node);
|
|
#else
|
|
/*
 * Stub used unless both CONFIG_NUMA_BALANCING and
 * CONFIG_TRANSPARENT_HUGEPAGE are set: always returns -EAGAIN and touches
 * none of its arguments.
 */
static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
			struct vm_area_struct *vma,
			pmd_t *pmd, pmd_t entry,
			unsigned long address,
			struct page *page, int node)
{
	return -EAGAIN;
}
|
|
#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
|
|
|
|
|
|
#ifdef CONFIG_MIGRATION
|
|
|
|
/*
 * Encoding of a "migrate pfn": flag bits live in the low bits of an
 * unsigned long and the page frame number is stored shifted left by
 * MIGRATE_PFN_SHIFT (see migrate_pfn() and migrate_pfn_to_page()).
 *
 * Watch out for PAE architecture, which has an unsigned long, and might not
 * have enough bits to store all physical address and flags. So far we have
 * enough room for all our flags.
 */
#define MIGRATE_PFN_VALID	(1UL << 0)
#define MIGRATE_PFN_MIGRATE	(1UL << 1)
#define MIGRATE_PFN_LOCKED	(1UL << 2)
#define MIGRATE_PFN_WRITE	(1UL << 3)
#define MIGRATE_PFN_SHIFT	6
|
|
|
|
static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
|
|
{
|
|
if (!(mpfn & MIGRATE_PFN_VALID))
|
|
return NULL;
|
|
return pfn_to_page(mpfn >> MIGRATE_PFN_SHIFT);
|
|
}
|
|
|
|
static inline unsigned long migrate_pfn(unsigned long pfn)
|
|
{
|
|
return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
|
|
}
|
|
|
|
/*
 * Selection bits for struct migrate_vma.flags.  Each enumerator is a
 * distinct bit, so the values can be OR-ed together.
 */
enum migrate_vma_direction {
	MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
	MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
};
|
|
|
|
/*
 * Argument block shared by migrate_vma_setup(), migrate_vma_pages() and
 * migrate_vma_finalize().  It describes the address range [start, end)
 * inside @vma together with the caller-provided src/dst arrays.
 */
struct migrate_vma {
	struct vm_area_struct	*vma;
	/*
	 * Both src and dst array must be big enough for
	 * (end - start) >> PAGE_SHIFT entries.
	 *
	 * The caller must not modify the src array after
	 * migrate_vma_setup(), and must not change the dst array after
	 * migrate_vma_pages() returns.
	 */
	unsigned long		*dst;
	unsigned long		*src;
	/* NOTE(review): cpages/npages look like page counters -- confirm. */
	unsigned long		cpages;
	unsigned long		npages;
	unsigned long		start;
	unsigned long		end;

	/*
	 * Set to the owner value also stored in page->pgmap->owner for
	 * migrating out of device private memory. The flags also need to
	 * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
	 * The caller should always set this field when using mmu notifier
	 * callbacks to avoid device MMU invalidations for device private
	 * pages that are not being migrated.
	 */
	void			*pgmap_owner;
	/* Holds enum migrate_vma_direction selection bits (see above). */
	unsigned long		flags;
};
|
|
|
|
/*
 * struct migrate_vma driven migration.  Judging by the src/dst rules
 * documented in struct migrate_vma, the intended call order is
 * migrate_vma_setup(), then migrate_vma_pages(), then
 * migrate_vma_finalize() (NOTE(review): confirm against mm/migrate.c).
 * Only migrate_vma_setup() reports a status.
 */
int migrate_vma_setup(struct migrate_vma *args);
void migrate_vma_pages(struct migrate_vma *migrate);
void migrate_vma_finalize(struct migrate_vma *migrate);
|
|
|
|
#endif /* CONFIG_MIGRATION */
|
|
|
|
#endif /* _LINUX_MIGRATE_H */
|