mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-24 14:07:52 -04:00
Folio changes for 5.18
- Rewrite how munlock works to massively reduce the contention on i_mmap_rwsem (Hugh Dickins): https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/ - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph Hellwig): https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/ - Convert GUP to use folios and make pincount available for order-1 pages. (Matthew Wilcox) - Convert a few more truncation functions to use folios (Matthew Wilcox) - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew Wilcox) - Convert rmap_walk to use folios (Matthew Wilcox) - Convert most of shrink_page_list() to use a folio (Matthew Wilcox) - Add support for creating large folios in readahead (Matthew Wilcox) -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEEejHryeLBw/spnjHrDpNsjXcpgj4FAmI4ucgACgkQDpNsjXcp gj69Wgf6AwqwmO5Tmy+fLScDPqWxmXJofbocae1kyoGHf7Ui91OK4U2j6IpvAr+g P/vLIK+JAAcTQcrSCjymuEkf4HkGZOR03QQn7maPIEe4eLrZRQDEsmHC1L9gpeJp s/GMvDWiGE0Tnxu0EOzfVi/yT+qjIl/S8VvqtCoJv1HdzxitZ7+1RDuqImaMC5MM Qi3uHag78vLmCltLXpIOdpgZhdZexCdL2Y/1npf+b6FVkAJRRNUnA0gRbS7YpoVp CbxEJcmAl9cpJLuj5i5kIfS9trr+/QcvbUlzRxh4ggC58iqnmF2V09l2MJ7YU3XL v1O/Elq4lRhXninZFQEm9zjrri7LDQ== =n9Ad -----END PGP SIGNATURE----- Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache Pull folio updates from Matthew Wilcox: - Rewrite how munlock works to massively reduce the contention on i_mmap_rwsem (Hugh Dickins): https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/ - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph Hellwig): https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/ - Convert GUP to use folios and make pincount available for order-1 pages. (Matthew Wilcox) - Convert a few more truncation functions to use folios (Matthew Wilcox) - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew Wilcox) - Convert rmap_walk to use folios (Matthew Wilcox) - Convert most of shrink_page_list() to use a folio (Matthew Wilcox) - Add support for creating large folios in readahead (Matthew Wilcox) * tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits) mm/damon: minor cleanup for damon_pa_young selftests/vm/transhuge-stress: Support file-backed PMD folios mm/filemap: Support VM_HUGEPAGE for file mappings mm/readahead: Switch to page_cache_ra_order mm/readahead: Align file mappings for non-DAX mm/readahead: Add large folio readahead mm: Support arbitrary THP sizes mm: Make large folios depend on THP mm: Fix READ_ONLY_THP warning mm/filemap: Allow large folios to be added to the page cache mm: Turn can_split_huge_page() into can_split_folio() mm/vmscan: Convert pageout() to take a folio mm/vmscan: Turn page_check_references() into folio_check_references() mm/vmscan: Account large folios correctly mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios mm/vmscan: Free non-shmem folios without splitting them mm/rmap: Constify the rmap_walk_control argument mm/rmap: Convert rmap_walk() to take a folio mm: Turn page_anon_vma() into folio_anon_vma() mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read() ...
This commit is contained in:
commit
9030fb0bb9
100 changed files with 2924 additions and 3044 deletions
177
mm/swap.c
177
mm/swap.c
|
@ -74,8 +74,8 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
|
|||
};
|
||||
|
||||
/*
|
||||
* This path almost never happens for VM activity - pages are normally
|
||||
* freed via pagevecs. But it gets used by networking.
|
||||
* This path almost never happens for VM activity - pages are normally freed
|
||||
* via pagevecs. But it gets used by networking - and for compound pages.
|
||||
*/
|
||||
static void __page_cache_release(struct page *page)
|
||||
{
|
||||
|
@ -89,6 +89,14 @@ static void __page_cache_release(struct page *page)
|
|||
__clear_page_lru_flags(page);
|
||||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
}
|
||||
/* See comment on PageMlocked in release_pages() */
|
||||
if (unlikely(PageMlocked(page))) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
__ClearPageMlocked(page);
|
||||
mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
|
||||
count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
|
||||
}
|
||||
__ClearPageWaiters(page);
|
||||
}
|
||||
|
||||
|
@ -114,17 +122,9 @@ static void __put_compound_page(struct page *page)
|
|||
|
||||
void __put_page(struct page *page)
|
||||
{
|
||||
if (is_zone_device_page(page)) {
|
||||
put_dev_pagemap(page->pgmap);
|
||||
|
||||
/*
|
||||
* The page belongs to the device that created pgmap. Do
|
||||
* not return it to page allocator.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(PageCompound(page)))
|
||||
if (unlikely(is_zone_device_page(page)))
|
||||
free_zone_device_page(page);
|
||||
else if (unlikely(PageCompound(page)))
|
||||
__put_compound_page(page);
|
||||
else
|
||||
__put_single_page(page);
|
||||
|
@ -482,22 +482,12 @@ EXPORT_SYMBOL(folio_add_lru);
|
|||
void lru_cache_add_inactive_or_unevictable(struct page *page,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
bool unevictable;
|
||||
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
|
||||
unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
|
||||
if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
/*
|
||||
* We use the irq-unsafe __mod_zone_page_state because this
|
||||
* counter is not modified from interrupt context, and the pte
|
||||
* lock is held(spinlock), which implies preemption disabled.
|
||||
*/
|
||||
__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
|
||||
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
|
||||
}
|
||||
lru_cache_add(page);
|
||||
if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED))
|
||||
mlock_new_page(page);
|
||||
else
|
||||
lru_cache_add(page);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -636,35 +626,37 @@ void lru_add_drain_cpu(int cpu)
|
|||
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
|
||||
|
||||
activate_page_drain(cpu);
|
||||
mlock_page_drain(cpu);
|
||||
}
|
||||
|
||||
/**
|
||||
* deactivate_file_page - forcefully deactivate a file page
|
||||
* @page: page to deactivate
|
||||
* deactivate_file_folio() - Forcefully deactivate a file folio.
|
||||
* @folio: Folio to deactivate.
|
||||
*
|
||||
* This function hints the VM that @page is a good reclaim candidate,
|
||||
* for example if its invalidation fails due to the page being dirty
|
||||
* This function hints to the VM that @folio is a good reclaim candidate,
|
||||
* for example if its invalidation fails due to the folio being dirty
|
||||
* or under writeback.
|
||||
*
|
||||
* Context: Caller holds a reference on the page.
|
||||
*/
|
||||
void deactivate_file_page(struct page *page)
|
||||
void deactivate_file_folio(struct folio *folio)
|
||||
{
|
||||
struct pagevec *pvec;
|
||||
|
||||
/*
|
||||
* In a workload with many unevictable page such as mprotect,
|
||||
* unevictable page deactivation for accelerating reclaim is pointless.
|
||||
* In a workload with many unevictable pages such as mprotect,
|
||||
* unevictable folio deactivation for accelerating reclaim is pointless.
|
||||
*/
|
||||
if (PageUnevictable(page))
|
||||
if (folio_test_unevictable(folio))
|
||||
return;
|
||||
|
||||
if (likely(get_page_unless_zero(page))) {
|
||||
struct pagevec *pvec;
|
||||
folio_get(folio);
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
|
||||
|
||||
if (pagevec_add_and_need_flush(pvec, page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
if (pagevec_add_and_need_flush(pvec, &folio->page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -837,6 +829,7 @@ inline void __lru_add_drain_all(bool force_all_cpus)
|
|||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
|
||||
need_activate_page_drain(cpu) ||
|
||||
need_mlock_page_drain(cpu) ||
|
||||
has_bh_in_lru(cpu, NULL)) {
|
||||
INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
queue_work_on(cpu, mm_percpu_wq, work);
|
||||
|
@ -935,18 +928,10 @@ void release_pages(struct page **pages, int nr)
|
|||
unlock_page_lruvec_irqrestore(lruvec, flags);
|
||||
lruvec = NULL;
|
||||
}
|
||||
/*
|
||||
* ZONE_DEVICE pages that return 'false' from
|
||||
* page_is_devmap_managed() do not require special
|
||||
* processing, and instead, expect a call to
|
||||
* put_page_testzero().
|
||||
*/
|
||||
if (page_is_devmap_managed(page)) {
|
||||
put_devmap_managed_page(page);
|
||||
if (put_devmap_managed_page(page))
|
||||
continue;
|
||||
}
|
||||
if (put_page_testzero(page))
|
||||
put_dev_pagemap(page->pgmap);
|
||||
free_zone_device_page(page);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -974,6 +959,18 @@ void release_pages(struct page **pages, int nr)
|
|||
__clear_page_lru_flags(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* In rare cases, when truncation or holepunching raced with
|
||||
* munlock after VM_LOCKED was cleared, Mlocked may still be
|
||||
* found set here. This does not indicate a problem, unless
|
||||
* "unevictable_pgs_cleared" appears worryingly large.
|
||||
*/
|
||||
if (unlikely(PageMlocked(page))) {
|
||||
__ClearPageMlocked(page);
|
||||
dec_zone_page_state(page, NR_MLOCK);
|
||||
count_vm_event(UNEVICTABLE_PGCLEARED);
|
||||
}
|
||||
|
||||
__ClearPageWaiters(page);
|
||||
|
||||
list_add(&page->lru, &pages_to_free);
|
||||
|
@ -1014,43 +1011,32 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
|
|||
|
||||
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
|
||||
|
||||
/*
|
||||
* A folio becomes evictable in two ways:
|
||||
* 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()].
|
||||
* 2) Before acquiring LRU lock to put the folio on the correct LRU
|
||||
* and then
|
||||
* a) do PageLRU check with lock [check_move_unevictable_pages]
|
||||
* b) do PageLRU check before lock [clear_page_mlock]
|
||||
*
|
||||
* (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
|
||||
* following strict ordering:
|
||||
*
|
||||
* #0: __pagevec_lru_add_fn #1: clear_page_mlock
|
||||
*
|
||||
* folio_set_lru() folio_test_clear_mlocked()
|
||||
* smp_mb() // explicit ordering // above provides strict
|
||||
* // ordering
|
||||
* folio_test_mlocked() folio_test_lru()
|
||||
*
|
||||
*
|
||||
* if '#1' does not observe setting of PG_lru by '#0' and
|
||||
* fails isolation, the explicit barrier will make sure that
|
||||
* folio_evictable check will put the folio on the correct
|
||||
* LRU. Without smp_mb(), folio_set_lru() can be reordered
|
||||
* after folio_test_mlocked() check and can make '#1' fail the
|
||||
* isolation of the folio whose mlocked bit is cleared (#0 is
|
||||
* also looking at the same folio) and the evictable folio will
|
||||
* be stranded on an unevictable LRU.
|
||||
*/
|
||||
folio_set_lru(folio);
|
||||
smp_mb__after_atomic();
|
||||
|
||||
/*
|
||||
* Is an smp_mb__after_atomic() still required here, before
|
||||
* folio_evictable() tests PageMlocked, to rule out the possibility
|
||||
* of stranding an evictable folio on an unevictable LRU? I think
|
||||
* not, because __munlock_page() only clears PageMlocked while the LRU
|
||||
* lock is held.
|
||||
*
|
||||
* (That is not true of __page_cache_release(), and not necessarily
|
||||
* true of release_pages(): but those only clear PageMlocked after
|
||||
* put_page_testzero() has excluded any other users of the page.)
|
||||
*/
|
||||
if (folio_evictable(folio)) {
|
||||
if (was_unevictable)
|
||||
__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
|
||||
} else {
|
||||
folio_clear_active(folio);
|
||||
folio_set_unevictable(folio);
|
||||
/*
|
||||
* folio->mlock_count = !!folio_test_mlocked(folio)?
|
||||
* But that leaves __mlock_page() in doubt whether another
|
||||
* actor has already counted the mlock or not. Err on the
|
||||
* safe side, underestimate, let page reclaim fix it, rather
|
||||
* than leaving a page on the unevictable LRU indefinitely.
|
||||
*/
|
||||
folio->mlock_count = 0;
|
||||
if (!was_unevictable)
|
||||
__count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
|
||||
}
|
||||
|
@ -1158,26 +1144,3 @@ void __init swap_setup(void)
|
|||
* _really_ don't want to cluster much more
|
||||
*/
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEV_PAGEMAP_OPS
|
||||
void put_devmap_managed_page(struct page *page)
|
||||
{
|
||||
int count;
|
||||
|
||||
if (WARN_ON_ONCE(!page_is_devmap_managed(page)))
|
||||
return;
|
||||
|
||||
count = page_ref_dec_return(page);
|
||||
|
||||
/*
|
||||
* devmap page refcounts are 1-based, rather than 0-based: if
|
||||
* refcount is 1, then the page is free and the refcount is
|
||||
* stable because nobody holds a reference on the page.
|
||||
*/
|
||||
if (count == 1)
|
||||
free_devmap_managed_page(page);
|
||||
else if (!count)
|
||||
__put_page(page);
|
||||
}
|
||||
EXPORT_SYMBOL(put_devmap_managed_page);
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue