mm: don't be stuck to rmap lock on reclaim path

The rmap locks(i_mmap_rwsem and anon_vma->root->rwsem) could be contended
under memory pressure if processes keep working on their vmas(e.g., fork,
mmap, munmap).  It makes reclaim path stuck.  In our real workload traces,
we see kswapd is waiting the lock for 300ms+(worst case, a sec) and it
makes other processes entering direct reclaim, which were also stuck on
the lock.

This patch makes lru aging path try_lock mode like shink_page_list so the
reclaim context will keep working with next lru pages without being stuck.
if it found the rmap lock contended, it rotates the page back to head of
lru in both active/inactive lrus to make them consistent behavior, which
is basic starting point rather than adding more heristic.

Since this patch introduces a new "contended" field as out-param along
with try_lock in-param in rmap_walk_control, it's not immutable any longer
if the try_lock is set so remove const keywords on rmap related functions.
Since rmap walking is already expensive operation, I doubt the const
would help sizable benefit( And we didn't have it until 5.17).

In a heavy app workload in Android, trace shows following statistics.  It
almost removes rmap lock contention from reclaim path.

Martin Liu reported:

Before:

   max_dur(ms)  min_dur(ms)  max-min(dur)ms  avg_dur(ms)  sum_dur(ms)  count blocked_function
         1632            0            1631   151.542173        31672    209  page_lock_anon_vma_read
          601            0             601   145.544681        28817    198  rmap_walk_file

After:

   max_dur(ms)  min_dur(ms)  max-min(dur)ms  avg_dur(ms)  sum_dur(ms)  count blocked_function
          NaN          NaN              NaN          NaN          NaN    0.0             NaN
            0            0                0     0.127645            1     12  rmap_walk_file

[minchan@kernel.org: add comment, per Matthew]
  Link: https://lkml.kernel.org/r/YnNqeB5tUf6LZ57b@google.com
Link: https://lkml.kernel.org/r/20220510215423.164547-1-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: John Dias <joaodias@google.com>
Cc: Tim Murray <timmurray@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Martin Liu <liumartin@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Minchan Kim 2022-05-19 14:08:54 -07:00 committed by akpm
parent f4840ccfca
commit 6d4675e601
8 changed files with 87 additions and 28 deletions

View file

@ -527,9 +527,11 @@ out:
*
* Its a little more complex as it tries to keep the fast path to a single
* atomic op -- the trylock. If we fail the trylock, we fall back to getting a
* reference like with page_get_anon_vma() and then block on the mutex.
* reference like with page_get_anon_vma() and then block on the mutex
* on !rwc->try_lock case.
*/
struct anon_vma *folio_lock_anon_vma_read(struct folio *folio)
struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma = NULL;
struct anon_vma *root_anon_vma;
@ -557,6 +559,12 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio)
goto out;
}
if (rwc && rwc->try_lock) {
anon_vma = NULL;
rwc->contended = true;
goto out;
}
/* trylock failed, we got to sleep */
if (!atomic_inc_not_zero(&anon_vma->refcount)) {
anon_vma = NULL;
@ -883,7 +891,8 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
*
* Quick test_and_clear_referenced for all mappings of a folio,
*
* Return: The number of mappings which referenced the folio.
* Return: The number of mappings which referenced the folio. Return -1 if
* the function bailed out due to rmap lock contention.
*/
int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags)
@ -897,6 +906,7 @@ int folio_referenced(struct folio *folio, int is_locked,
.rmap_one = folio_referenced_one,
.arg = (void *)&pra,
.anon_lock = folio_lock_anon_vma_read,
.try_lock = true,
};
*vm_flags = 0;
@ -927,7 +937,7 @@ int folio_referenced(struct folio *folio, int is_locked,
if (we_locked)
folio_unlock(folio);
return pra.referenced;
return rwc.contended ? -1 : pra.referenced;
}
static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw)
@ -2336,12 +2346,12 @@ void __put_anon_vma(struct anon_vma *anon_vma)
}
static struct anon_vma *rmap_walk_anon_lock(struct folio *folio,
const struct rmap_walk_control *rwc)
struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
if (rwc->anon_lock)
return rwc->anon_lock(folio);
return rwc->anon_lock(folio, rwc);
/*
* Note: remove_migration_ptes() cannot use folio_lock_anon_vma_read()
@ -2353,7 +2363,17 @@ static struct anon_vma *rmap_walk_anon_lock(struct folio *folio,
if (!anon_vma)
return NULL;
if (anon_vma_trylock_read(anon_vma))
goto out;
if (rwc->try_lock) {
anon_vma = NULL;
rwc->contended = true;
goto out;
}
anon_vma_lock_read(anon_vma);
out:
return anon_vma;
}
@ -2367,7 +2387,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct folio *folio,
* contained in the anon_vma struct it points to.
*/
static void rmap_walk_anon(struct folio *folio,
const struct rmap_walk_control *rwc, bool locked)
struct rmap_walk_control *rwc, bool locked)
{
struct anon_vma *anon_vma;
pgoff_t pgoff_start, pgoff_end;
@ -2415,7 +2435,7 @@ static void rmap_walk_anon(struct folio *folio,
* contained in the address_space struct it points to.
*/
static void rmap_walk_file(struct folio *folio,
const struct rmap_walk_control *rwc, bool locked)
struct rmap_walk_control *rwc, bool locked)
{
struct address_space *mapping = folio_mapping(folio);
pgoff_t pgoff_start, pgoff_end;
@ -2434,8 +2454,18 @@ static void rmap_walk_file(struct folio *folio,
pgoff_start = folio_pgoff(folio);
pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
if (!locked)
if (!locked) {
if (i_mmap_trylock_read(mapping))
goto lookup;
if (rwc->try_lock) {
rwc->contended = true;
return;
}
i_mmap_lock_read(mapping);
}
lookup:
vma_interval_tree_foreach(vma, &mapping->i_mmap,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(&folio->page, vma);
@ -2457,7 +2487,7 @@ done:
i_mmap_unlock_read(mapping);
}
void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc)
void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc)
{
if (unlikely(folio_test_ksm(folio)))
rmap_walk_ksm(folio, rwc);
@ -2468,7 +2498,7 @@ void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc)
}
/* Like rmap_walk, but caller holds relevant rmap lock */
void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc)
void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc)
{
/* no ksm support for now */
VM_BUG_ON_FOLIO(folio_test_ksm(folio), folio);