Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:

 - A few misc subsystems: kthread, scripts, ntfs, ocfs2, block, and vfs

 - Most of the MM patches which precede the patches in Willy's tree: kasan,
   pagecache, gup, swap, shmem, memcg, selftests, pagemap, mremap, sparsemem,
   vmalloc, pagealloc, memory-failure, mlock, hugetlb, userfaultfd, vmscan,
   compaction, mempolicy, oom-kill, migration, thp, cma, autonuma, psi, ksm,
   page-poison, madvise, memory-hotplug, rmap, zswap, uaccess, ioremap,
   highmem, cleanups, kfence, hmm, and damon.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (227 commits)
  mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release()
  Docs/ABI/testing: add DAMON sysfs interface ABI document
  Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface
  selftests/damon: add a test for DAMON sysfs interface
  mm/damon/sysfs: support DAMOS stats
  mm/damon/sysfs: support DAMOS watermarks
  mm/damon/sysfs: support schemes prioritization
  mm/damon/sysfs: support DAMOS quotas
  mm/damon/sysfs: support DAMON-based Operation Schemes
  mm/damon/sysfs: support the physical address space monitoring
  mm/damon/sysfs: link DAMON for virtual address spaces monitoring
  mm/damon: implement a minimal stub for sysfs-based DAMON interface
  mm/damon/core: add number of each enum type values
  mm/damon/core: allow non-exclusive DAMON start/stop
  Docs/damon: update outdated term 'regions update interval'
  Docs/vm/damon/design: update DAMON-Idle Page Tracking interference handling
  Docs/vm/damon: call low level monitoring primitives the operations
  mm/damon: remove unnecessary CONFIG_DAMON option
  mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}()
  mm/damon/dbgfs-test: fix is_target_id() change
  ...
This commit is contained in:
commit 3bf03b9a08
262 changed files with 6789 additions and 2673 deletions
Documentation/ABI/testing/sysfs-kernel-mm-damon (new file, 274 lines)
@@ -0,0 +1,274 @@
What:		/sys/kernel/mm/damon/
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Interface for Data Access MONitoring (DAMON).  Contains files
		for controlling DAMON.  For more details on DAMON itself,
		please refer to Documentation/admin-guide/mm/damon/index.rst.

What:		/sys/kernel/mm/damon/admin/
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Interface for privileged users of DAMON.  Contains files for
		controlling DAMON that are aimed to be used by privileged
		users.

What:		/sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a number 'N' to this file creates the number of
		directories for controlling each DAMON worker thread (kdamond)
		named '0' to 'N-1' under the kdamonds/ directory.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/state
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing 'on' or 'off' to this file makes the kdamond start or
		stop, respectively.  Reading the file returns the keyword
		based on the current status.  Writing 'update_schemes_stats'
		to the file updates contents of schemes stats files of the
		kdamond.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/pid
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Reading this file returns the pid of the kdamond if it is
		running.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a number 'N' to this file creates the number of
		directories for controlling each DAMON context named '0' to
		'N-1' under the contexts/ directory.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a keyword for a monitoring operations set ('vaddr' for
		virtual address spaces monitoring, and 'paddr' for the
		physical address space monitoring) to this file makes the
		context use the operations set.  Reading the file returns the
		keyword for the operations set the context is set to use.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a value to this file sets the sampling interval of the
		DAMON context in microseconds as the value.  Reading this file
		returns the value.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/aggr_us
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a value to this file sets the aggregation interval of
		the DAMON context in microseconds as the value.  Reading this
		file returns the value.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/update_us
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a value to this file sets the update interval of the
		DAMON context in microseconds as the value.  Reading this file
		returns the value.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a value to this file sets the minimum number of
		monitoring regions of the DAMON context as the value.  Reading
		this file returns the value.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/max
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a value to this file sets the maximum number of
		monitoring regions of the DAMON context as the value.  Reading
		this file returns the value.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a number 'N' to this file creates the number of
		directories for controlling each DAMON target of the context
		named '0' to 'N-1' under the targets/ directory.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/pid_target
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the pid of
		the target process if the context is for virtual address
		spaces monitoring, respectively.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a number 'N' to this file creates the number of
		directories for setting each DAMON target memory region of the
		context named '0' to 'N-1' under the regions/ directory.  In
		case of the virtual address space monitoring, DAMON
		automatically sets the target memory region based on the
		target processes' mappings.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/start
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the start
		address of the monitoring region.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/end
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the end
		address of the monitoring region.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/nr_schemes
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing a number 'N' to this file creates the number of
		directories for controlling each DAMON-based operation scheme
		of the context named '0' to 'N-1' under the schemes/ directory.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/action
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the action
		of the scheme.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/min
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		minimum size of the scheme's target regions in bytes.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/max
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		maximum size of the scheme's target regions in bytes.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/min
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		minimum 'nr_accesses' of the scheme's target regions.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/max
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		maximum 'nr_accesses' of the scheme's target regions.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/min
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		minimum 'age' of the scheme's target regions.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/max
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		maximum 'age' of the scheme's target regions.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/ms
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the time
		quota of the scheme in milliseconds.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/bytes
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the size
		quota of the scheme in bytes.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the quotas
		charge reset interval of the scheme in milliseconds.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		under-quota limit regions prioritization weight for 'size' in
		permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/nr_accesses_permil
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		under-quota limit regions prioritization weight for
		'nr_accesses' in permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/age_permil
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the
		under-quota limit regions prioritization weight for 'age' in
		permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/metric
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the metric
		of the watermarks for the scheme.  The writable/readable
		keywords for this file are 'none' for disabling the watermarks
		feature, or 'free_mem_rate' for the system's global free
		memory rate in permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/interval_us
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the metric
		check interval of the watermarks for the scheme in
		microseconds.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/high
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the high
		watermark of the scheme in permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/mid
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the mid
		watermark of the scheme in permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/low
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Writing to and reading from this file sets and gets the low
		watermark of the scheme in permil.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Reading this file returns the number of regions that the
		action of the scheme has tried to be applied to.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_tried
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Reading this file returns the total size, in bytes, of regions
		that the action of the scheme has tried to be applied to.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_applied
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Reading this file returns the number of regions that the
		action of the scheme has been successfully applied to.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_applied
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Reading this file returns the total size, in bytes, of regions
		that the action of the scheme has been successfully applied
		to.

What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
Date:		Mar 2022
Contact:	SeongJae Park <sj@kernel.org>
Description:	Reading this file returns the number of times the quotas of
		the scheme have been exceeded.
@@ -64,6 +64,7 @@ Brief summary of control files.
 threads
 cgroup.procs			 show list of processes
 cgroup.event_control		 an interface for event_fd()
+				 This knob is not available on CONFIG_PREEMPT_RT systems.
 memory.usage_in_bytes		 show current usage for memory
				 (See 5.5 for details)
 memory.memsw.usage_in_bytes	 show current usage for memory+Swap
@@ -75,6 +76,7 @@ Brief summary of control files.
 memory.max_usage_in_bytes	 show max memory usage recorded
 memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
 memory.soft_limit_in_bytes	 set/show soft limit of memory usage
+				 This knob is not available on CONFIG_PREEMPT_RT systems.
 memory.stat			 show various statistics
 memory.use_hierarchy		 set/show hierarchical account enabled
				 This knob is deprecated and shouldn't be
@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
	  Amount of memory used to cache filesystem data,
	  including tmpfs and shared memory.

+	kernel (npn)
+	  Amount of total kernel memory, including
+	  (kernel_stack, pagetables, percpu, vmalloc, slab) in
+	  addition to other kernel memory use cases.
+
	kernel_stack
	  Amount of memory allocated to kernel stacks.
@@ -1649,7 +1649,7 @@
			[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
			enabled.
			Allows heavy hugetlb users to free up some more
-			memory (6 * PAGE_SIZE for each 2MB hugetlb page).
+			memory (7 * PAGE_SIZE for each 2MB hugetlb page).
			Format: { on | off (default) }

			on: enable the feature
@@ -4,7 +4,7 @@
 Detailed Usages
 ===============
 
-DAMON provides below three interfaces for different users.
+DAMON provides below interfaces for different users.
 
 - *DAMON user space tool.*
   `This <https://github.com/awslabs/damo>`_ is for privileged people such as
@@ -14,17 +14,21 @@ DAMON provides below three interfaces for different users.
   virtual and physical address spaces monitoring.  For more detail, please
   refer to its `usage document
   <https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
-- *debugfs interface.*
-  :ref:`This <debugfs_interface>` is for privileged user space programmers who
+- *sysfs interface.*
+  :ref:`This <sysfs_interface>` is for privileged user space programmers who
   want more optimized use of DAMON.  Using this, users can use DAMON’s major
-  features by reading from and writing to special debugfs files.  Therefore,
-  you can write and use your personalized DAMON debugfs wrapper programs that
-  reads/writes the debugfs files instead of you.  The `DAMON user space tool
+  features by reading from and writing to special sysfs files.  Therefore,
+  you can write and use your personalized DAMON sysfs wrapper programs that
+  reads/writes the sysfs files instead of you.  The `DAMON user space tool
   <https://github.com/awslabs/damo>`_ is one example of such programs.  It
   supports both virtual and physical address spaces monitoring.  Note that this
   interface provides only simple :ref:`statistics <damos_stats>` for the
   monitoring results.  For detailed monitoring results, DAMON provides a
   :ref:`tracepoint <tracepoint>`.
+- *debugfs interface.*
+  :ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
+  <sysfs_interface>`.  This will be removed after next LTS kernel is released,
+  so users should move to the :ref:`sysfs interface <sysfs_interface>`.
 - *Kernel Space Programming Interface.*
   :doc:`This </vm/damon/api>` is for kernel space programmers.  Using this,
   users can utilize every feature of DAMON most flexibly and efficiently by
@@ -32,6 +36,340 @@ DAMON provides below three interfaces for different users.
  DAMON for various address spaces.  For detail, please refer to the interface
  :doc:`document </vm/damon/api>`.


.. _sysfs_interface:

sysfs Interface
===============

DAMON sysfs interface is built when ``CONFIG_DAMON_SYSFS`` is defined.  It
creates multiple directories and files under its sysfs directory,
``<sysfs>/kernel/mm/damon/``.  You can control DAMON by writing to and reading
from the files under the directory.

For a short example, users can monitor the virtual address space of a given
workload as below. ::

    # cd /sys/kernel/mm/damon/admin/
    # echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
    # echo vaddr > kdamonds/0/contexts/0/operations
    # echo 1 > kdamonds/0/contexts/0/targets/nr_targets
    # echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
    # echo on > kdamonds/0/state

Files Hierarchy
---------------

The files hierarchy of DAMON sysfs interface is shown below.  In the below
figure, parents-children relations are represented with indentations, each
directory is having ``/`` suffix, and files in each directory are separated by
comma (","). ::

    /sys/kernel/mm/damon/admin
    │ kdamonds/nr_kdamonds
    │ │ 0/state,pid
    │ │ │ contexts/nr_contexts
    │ │ │ │ 0/operations
    │ │ │ │ │ monitoring_attrs/
    │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
    │ │ │ │ │ │ nr_regions/min,max
    │ │ │ │ │ targets/nr_targets
    │ │ │ │ │ │ 0/pid_target
    │ │ │ │ │ │ │ regions/nr_regions
    │ │ │ │ │ │ │ │ 0/start,end
    │ │ │ │ │ │ │ │ ...
    │ │ │ │ │ │ ...
    │ │ │ │ │ schemes/nr_schemes
    │ │ │ │ │ │ 0/action
    │ │ │ │ │ │ │ access_pattern/
    │ │ │ │ │ │ │ │ sz/min,max
    │ │ │ │ │ │ │ │ nr_accesses/min,max
    │ │ │ │ │ │ │ │ age/min,max
    │ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
    │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
    │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
    │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
    │ │ │ │ │ │ ...
    │ │ │ │ ...
    │ │ ...

Root
----

The root of the DAMON sysfs interface is ``<sysfs>/kernel/mm/damon/``, and it
has one directory named ``admin``.  The directory contains the files for
privileged user space programs' control of DAMON.  User space tools or daemons
having the root permission could use this directory.

kdamonds/
---------

The monitoring-related information including request specifications and results
are called DAMON context.  DAMON executes each context with a kernel thread
called kdamond, and multiple kdamonds could run in parallel.

Under the ``admin`` directory, one directory, ``kdamonds``, which has files for
controlling the kdamonds exists.  In the beginning, this directory has only one
file, ``nr_kdamonds``.  Writing a number (``N``) to the file creates the number
of child directories named ``0`` to ``N-1``.  Each directory represents each
kdamond.

kdamonds/<N>/
-------------

In each kdamond directory, two files (``state`` and ``pid``) and one directory
(``contexts``) exist.

Reading ``state`` returns ``on`` if the kdamond is currently running, or
``off`` if it is not running.  Writing ``on`` or ``off`` makes the kdamond be
in the state.  Writing ``update_schemes_stats`` to the ``state`` file updates
the contents of stats files for each DAMON-based operation scheme of the
kdamond.  For details of the stats, please refer to :ref:`stats section
<sysfs_schemes_stats>`.

If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.

The ``contexts`` directory contains files for controlling the monitoring
contexts that this kdamond will execute.
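
For instance, assuming a kdamond directory ``0`` exists (the index is only an
illustrative choice), its status and worker thread can be inspected with::

    # cat /sys/kernel/mm/damon/admin/kdamonds/0/state
    # cat /sys/kernel/mm/damon/admin/kdamonds/0/pid
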
kdamonds/<N>/contexts/
----------------------

In the beginning, this directory has only one file, ``nr_contexts``.  Writing a
number (``N``) to the file creates the number of child directories named as
``0`` to ``N-1``.  Each directory represents each monitoring context.  At the
moment, only one context per kdamond is supported, so only ``0`` or ``1`` can
be written to the file.

contexts/<N>/
-------------

In each context directory, one file (``operations``) and three directories
(``monitoring_attrs``, ``targets``, and ``schemes``) exist.

DAMON supports multiple types of monitoring operations, including those for
virtual address space and the physical address space.  You can set and get what
type of monitoring operations DAMON will use for the context by writing one of
the below keywords to, and reading from, the file.

- vaddr: Monitor virtual address spaces of specific processes
- paddr: Monitor the physical address space of the system
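
For example, switching a context (the ``0`` indices here are illustrative) to
physical address space monitoring and reading the setting back could look
like::

    # echo paddr > /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/operations
    # cat /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/operations
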
contexts/<N>/monitoring_attrs/
------------------------------

Files for specifying attributes of the monitoring, including the required
quality and efficiency of the monitoring, are in the ``monitoring_attrs``
directory.  Specifically, two directories, ``intervals`` and ``nr_regions``,
exist in this directory.

Under the ``intervals`` directory, three files for DAMON's sampling interval
(``sample_us``), aggregation interval (``aggr_us``), and update interval
(``update_us``) exist.  You can set and get the values in microseconds by
writing to and reading from the files.

Under the ``nr_regions`` directory, two files for the lower-bound and
upper-bound of DAMON's monitoring regions (``min`` and ``max``, respectively),
which control the monitoring overhead, exist.  You can set and get the values
by writing to and reading from the files.

For more details about the intervals and monitoring regions range, please refer
to the Design document (:doc:`/vm/damon/design`).
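
As a sketch (the kdamond/context indices and the values below are only
examples, not recommendations), the attributes could be tuned like this::

    # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/monitoring_attrs
    # echo 5000 > intervals/sample_us      # sample every 5 ms
    # echo 100000 > intervals/aggr_us      # aggregate every 100 ms
    # echo 1000000 > intervals/update_us   # update target regions every 1 s
    # echo 10 > nr_regions/min
    # echo 1000 > nr_regions/max
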
contexts/<N>/targets/
---------------------

In the beginning, this directory has only one file, ``nr_targets``.  Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``.  Each directory represents each monitoring target.

targets/<N>/
------------

In each target directory, one file (``pid_target``) and one directory
(``regions``) exist.

If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
be a process.  You can specify the process to DAMON by writing the pid of the
process to the ``pid_target`` file.

targets/<N>/regions
-------------------

When the ``vaddr`` monitoring operations set is being used (``vaddr`` is
written to the ``contexts/<N>/operations`` file), DAMON automatically sets and
updates the monitoring target regions so that entire memory mappings of target
processes can be covered.  However, users could want to set the initial
monitoring region to specific address ranges.

In contrast, DAMON does not automatically set and update the monitoring target
regions when the ``paddr`` monitoring operations set is being used (``paddr``
is written to the ``contexts/<N>/operations``).  Therefore, users should set
the monitoring target regions by themselves in that case.

In such cases, users can explicitly set the initial monitoring target regions
as they want, by writing proper values to the files under this directory.

In the beginning, this directory has only one file, ``nr_regions``.  Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``.  Each directory represents each initial monitoring target region.

regions/<N>/
------------

In each region directory, you will find two files (``start`` and ``end``).  You
can set and get the start and end addresses of the initial monitoring target
region by writing to and reading from the files, respectively.
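
For illustration (the indices and addresses below are made up), one explicit
region could be given to a ``paddr`` context like this::

    # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/targets/0
    # echo 1 > regions/nr_regions
    # echo $((4 * 1024 * 1024 * 1024)) > regions/0/start
    # echo $((8 * 1024 * 1024 * 1024)) > regions/0/end
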
contexts/<N>/schemes/
---------------------

For usual DAMON-based data access aware memory management optimizations, users
would normally want the system to apply a memory management action to a memory
region of a specific access pattern.  DAMON receives such formalized operation
schemes from the user and applies those to the target memory regions.  Users
can get and set the schemes by reading from and writing to files under this
directory.

In the beginning, this directory has only one file, ``nr_schemes``.  Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``.  Each directory represents each DAMON-based operation scheme.

schemes/<N>/
------------

In each scheme directory, four directories (``access_pattern``, ``quotas``,
``watermarks``, and ``stats``) and one file (``action``) exist.

The ``action`` file is for setting and getting what action you want to apply to
memory regions having the specific access pattern of interest.  The keywords
that can be written to and read from the file and their meaning are as below.

- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``
- ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``
- ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
- ``stat``: Do nothing but count the statistics

schemes/<N>/access_pattern/
---------------------------

The target access pattern of each DAMON-based operation scheme is constructed
with three ranges: the size of the region in bytes, the number of monitored
accesses per aggregate interval, and the number of aggregated intervals for the
age of the region.

Under the ``access_pattern`` directory, three directories (``sz``,
``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
exist.  You can set and get the access pattern for the given scheme by writing
to and reading from the ``min`` and ``max`` files under ``sz``,
``nr_accesses``, and ``age`` directories, respectively.

schemes/<N>/quotas/
-------------------

The optimal ``target access pattern`` for each ``action`` is workload
dependent, so it is not easy to find.  Worse yet, setting a scheme of some
action too aggressively can cause severe overhead.  To avoid such overhead,
users can limit the time and size quota for each scheme.  In detail, users can
ask DAMON to try to use only up to a specific amount of time (``time quota``)
for applying the action, and to apply the action to only up to a specific
amount (``size quota``) of memory regions having the target access pattern
within a given time interval (``reset interval``).

When the quota limit is expected to be exceeded, DAMON prioritizes found memory
regions of the ``target access pattern`` based on their size, access frequency,
and age.  For personalized prioritization, users can set the weights for the
three properties.

Under the ``quotas`` directory, three files (``ms``, ``bytes``,
``reset_interval_ms``) and one directory (``weights``) having three files
(``sz_permil``, ``nr_accesses_permil``, and ``age_permil``) in it exist.

You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
``reset interval`` in milliseconds by writing the values to the three files,
respectively.  You can also set the prioritization weights for size, access
frequency, and age in per-thousand unit by writing the values to the three
files under the ``weights`` directory.

schemes/<N>/watermarks/
-----------------------

To allow easy activation and deactivation of each scheme based on system
status, DAMON provides a feature called watermarks.  The feature receives five
values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``.  The
``metric`` is the system metric such as free memory ratio that can be measured.
If the metric value of the system is higher than the value in ``high`` or lower
than ``low`` at the moment, the scheme is deactivated.  If the value is lower
than ``mid``, the scheme is activated.

Under the watermarks directory, five files (``metric``, ``interval_us``,
``high``, ``mid``, and ``low``) for setting each value exist.  You can set and
get the five values by writing to and reading from the files, respectively.

Keywords and meanings of those that can be written to the ``metric`` file are
as below.

- none: Ignore the watermarks
- free_mem_rate: System's free memory rate (per thousand)

The ``interval`` should be written in microseconds.

.. _sysfs_schemes_stats:

schemes/<N>/stats/
------------------

DAMON counts the total number and bytes of regions that each scheme's action
has tried to be applied to, the two numbers for the regions that the action has
been successfully applied to, and the total number of the quota limit exceeds.
These statistics can be used for online analysis or tuning of the schemes.

The statistics can be retrieved by reading the files under the ``stats``
directory (``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
``qt_exceeds``), respectively.  The files are not updated in real time, so you
should ask the DAMON sysfs interface to update the content of the files for the
stats by writing a special keyword, ``update_schemes_stats``, to the relevant
``kdamonds/<N>/state`` file.
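
A minimal sketch of retrieving those counters (directory indices are again only
examples)::

    # echo update_schemes_stats > /sys/kernel/mm/damon/admin/kdamonds/0/state
    # grep . /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/schemes/0/stats/*
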
Example
~~~~~~~

Below commands apply a scheme saying "If a memory region of size in [4KiB,
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
interval in [10, 20], page out the region.  For the paging out, use only up to
10ms per second, and also don't page out more than 1GiB per second.  Under the
limitation, page out memory regions having longer age first.  Also, check the
free memory rate of the system every 5 seconds, start the monitoring and paging
out when the free memory rate becomes lower than 50%, but stop it if the free
memory rate becomes larger than 60%, or lower than 30%". ::

    # cd <sysfs>/kernel/mm/damon/admin
    # # populate directories
    # echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
    # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
    # cd kdamonds/0/contexts/0/schemes/0
    # # set the basic access pattern and the action
    # echo 4096 > access_pattern/sz/min
    # echo 8192 > access_pattern/sz/max
    # echo 0 > access_pattern/nr_accesses/min
    # echo 5 > access_pattern/nr_accesses/max
    # echo 10 > access_pattern/age/min
    # echo 20 > access_pattern/age/max
    # echo pageout > action
    # # set quotas
    # echo 10 > quotas/ms
    # echo $((1024*1024*1024)) > quotas/bytes
    # echo 1000 > quotas/reset_interval_ms
    # # set watermarks
    # echo free_mem_rate > watermarks/metric
    # echo 5000000 > watermarks/interval_us
    # echo 600 > watermarks/high
    # echo 500 > watermarks/mid
    # echo 300 > watermarks/low

Please note that it's highly recommended to use user space tools like `damo
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
the files as above.  The above is only an example.

.. _debugfs_interface:
@@ -47,7 +385,7 @@ Attributes
 ----------
 
 Users can get and set the ``sampling interval``, ``aggregation interval``,
-``regions update interval``, and min/max number of monitoring target regions by
+``update interval``, and min/max number of monitoring target regions by
 reading from and writing to the ``attrs`` file.  To know about the monitoring
 attributes in detail, please refer to the :doc:`/vm/damon/design`.  For
 example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
@@ -108,24 +446,28 @@ In such cases, users can explicitly set the initial monitoring target regions
 as they want, by writing proper values to the ``init_regions`` file.  Each line
 of the input should represent one region in below form.::
 
-    <target id> <start address> <end address>
+    <target idx> <start address> <end address>
 
-The ``target id`` should already in ``target_ids`` file, and the regions should
-be passed in address order.  For example, below commands will set a couple of
-address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
-region of process 42, and another couple of address ranges, ``20-40`` and
-``50-100`` as that of process 4242.::
+The ``target idx`` should be the index of the target in ``target_ids`` file,
+starting from ``0``, and the regions should be passed in address order.  For
+example, below commands will set a couple of address ranges, ``1-100`` and
+``100-200`` as the initial monitoring target region of pid 42, which is the
+first one (index ``0``) in ``target_ids``, and another couple of address
+ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
+(index ``1``) in ``target_ids``.::
 
     # cd <debugfs>/damon
-    # echo "42 1 100
-    42 100 200
-    4242 20 40
-    4242 50 100" > init_regions
+    # cat target_ids
+    42 4242
+    # echo "0 1 100
+    0 100 200
+    1 20 40
+    1 50 100" > init_regions
 
 Note that this sets the initial monitoring target regions only.  In case of
 virtual memory monitoring, DAMON will automatically updates the boundary of the
-regions after one ``regions update interval``.  Therefore, users should set the
-``regions update interval`` large enough in this case, if they don't want the
+regions after one ``update interval``.  Therefore, users should set the
+``update interval`` large enough in this case, if they don't want the
 update.
@@ -130,9 +130,25 @@ attribute, e.g.::
	echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
 
 When zswap same-filled page identification is disabled at runtime, it will stop
-checking for the same-value filled pages during store operation.  However, the
-existing pages which are marked as same-value filled pages remain stored
-unchanged in zswap until they are either loaded or invalidated.
+checking for the same-value filled pages during store operation.
+In other words, every page will be then considered non-same-value filled.
+However, the existing pages which are marked as same-value filled pages remain
+stored unchanged in zswap until they are either loaded or invalidated.
+
+In some circumstances it might be advantageous to make use of just the zswap
+ability to efficiently store same-filled pages without enabling the whole
+compressed page storage.
+In this case the handling of non-same-value pages by zswap (enabled by default)
+can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
+to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
+It can also be enabled and disabled at runtime using the sysfs
+``non_same_filled_pages_enabled`` attribute, e.g.::
+
+	echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
+
+Disabling both ``zswap.same_filled_pages_enabled`` and
+``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
+pages by zswap.
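
For example (an illustrative runtime setting, not a recommendation), zswap can
be limited to storing only same-filled pages by combining the two module
parameters named above::

	echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
	echo 0 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
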
 To prevent zswap from shrinking pool when zswap is full and there's a high
 pressure on swap (this will result in flipping pages in and out zswap pool
@@ -595,22 +595,34 @@ Documentation/admin-guide/kernel-parameters.rst).
 numa_balancing
 ==============
 
-Enables/disables automatic page fault based NUMA memory
-balancing. Memory is moved automatically to nodes
-that access it often.
+Enables/disables and configures automatic page fault based NUMA memory
+balancing.  Memory is moved automatically to nodes that access it often.
+The value to set can be the result of ORing the following:
 
-Enables/disables automatic NUMA memory balancing. On NUMA machines, there
-is a performance penalty if remote memory is accessed by a CPU. When this
-feature is enabled the kernel samples what task thread is accessing memory
-by periodically unmapping pages and later trapping a page fault. At the
-time of the page fault, it is determined if the data being accessed should
-be migrated to a local memory node.
+= =================================
+0 NUMA_BALANCING_DISABLED
+1 NUMA_BALANCING_NORMAL
+2 NUMA_BALANCING_MEMORY_TIERING
+= =================================
+
+Or NUMA_BALANCING_NORMAL to optimize page placement among different
+NUMA nodes to reduce remote accessing.  On NUMA machines, there is a
+performance penalty if remote memory is accessed by a CPU.  When this
+feature is enabled the kernel samples what task thread is accessing
+memory by periodically unmapping pages and later trapping a page
+fault.  At the time of the page fault, it is determined if the data
+being accessed should be migrated to a local memory node.
 
 The unmapping of pages and trapping faults incur additional overhead that
 ideally is offset by improved memory locality but there is no universal
 guarantee.  If the target workload is already bound to NUMA nodes then this
 feature should be disabled.
 
+Or NUMA_BALANCING_MEMORY_TIERING to optimize page placement among
+different types of memory (represented as different NUMA nodes) to
+place the hot pages in the fast memory.  This is implemented based on
+unmapping and page fault too.
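
As a sketch of how the ORed values above are used (the chosen value is only an
example), both modes can be enabled at once::

	# NUMA_BALANCING_NORMAL (1) | NUMA_BALANCING_MEMORY_TIERING (2) = 3
	sysctl -w kernel.numa_balancing=3
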
 
 oops_all_cpu_backtrace
 ======================
@@ -58,15 +58,30 @@ Virtually Contiguous Mappings
 File Mapping and Page Cache
 ===========================
 
-.. kernel-doc:: mm/readahead.c
-   :export:
+Filemap
+-------
 
 .. kernel-doc:: mm/filemap.c
    :export:
 
+Readahead
+---------
+
+.. kernel-doc:: mm/readahead.c
+   :doc: Readahead Overview
+
+.. kernel-doc:: mm/readahead.c
+   :export:
+
+Writeback
+---------
+
 .. kernel-doc:: mm/page-writeback.c
    :export:
 
+Truncate
+--------
+
 .. kernel-doc:: mm/truncate.c
    :export:
@@ -41,6 +41,18 @@ guarded by KFENCE. The default is configurable via the Kconfig option
 ``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
 disables KFENCE.
 
+The sample interval controls a timer that sets up KFENCE allocations. By
+default, to keep the real sample interval predictable, the normal timer also
+causes CPU wake-ups when the system is completely idle. This may be undesirable
+on power-constrained systems. The boot parameter ``kfence.deferrable=1``
+instead switches to a "deferrable" timer which does not force CPU wake-ups on
+idle systems, at the risk of unpredictable sample intervals. The default is
+configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
+
+.. warning::
+   The KUnit test suite is very likely to fail when using a deferrable timer
+   since it currently causes very unpredictable sample intervals.
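
For instance (the values are illustrative only), a power-constrained system
could combine the two boot parameters mentioned above on the kernel command
line::

	kfence.sample_interval=500 kfence.deferrable=1
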
 
 The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
 further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
 255), the number of available guarded objects can be controlled. Each object
@@ -45,6 +45,12 @@ typically between calling iget_locked() and unlocking the inode.
 
 At some point that will become mandatory.
 
+**mandatory**
+
+The foo_inode_info should always be allocated through alloc_inode_sb() rather
+than kmem_cache_alloc() or kmalloc(), so that the inode reclaim context is set
+up correctly.
+
 ---
 
 **mandatory**
@@ -806,12 +806,16 @@ cache in your filesystem. The following members are defined:
	object.  The pages are consecutive in the page cache and are
	locked.  The implementation should decrement the page refcount
	after starting I/O on each page.  Usually the page will be
-	unlocked by the I/O completion handler.  If the filesystem decides
-	to stop attempting I/O before reaching the end of the readahead
-	window, it can simply return.  The caller will decrement the page
-	refcount and unlock the remaining pages for you.  Set PageUptodate
-	if the I/O completes successfully.  Setting PageError on any page
-	will be ignored; simply unlock the page if an I/O error occurs.
+	unlocked by the I/O completion handler.  The set of pages are
+	divided into some sync pages followed by some async pages,
+	rac->ra->async_size gives the number of async pages.  The
+	filesystem should attempt to read all sync pages but may decide
+	to stop once it reaches the async pages.  If it does decide to
+	stop attempting I/O, it can simply return.  The caller will
+	remove the remaining pages from the address space, unlock them
+	and decrement the page refcount.  Set PageUptodate if the I/O
+	completes successfully.  Setting PageError on any page will be
+	ignored; simply unlock the page if an I/O error occurs.
 
 ``readpages``
	called by the VM to read pages associated with the address_space
@ -13,12 +13,13 @@ primitives that dependent on and optimized for the target address space. On
|
||||||
the other hand, the accuracy and overhead tradeoff mechanism, which is the core
|
the other hand, the accuracy and overhead tradeoff mechanism, which is the core
|
||||||
of DAMON, is in the pure logic space. DAMON separates the two parts in
|
of DAMON, is in the pure logic space. DAMON separates the two parts in
|
||||||
different layers and defines its interface to allow various low level
|
different layers and defines its interface to allow various low level
|
||||||
primitives implementations configurable with the core logic.
|
primitives implementations configurable with the core logic. We call the low
|
||||||
|
level primitives implementations monitoring operations.
|
||||||
|
|
||||||
Due to this separated design and the configurable interface, users can extend
|
Due to this separated design and the configurable interface, users can extend
|
||||||
DAMON for any address space by configuring the core logics with appropriate low
|
DAMON for any address space by configuring the core logics with appropriate
|
||||||
level primitive implementations. If appropriate one is not provided, users can
|
monitoring operations. If appropriate one is not provided, users can implement
|
||||||
implement the primitives on their own.
|
the operations on their own.
|
||||||
|
|
||||||
For example, physical memory, virtual memory, swap space, those for specific
|
For example, physical memory, virtual memory, swap space, those for specific
|
||||||
processes, NUMA nodes, files, and backing memory devices would be supportable.
|
processes, NUMA nodes, files, and backing memory devices would be supportable.
|
||||||
|
@@ -26,25 +27,24 @@ Also, if some architectures or devices support special optimized access check
 primitives, those will be easily configurable.
 
 
-Reference Implementations of Address Space Specific Primitives
-==============================================================
+Reference Implementations of Address Space Specific Monitoring Operations
+=========================================================================
 
-The low level primitives for the fundamental access monitoring are defined in
-two parts:
+The monitoring operations are defined in two parts:
 
 1. Identification of the monitoring target address range for the address space.
 2. Access check of specific address range in the target space.
 
-DAMON currently provides the implementations of the primitives for the physical
+DAMON currently provides the implementations of the operations for the physical
 and virtual address spaces. Below two subsections describe how those work.
 
 
 VMA-based Target Address Range Construction
 -------------------------------------------
 
-This is only for the virtual address space primitives implementation. That for
-the physical address space simply asks users to manually set the monitoring
-target address ranges.
+This is only for the virtual address space monitoring operations
+implementation. That for the physical address space simply asks users to
+manually set the monitoring target address ranges.
 
 Only small parts in the super-huge virtual address space of the processes are
 mapped to the physical memory and accessed. Thus, tracking the unmapped
@@ -84,9 +84,10 @@ table having a mapping to the address. In this way, the implementations find
 and clear the bit(s) for next sampling target address and checks whether the
 bit(s) set again after one sampling period. This could disturb other kernel
 subsystems using the Accessed bits, namely Idle page tracking and the reclaim
-logic. To avoid such disturbances, DAMON makes it mutually exclusive with Idle
-page tracking and uses ``PG_idle`` and ``PG_young`` page flags to solve the
-conflict with the reclaim logic, as Idle page tracking does.
+logic. DAMON does nothing to avoid disturbing Idle page tracking, so handling
+the interference is the responsibility of sysadmins. However, it solves the
+conflict with the reclaim logic using ``PG_idle`` and ``PG_young`` page flags,
+as Idle page tracking does.
 
 
 Address Space Independent Core Mechanisms

@@ -94,8 +95,8 @@ Address Space Independent Core Mechanisms
 
 Below four sections describe each of the DAMON core mechanisms and the five
 monitoring attributes, ``sampling interval``, ``aggregation interval``,
-``regions update interval``, ``minimum number of regions``, and ``maximum
-number of regions``.
+``update interval``, ``minimum number of regions``, and ``maximum number of
+regions``.
 
 
 Access Frequency Monitoring
@@ -168,6 +169,8 @@ The monitoring target address range could dynamically changed. For example,
 virtual memory could be dynamically mapped and unmapped. Physical memory could
 be hot-plugged.
 
-As the changes could be quite frequent in some cases, DAMON checks the dynamic
-memory mapping changes and applies it to the abstracted target area only for
-each of a user-specified time interval (``regions update interval``).
+As the changes could be quite frequent in some cases, DAMON allows the
+monitoring operations to check dynamic changes including memory mapping changes
+and applies it to monitoring operations-related data structures such as the
+abstracted monitoring target memory area only for each of a user-specified time
+interval (``update interval``).
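The design text above says the monitoring operations are pluggable and that users may implement their own set when none of the bundled ones fits. As a rough, non-authoritative sketch of what such a plug-in could look like, the snippet below assumes the struct damon_operations callbacks and the damon_register_ops() helper introduced elsewhere in this series, plus a hypothetical DAMON_OPS_SAMPLE slot that a real implementation would have to add to enum damon_ops_id; exact member names and signatures should be checked against include/linux/damon.h.

    #include <linux/damon.h>
    #include <linux/module.h>

    /* Part 1: construct the monitoring target address ranges. */
    static void sample_init(struct damon_ctx *ctx)
    {
            /* e.g. seed each damon_target with one fixed address range */
    }

    /* Part 2: access check of the target ranges. */
    static void sample_prepare_access_checks(struct damon_ctx *ctx)
    {
            /* e.g. clear the Accessed state of the chosen sampling addresses */
    }

    static unsigned int sample_check_accesses(struct damon_ctx *ctx)
    {
            /*
             * e.g. test whether the addresses were accessed again during the
             * sampling interval, bump nr_accesses of the touched regions, and
             * return the largest nr_accesses seen.
             */
            return 0;
    }

    /* Sketch only: DAMON_OPS_SAMPLE is a hypothetical enum damon_ops_id slot. */
    static struct damon_operations sample_ops = {
            .id = DAMON_OPS_SAMPLE,
            .init = sample_init,
            .prepare_access_checks = sample_prepare_access_checks,
            .check_accesses = sample_check_accesses,
    };

    static int __init sample_ops_register(void)
    {
            return damon_register_ops(&sample_ops);
    }
    subsys_initcall(sample_ops_register);
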
@@ -31,7 +31,7 @@ Does DAMON support virtual memory only?
 =======================================
 
 No. The core of the DAMON is address space independent. The address space
-specific low level primitive parts including monitoring target regions
+specific monitoring operations including monitoring target regions
 constructions and actual access checks can be implemented and configured on the
 DAMON core by the users. In this way, DAMON users can monitor any address
 space with any access check technique.
@@ -5326,6 +5326,7 @@ DATA ACCESS MONITOR
 M:	SeongJae Park <sj@kernel.org>
 L:	linux-mm@kvack.org
 S:	Maintained
+F:	Documentation/ABI/testing/sysfs-kernel-mm-damon
 F:	Documentation/admin-guide/mm/damon/
 F:	Documentation/vm/damon/
 F:	include/linux/damon.h
@ -38,6 +38,7 @@ config ARM
|
||||||
select ARCH_USE_CMPXCHG_LOCKREF
|
select ARCH_USE_CMPXCHG_LOCKREF
|
||||||
select ARCH_USE_MEMTEST
|
select ARCH_USE_MEMTEST
|
||||||
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
|
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
|
||||||
|
select ARCH_WANT_GENERAL_HUGETLB
|
||||||
select ARCH_WANT_IPC_PARSE_VERSION
|
select ARCH_WANT_IPC_PARSE_VERSION
|
||||||
select ARCH_WANT_LD_ORPHAN_WARN
|
select ARCH_WANT_LD_ORPHAN_WARN
|
||||||
select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
|
select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
|
||||||
|
@ -1509,9 +1510,6 @@ config HW_PERF_EVENTS
|
||||||
def_bool y
|
def_bool y
|
||||||
depends on ARM_PMU
|
depends on ARM_PMU
|
||||||
|
|
||||||
config ARCH_WANT_GENERAL_HUGETLB
|
|
||||||
def_bool y
|
|
||||||
|
|
||||||
config ARM_MODULE_PLTS
|
config ARM_MODULE_PLTS
|
||||||
bool "Use PLTs to allow module memory to spill over into vmalloc area"
|
bool "Use PLTs to allow module memory to spill over into vmalloc area"
|
||||||
depends on MODULES
|
depends on MODULES
|
||||||
|
|
|
@ -406,9 +406,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for_each_online_node(i)
|
|
||||||
register_one_node(i);
|
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
|
struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
|
||||||
cpu->hotpluggable = cpu_can_disable(i);
|
cpu->hotpluggable = cpu_can_disable(i);
|
||||||
|
|
|
@ -356,6 +356,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
|
||||||
{
|
{
|
||||||
size_t pagesize = 1UL << shift;
|
size_t pagesize = 1UL << shift;
|
||||||
|
|
||||||
|
entry = pte_mkhuge(entry);
|
||||||
if (pagesize == CONT_PTE_SIZE) {
|
if (pagesize == CONT_PTE_SIZE) {
|
||||||
entry = pte_mkcont(entry);
|
entry = pte_mkcont(entry);
|
||||||
} else if (pagesize == CONT_PMD_SIZE) {
|
} else if (pagesize == CONT_PMD_SIZE) {
|
||||||
|
|
|
@ -29,8 +29,6 @@ int max_kernel_seg = 0x303;
|
||||||
/* indicate pfn's of high memory */
|
/* indicate pfn's of high memory */
|
||||||
unsigned long highstart_pfn, highend_pfn;
|
unsigned long highstart_pfn, highend_pfn;
|
||||||
|
|
||||||
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
||||||
|
|
||||||
/* Default cache attribute for newly created page tables */
|
/* Default cache attribute for newly created page tables */
|
||||||
unsigned long _dflt_cache_att = CACHEDEF;
|
unsigned long _dflt_cache_att = CACHEDEF;
|
||||||
|
|
||||||
|
|
|
@ -70,16 +70,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int i, err = 0;
|
int i, err = 0;
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
/*
|
|
||||||
* MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
|
|
||||||
*/
|
|
||||||
for_each_online_node(i) {
|
|
||||||
if ((err = register_one_node(i)))
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL);
|
sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL);
|
||||||
if (!sysfs_cpus)
|
if (!sysfs_cpus)
|
||||||
panic("kzalloc in topology_init failed - NR_CPUS too big?");
|
panic("kzalloc in topology_init failed - NR_CPUS too big?");
|
||||||
|
|
|
@ -608,17 +608,11 @@ void __init paging_init(void)
|
||||||
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
|
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
pg_data_t * __init arch_alloc_nodedata(int nid)
|
||||||
pg_data_t *arch_alloc_nodedata(int nid)
|
|
||||||
{
|
{
|
||||||
unsigned long size = compute_pernodesize(nid);
|
unsigned long size = compute_pernodesize(nid);
|
||||||
|
|
||||||
return kzalloc(size, GFP_KERNEL);
|
return memblock_alloc(size, SMP_CACHE_BYTES);
|
||||||
}
|
|
||||||
|
|
||||||
void arch_free_nodedata(pg_data_t *pgdat)
|
|
||||||
{
|
|
||||||
kfree(pgdat);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
|
void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
|
||||||
|
@ -626,7 +620,6 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
|
||||||
pgdat_list[update_node] = update_pgdat;
|
pgdat_list[update_node] = update_pgdat;
|
||||||
scatter_node_data();
|
scatter_node_data();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||||
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
|
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
|
||||||
|
|
|
@ -12,11 +12,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
for_each_online_node(i)
|
|
||||||
register_one_node(i);
|
|
||||||
#endif /* CONFIG_NUMA */
|
|
||||||
|
|
||||||
for_each_present_cpu(i) {
|
for_each_present_cpu(i) {
|
||||||
struct cpu *c = &per_cpu(cpu_devices, i);
|
struct cpu *c = &per_cpu(cpu_devices, i);
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,6 @@
|
||||||
#include <asm/tlb.h>
|
#include <asm/tlb.h>
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
|
|
||||||
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
||||||
DEFINE_SPINLOCK(anon_alias_lock);
|
DEFINE_SPINLOCK(anon_alias_lock);
|
||||||
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
||||||
|
|
||||||
|
|
|
@ -38,8 +38,6 @@
|
||||||
|
|
||||||
int mem_init_done;
|
int mem_init_done;
|
||||||
|
|
||||||
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
||||||
|
|
||||||
static void __init zone_sizes_init(void)
|
static void __init zone_sizes_init(void)
|
||||||
{
|
{
|
||||||
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
|
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
|
||||||
|
|
|
@ -19,11 +19,6 @@
|
||||||
|
|
||||||
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
|
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
|
||||||
|
|
||||||
/* Alignment per CMA requirement. */
|
|
||||||
#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
|
|
||||||
max_t(unsigned long, MAX_ORDER - 1, \
|
|
||||||
pageblock_order))
|
|
||||||
|
|
||||||
/* FAD commands */
|
/* FAD commands */
|
||||||
#define FADUMP_REGISTER 1
|
#define FADUMP_REGISTER 1
|
||||||
#define FADUMP_UNREGISTER 2
|
#define FADUMP_UNREGISTER 2
|
||||||
|
|
|
@ -71,9 +71,9 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags
|
||||||
size_t size = 1UL << shift;
|
size_t size = 1UL << shift;
|
||||||
|
|
||||||
if (size == SZ_16K)
|
if (size == SZ_16K)
|
||||||
return __pte(pte_val(entry) & ~_PAGE_HUGE);
|
return __pte(pte_val(entry) | _PAGE_SPS);
|
||||||
else
|
else
|
||||||
return entry;
|
return __pte(pte_val(entry) | _PAGE_SPS | _PAGE_HUGE);
|
||||||
}
|
}
|
||||||
#define arch_make_huge_pte arch_make_huge_pte
|
#define arch_make_huge_pte arch_make_huge_pte
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -112,6 +112,12 @@ static int __init fadump_cma_init(void)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If CMA activation fails, keep the pages reserved, instead of
|
||||||
|
* exposing them to buddy allocator. Same as 'fadump=nocma' case.
|
||||||
|
*/
|
||||||
|
cma_reserve_pages_on_error(fadump_cma);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* So we now have successfully initialized cma area for fadump.
|
* So we now have successfully initialized cma area for fadump.
|
||||||
*/
|
*/
|
||||||
|
@ -544,7 +550,7 @@ int __init fadump_reserve_mem(void)
|
||||||
if (!fw_dump.nocma) {
|
if (!fw_dump.nocma) {
|
||||||
fw_dump.boot_memory_size =
|
fw_dump.boot_memory_size =
|
||||||
ALIGN(fw_dump.boot_memory_size,
|
ALIGN(fw_dump.boot_memory_size,
|
||||||
FADUMP_CMA_ALIGNMENT);
|
CMA_MIN_ALIGNMENT_BYTES);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1110,14 +1110,6 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
|
||||||
/* NUMA stuff */
|
/* NUMA stuff */
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
static void __init register_nodes(void)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < MAX_NUMNODES; i++)
|
|
||||||
register_one_node(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
int sysfs_add_device_to_node(struct device *dev, int nid)
|
int sysfs_add_device_to_node(struct device *dev, int nid)
|
||||||
{
|
{
|
||||||
struct node *node = node_devices[nid];
|
struct node *node = node_devices[nid];
|
||||||
|
@ -1132,13 +1124,6 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
|
||||||
sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
|
sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
|
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
|
||||||
|
|
||||||
#else
|
|
||||||
static void __init register_nodes(void)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Only valid if CPU is present. */
|
/* Only valid if CPU is present. */
|
||||||
|
@ -1155,8 +1140,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int cpu, r;
|
int cpu, r;
|
||||||
|
|
||||||
register_nodes();
|
|
||||||
|
|
||||||
for_each_possible_cpu(cpu) {
|
for_each_possible_cpu(cpu) {
|
||||||
struct cpu *c = &per_cpu(cpu_devices, cpu);
|
struct cpu *c = &per_cpu(cpu_devices, cpu);
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,7 @@ config RISCV
|
||||||
select ARCH_USE_MEMTEST
|
select ARCH_USE_MEMTEST
|
||||||
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
|
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
|
||||||
select ARCH_WANT_FRAME_POINTERS
|
select ARCH_WANT_FRAME_POINTERS
|
||||||
|
select ARCH_WANT_GENERAL_HUGETLB
|
||||||
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
|
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
|
||||||
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
|
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
|
||||||
select BUILDTIME_TABLE_SORT if MMU
|
select BUILDTIME_TABLE_SORT if MMU
|
||||||
|
@ -171,9 +172,6 @@ config ARCH_SPARSEMEM_ENABLE
|
||||||
config ARCH_SELECT_MEMORY_MODEL
|
config ARCH_SELECT_MEMORY_MODEL
|
||||||
def_bool ARCH_SPARSEMEM_ENABLE
|
def_bool ARCH_SPARSEMEM_ENABLE
|
||||||
|
|
||||||
config ARCH_WANT_GENERAL_HUGETLB
|
|
||||||
def_bool y
|
|
||||||
|
|
||||||
config ARCH_SUPPORTS_UPROBES
|
config ARCH_SUPPORTS_UPROBES
|
||||||
def_bool y
|
def_bool y
|
||||||
|
|
||||||
|
|
|
@ -301,9 +301,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
for_each_online_node(i)
|
|
||||||
register_one_node(i);
|
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
struct cpu *cpu = &per_cpu(cpu_devices, i);
|
struct cpu *cpu = &per_cpu(cpu_devices, i);
|
||||||
|
|
||||||
|
|
|
@ -33,10 +33,3 @@ void __init numa_setup(void)
|
||||||
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
|
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
|
||||||
NODE_DATA(0)->node_id = 0;
|
NODE_DATA(0)->node_id = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init numa_init_late(void)
|
|
||||||
{
|
|
||||||
register_one_node(0);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
arch_initcall(numa_init_late);
|
|
||||||
|
|
|
@ -46,11 +46,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
for_each_online_node(i)
|
|
||||||
register_one_node(i);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for_each_present_cpu(i) {
|
for_each_present_cpu(i) {
|
||||||
struct cpu *c = &per_cpu(cpu_devices, i);
|
struct cpu *c = &per_cpu(cpu_devices, i);
|
||||||
|
|
||||||
|
|
|
@ -244,22 +244,10 @@ static void __init check_mmu_stats(void)
|
||||||
mmu_stats_supported = 1;
|
mmu_stats_supported = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void register_nodes(void)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < MAX_NUMNODES; i++)
|
|
||||||
register_one_node(i);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __init topology_init(void)
|
static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int cpu, ret;
|
int cpu, ret;
|
||||||
|
|
||||||
register_nodes();
|
|
||||||
|
|
||||||
check_mmu_stats();
|
check_mmu_stats();
|
||||||
|
|
||||||
for_each_possible_cpu(cpu) {
|
for_each_possible_cpu(cpu) {
|
||||||
|
|
|
@ -181,6 +181,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
|
||||||
{
|
{
|
||||||
pte_t pte;
|
pte_t pte;
|
||||||
|
|
||||||
|
entry = pte_mkhuge(entry);
|
||||||
pte = hugepage_shift_to_tte(entry, shift);
|
pte = hugepage_shift_to_tte(entry, shift);
|
||||||
|
|
||||||
#ifdef CONFIG_SPARC64
|
#ifdef CONFIG_SPARC64
|
||||||
|
|
|
@ -119,6 +119,7 @@ config X86
|
||||||
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
|
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
|
||||||
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
||||||
select ARCH_WANTS_NO_INSTR
|
select ARCH_WANTS_NO_INSTR
|
||||||
|
select ARCH_WANT_GENERAL_HUGETLB
|
||||||
select ARCH_WANT_HUGE_PMD_SHARE
|
select ARCH_WANT_HUGE_PMD_SHARE
|
||||||
select ARCH_WANT_LD_ORPHAN_WARN
|
select ARCH_WANT_LD_ORPHAN_WARN
|
||||||
select ARCH_WANTS_RT_DELAYED_SIGNALS
|
select ARCH_WANTS_RT_DELAYED_SIGNALS
|
||||||
|
@ -349,9 +350,6 @@ config ARCH_NR_GPIO
|
||||||
config ARCH_SUSPEND_POSSIBLE
|
config ARCH_SUSPEND_POSSIBLE
|
||||||
def_bool y
|
def_bool y
|
||||||
|
|
||||||
config ARCH_WANT_GENERAL_HUGETLB
|
|
||||||
def_bool y
|
|
||||||
|
|
||||||
config AUDIT_ARCH
|
config AUDIT_ARCH
|
||||||
def_bool y if X86_64
|
def_bool y if X86_64
|
||||||
|
|
||||||
|
|
|
@@ -1299,10 +1299,12 @@ static void kill_me_maybe(struct callback_head *cb)
 
 	/*
 	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
-	 * to the current process with the proper error info, so no need to
-	 * send SIGBUS here again.
+	 * to the current process with the proper error info,
+	 * -EOPNOTSUPP means hwpoison_filter() filtered the error event,
+	 *
+	 * In both cases, no further processing is required.
 	 */
-	if (ret == -EHWPOISON)
+	if (ret == -EHWPOISON || ret == -EOPNOTSUPP)
 		return;
 
 	pr_err("Memory error not recovered");
@ -154,11 +154,6 @@ static int __init topology_init(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
for_each_online_node(i)
|
|
||||||
register_one_node(i);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for_each_present_cpu(i)
|
for_each_present_cpu(i)
|
||||||
arch_register_cpu(i);
|
arch_register_cpu(i);
|
||||||
|
|
||||||
|
|
|
@ -738,17 +738,6 @@ void __init x86_numa_init(void)
|
||||||
numa_init(dummy_numa_init);
|
numa_init(dummy_numa_init);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init init_memory_less_node(int nid)
|
|
||||||
{
|
|
||||||
/* Allocate and initialize node data. Memory-less node is now online.*/
|
|
||||||
alloc_node_data(nid);
|
|
||||||
free_area_init_memoryless_node(nid);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* All zonelists will be built later in start_kernel() after per cpu
|
|
||||||
* areas are initialized.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A node may exist which has one or more Generic Initiators but no CPUs and no
|
* A node may exist which has one or more Generic Initiators but no CPUs and no
|
||||||
|
@ -766,9 +755,18 @@ void __init init_gi_nodes(void)
|
||||||
{
|
{
|
||||||
int nid;
|
int nid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exclude this node from
|
||||||
|
* bringup_nonboot_cpus
|
||||||
|
* cpu_up
|
||||||
|
* __try_online_node
|
||||||
|
* register_one_node
|
||||||
|
* because node_subsys is not initialized yet.
|
||||||
|
* TODO remove dependency on node_online
|
||||||
|
*/
|
||||||
for_each_node_state(nid, N_GENERIC_INITIATOR)
|
for_each_node_state(nid, N_GENERIC_INITIATOR)
|
||||||
if (!node_online(nid))
|
if (!node_online(nid))
|
||||||
init_memory_less_node(nid);
|
node_set_online(nid);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -798,8 +796,17 @@ void __init init_cpu_to_node(void)
|
||||||
if (node == NUMA_NO_NODE)
|
if (node == NUMA_NO_NODE)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exclude this node from
|
||||||
|
* bringup_nonboot_cpus
|
||||||
|
* cpu_up
|
||||||
|
* __try_online_node
|
||||||
|
* register_one_node
|
||||||
|
* because node_subsys is not initialized yet.
|
||||||
|
* TODO remove dependency on node_online
|
||||||
|
*/
|
||||||
if (!node_online(node))
|
if (!node_online(node))
|
||||||
init_memory_less_node(node);
|
node_set_online(node);
|
||||||
|
|
||||||
numa_set_node(cpu, node);
|
numa_set_node(cpu, node);
|
||||||
}
|
}
|
||||||
|
|
|
@@ -385,7 +385,7 @@ static struct kmem_cache * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
-	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
+	struct bdev_inode *ei = alloc_inode_sb(sb, bdev_cachep, GFP_KERNEL);
 
 	if (!ei)
 		return NULL;
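This bdev hunk is the first of many identical conversions further down (9p, adfs, affs, afs, befs, bfs, btrfs, ceph, cifs, coda, ecryptfs, efs, erofs, exfat, ext2, ext4, ...): inode allocation moves from kmem_cache_alloc() to alloc_inode_sb(), which additionally takes the super_block so the object is allocated through the memcg-aware LRU infrastructure (compare the __d_alloc() switch to kmem_cache_alloc_lru() later in this diff). A minimal sketch of the pattern for a made-up filesystem, where foofs_inode_info and foofs_inode_cachep are hypothetical names:

    /*
     * Sketch only: the real series just swaps the allocator call, exactly as
     * in the surrounding hunks.
     */
    static struct inode *foofs_alloc_inode(struct super_block *sb)
    {
            struct foofs_inode_info *fi;

            /* before: fi = kmem_cache_alloc(foofs_inode_cachep, GFP_KERNEL); */
            fi = alloc_inode_sb(sb, foofs_inode_cachep, GFP_KERNEL);
            if (!fi)
                    return NULL;
            return &fi->vfs_inode;
    }
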
@@ -5459,7 +5459,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
 	bfqq = bic_to_bfqq(bic, false);
 	if (bfqq) {
 		bfq_release_process_ref(bfqd, bfqq);
-		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, true);
+		bfqq = bfq_get_queue(bfqd, bio, false, bic, true);
 		bic_set_bfqq(bic, bfqq, false);
 	}
 
@ -35,5 +35,6 @@ void __init driver_init(void)
|
||||||
auxiliary_bus_init();
|
auxiliary_bus_init();
|
||||||
cpu_dev_init();
|
cpu_dev_init();
|
||||||
memory_dev_init();
|
memory_dev_init();
|
||||||
|
node_dev_init();
|
||||||
container_dev_init();
|
container_dev_init();
|
||||||
}
|
}
|
||||||
|
|
|
@ -215,6 +215,7 @@ static int memory_block_online(struct memory_block *mem)
|
||||||
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
|
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
|
||||||
nr_vmemmap_pages);
|
nr_vmemmap_pages);
|
||||||
|
|
||||||
|
mem->zone = zone;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,6 +226,9 @@ static int memory_block_offline(struct memory_block *mem)
|
||||||
unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
|
unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
if (!mem->zone)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unaccount before offlining, such that unpopulated zone and kthreads
|
* Unaccount before offlining, such that unpopulated zone and kthreads
|
||||||
* can properly be torn down in offline_pages().
|
* can properly be torn down in offline_pages().
|
||||||
|
@ -234,7 +238,7 @@ static int memory_block_offline(struct memory_block *mem)
|
||||||
-nr_vmemmap_pages);
|
-nr_vmemmap_pages);
|
||||||
|
|
||||||
ret = offline_pages(start_pfn + nr_vmemmap_pages,
|
ret = offline_pages(start_pfn + nr_vmemmap_pages,
|
||||||
nr_pages - nr_vmemmap_pages, mem->group);
|
nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
/* offline_pages() failed. Account back. */
|
/* offline_pages() failed. Account back. */
|
||||||
if (nr_vmemmap_pages)
|
if (nr_vmemmap_pages)
|
||||||
|
@ -246,6 +250,7 @@ static int memory_block_offline(struct memory_block *mem)
|
||||||
if (nr_vmemmap_pages)
|
if (nr_vmemmap_pages)
|
||||||
mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
|
mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
|
||||||
|
|
||||||
|
mem->zone = NULL;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -411,11 +416,10 @@ static ssize_t valid_zones_show(struct device *dev,
|
||||||
*/
|
*/
|
||||||
if (mem->state == MEM_ONLINE) {
|
if (mem->state == MEM_ONLINE) {
|
||||||
/*
|
/*
|
||||||
* The block contains more than one zone can not be offlined.
|
* If !mem->zone, the memory block spans multiple zones and
|
||||||
* This can happen e.g. for ZONE_DMA and ZONE_DMA32
|
* cannot get offlined.
|
||||||
*/
|
*/
|
||||||
default_zone = test_pages_in_a_zone(start_pfn,
|
default_zone = mem->zone;
|
||||||
start_pfn + nr_pages);
|
|
||||||
if (!default_zone)
|
if (!default_zone)
|
||||||
return sysfs_emit(buf, "%s\n", "none");
|
return sysfs_emit(buf, "%s\n", "none");
|
||||||
len += sysfs_emit_at(buf, len, "%s", default_zone->name);
|
len += sysfs_emit_at(buf, len, "%s", default_zone->name);
|
||||||
|
@ -555,6 +559,8 @@ static ssize_t hard_offline_page_store(struct device *dev,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
pfn >>= PAGE_SHIFT;
|
pfn >>= PAGE_SHIFT;
|
||||||
ret = memory_failure(pfn, 0);
|
ret = memory_failure(pfn, 0);
|
||||||
|
if (ret == -EOPNOTSUPP)
|
||||||
|
ret = 0;
|
||||||
return ret ? ret : count;
|
return ret ? ret : count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -613,11 +619,7 @@ static const struct attribute_group *memory_memblk_attr_groups[] = {
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
static int __add_memory_block(struct memory_block *memory)
|
||||||
* register_memory - Setup a sysfs device for a memory block
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
int register_memory(struct memory_block *memory)
|
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -641,9 +643,85 @@ int register_memory(struct memory_block *memory)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int init_memory_block(unsigned long block_id, unsigned long state,
|
static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
|
||||||
unsigned long nr_vmemmap_pages,
|
int nid)
|
||||||
struct memory_group *group)
|
{
|
||||||
|
const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||||
|
const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
|
||||||
|
struct zone *zone, *matching_zone = NULL;
|
||||||
|
pg_data_t *pgdat = NODE_DATA(nid);
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This logic only works for early memory, when the applicable zones
|
||||||
|
* already span the memory block. We don't expect overlapping zones on
|
||||||
|
* a single node for early memory. So if we're told that some PFNs
|
||||||
|
* of a node fall into this memory block, we can assume that all node
|
||||||
|
* zones that intersect with the memory block are actually applicable.
|
||||||
|
* No need to look at the memmap.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < MAX_NR_ZONES; i++) {
|
||||||
|
zone = pgdat->node_zones + i;
|
||||||
|
if (!populated_zone(zone))
|
||||||
|
continue;
|
||||||
|
if (!zone_intersects(zone, start_pfn, nr_pages))
|
||||||
|
continue;
|
||||||
|
if (!matching_zone) {
|
||||||
|
matching_zone = zone;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* Spans multiple zones ... */
|
||||||
|
matching_zone = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return matching_zone;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
/**
|
||||||
|
* memory_block_add_nid() - Indicate that system RAM falling into this memory
|
||||||
|
* block device (partially) belongs to the given node.
|
||||||
|
* @mem: The memory block device.
|
||||||
|
* @nid: The node id.
|
||||||
|
* @context: The memory initialization context.
|
||||||
|
*
|
||||||
|
* Indicate that system RAM falling into this memory block (partially) belongs
|
||||||
|
* to the given node. If the context indicates ("early") that we are adding the
|
||||||
|
* node during node device subsystem initialization, this will also properly
|
||||||
|
* set/adjust mem->zone based on the zone ranges of the given node.
|
||||||
|
*/
|
||||||
|
void memory_block_add_nid(struct memory_block *mem, int nid,
|
||||||
|
enum meminit_context context)
|
||||||
|
{
|
||||||
|
if (context == MEMINIT_EARLY && mem->nid != nid) {
|
||||||
|
/*
|
||||||
|
* For early memory we have to determine the zone when setting
|
||||||
|
* the node id and handle multiple nodes spanning a single
|
||||||
|
* memory block by indicate via zone == NULL that we're not
|
||||||
|
* dealing with a single zone. So if we're setting the node id
|
||||||
|
* the first time, determine if there is a single zone. If we're
|
||||||
|
* setting the node id a second time to a different node,
|
||||||
|
* invalidate the single detected zone.
|
||||||
|
*/
|
||||||
|
if (mem->nid == NUMA_NO_NODE)
|
||||||
|
mem->zone = early_node_zone_for_memory_block(mem, nid);
|
||||||
|
else
|
||||||
|
mem->zone = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this memory block spans multiple nodes, we only indicate
|
||||||
|
* the last processed node. If we span multiple nodes (not applicable
|
||||||
|
* to hotplugged memory), zone == NULL will prohibit memory offlining
|
||||||
|
* and consequently unplug.
|
||||||
|
*/
|
||||||
|
mem->nid = nid;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int add_memory_block(unsigned long block_id, unsigned long state,
|
||||||
|
unsigned long nr_vmemmap_pages,
|
||||||
|
struct memory_group *group)
|
||||||
{
|
{
|
||||||
struct memory_block *mem;
|
struct memory_block *mem;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -663,17 +741,30 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
|
||||||
mem->nr_vmemmap_pages = nr_vmemmap_pages;
|
mem->nr_vmemmap_pages = nr_vmemmap_pages;
|
||||||
INIT_LIST_HEAD(&mem->group_next);
|
INIT_LIST_HEAD(&mem->group_next);
|
||||||
|
|
||||||
|
#ifndef CONFIG_NUMA
|
||||||
|
if (state == MEM_ONLINE)
|
||||||
|
/*
|
||||||
|
* MEM_ONLINE at this point implies early memory. With NUMA,
|
||||||
|
* we'll determine the zone when setting the node id via
|
||||||
|
* memory_block_add_nid(). Memory hotplug updated the zone
|
||||||
|
* manually when memory onlining/offlining succeeds.
|
||||||
|
*/
|
||||||
|
mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
|
||||||
|
#endif /* CONFIG_NUMA */
|
||||||
|
|
||||||
|
ret = __add_memory_block(mem);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (group) {
|
if (group) {
|
||||||
mem->group = group;
|
mem->group = group;
|
||||||
list_add(&mem->group_next, &group->memory_blocks);
|
list_add(&mem->group_next, &group->memory_blocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = register_memory(mem);
|
return 0;
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int add_memory_block(unsigned long base_section_nr)
|
static int __init add_boot_memory_block(unsigned long base_section_nr)
|
||||||
{
|
{
|
||||||
int section_count = 0;
|
int section_count = 0;
|
||||||
unsigned long nr;
|
unsigned long nr;
|
||||||
|
@ -685,11 +776,18 @@ static int add_memory_block(unsigned long base_section_nr)
|
||||||
|
|
||||||
if (section_count == 0)
|
if (section_count == 0)
|
||||||
return 0;
|
return 0;
|
||||||
return init_memory_block(memory_block_id(base_section_nr),
|
return add_memory_block(memory_block_id(base_section_nr),
|
||||||
MEM_ONLINE, 0, NULL);
|
MEM_ONLINE, 0, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void unregister_memory(struct memory_block *memory)
|
static int add_hotplug_memory_block(unsigned long block_id,
|
||||||
|
unsigned long nr_vmemmap_pages,
|
||||||
|
struct memory_group *group)
|
||||||
|
{
|
||||||
|
return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void remove_memory_block(struct memory_block *memory)
|
||||||
{
|
{
|
||||||
if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
|
if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
|
||||||
return;
|
return;
|
||||||
|
@ -728,8 +826,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
|
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
|
||||||
ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages,
|
ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
|
||||||
group);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -740,7 +837,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
|
||||||
mem = find_memory_block_by_id(block_id);
|
mem = find_memory_block_by_id(block_id);
|
||||||
if (WARN_ON_ONCE(!mem))
|
if (WARN_ON_ONCE(!mem))
|
||||||
continue;
|
continue;
|
||||||
unregister_memory(mem);
|
remove_memory_block(mem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -769,7 +866,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
|
||||||
if (WARN_ON_ONCE(!mem))
|
if (WARN_ON_ONCE(!mem))
|
||||||
continue;
|
continue;
|
||||||
unregister_memory_block_under_nodes(mem);
|
unregister_memory_block_under_nodes(mem);
|
||||||
unregister_memory(mem);
|
remove_memory_block(mem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -829,7 +926,7 @@ void __init memory_dev_init(void)
|
||||||
*/
|
*/
|
||||||
for (nr = 0; nr <= __highest_present_section_nr;
|
for (nr = 0; nr <= __highest_present_section_nr;
|
||||||
nr += sections_per_block) {
|
nr += sections_per_block) {
|
||||||
ret = add_memory_block(nr);
|
ret = add_boot_memory_block(nr);
|
||||||
if (ret)
|
if (ret)
|
||||||
panic("%s() failed to add memory block: %d\n", __func__,
|
panic("%s() failed to add memory block: %d\n", __func__,
|
||||||
ret);
|
ret);
|
||||||
|
|
|
@ -796,15 +796,12 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void do_register_memory_block_under_node(int nid,
|
static void do_register_memory_block_under_node(int nid,
|
||||||
struct memory_block *mem_blk)
|
struct memory_block *mem_blk,
|
||||||
|
enum meminit_context context)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/*
|
memory_block_add_nid(mem_blk, nid, context);
|
||||||
* If this memory block spans multiple nodes, we only indicate
|
|
||||||
* the last processed node.
|
|
||||||
*/
|
|
||||||
mem_blk->nid = nid;
|
|
||||||
|
|
||||||
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
|
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
|
||||||
&mem_blk->dev.kobj,
|
&mem_blk->dev.kobj,
|
||||||
|
@ -857,7 +854,7 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
|
||||||
if (page_nid != nid)
|
if (page_nid != nid)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
do_register_memory_block_under_node(nid, mem_blk);
|
do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* mem section does not span the specified node */
|
/* mem section does not span the specified node */
|
||||||
|
@ -873,7 +870,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
|
||||||
{
|
{
|
||||||
int nid = *(int *)arg;
|
int nid = *(int *)arg;
|
||||||
|
|
||||||
do_register_memory_block_under_node(nid, mem_blk);
|
do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -892,8 +889,9 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
|
||||||
kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
|
kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
|
||||||
}
|
}
|
||||||
|
|
||||||
void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
|
void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
|
||||||
enum meminit_context context)
|
unsigned long end_pfn,
|
||||||
|
enum meminit_context context)
|
||||||
{
|
{
|
||||||
walk_memory_blocks_func_t func;
|
walk_memory_blocks_func_t func;
|
||||||
|
|
||||||
|
@ -1065,26 +1063,30 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
|
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
|
||||||
static int __init register_node_type(void)
|
void __init node_dev_init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
static struct notifier_block node_memory_callback_nb = {
|
||||||
|
.notifier_call = node_memory_callback,
|
||||||
|
.priority = NODE_CALLBACK_PRI,
|
||||||
|
};
|
||||||
|
int ret, i;
|
||||||
|
|
||||||
BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
|
BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
|
||||||
BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
|
BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
|
||||||
|
|
||||||
ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
|
ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
|
||||||
if (!ret) {
|
if (ret)
|
||||||
static struct notifier_block node_memory_callback_nb = {
|
panic("%s() failed to register subsystem: %d\n", __func__, ret);
|
||||||
.notifier_call = node_memory_callback,
|
|
||||||
.priority = NODE_CALLBACK_PRI,
|
register_hotmemory_notifier(&node_memory_callback_nb);
|
||||||
};
|
|
||||||
register_hotmemory_notifier(&node_memory_callback_nb);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: we're not going to unregister the node class if we fail
|
* Create all node devices, which will properly link the node
|
||||||
* to register the node state class attribute files.
|
* to applicable memory block devices and already created cpu devices.
|
||||||
*/
|
*/
|
||||||
return ret;
|
for_each_online_node(i) {
|
||||||
|
ret = register_one_node(i);
|
||||||
|
if (ret)
|
||||||
|
panic("%s() failed to add node: %d\n", __func__, ret);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
postcore_initcall(register_node_type);
|
|
||||||
|
|
|
@@ -637,9 +637,6 @@ enum {
 	STATE_SENT,		/* Do not change state/UUIDs while this is set */
 	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
 				 * pending, from drbd worker context.
-				 * If set, bdi_write_congested() returns true,
-				 * so shrink_page_list() would not recurse into,
-				 * and potentially deadlock on, this drbd worker.
 				 */
 	DISCONNECT_SENT,
 

@@ -910,8 +910,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
 
 	switch (rbm) {
 	case RB_CONGESTED_REMOTE:
-		return bdi_read_congested(
-			device->ldev->backing_bdev->bd_disk->bdi);
+		return 0;
 	case RB_LEAST_PENDING:
 		return atomic_read(&device->local_cnt) >
 			atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
@ -282,7 +282,7 @@ static struct inode *dax_alloc_inode(struct super_block *sb)
|
||||||
struct dax_device *dax_dev;
|
struct dax_device *dax_dev;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
|
|
||||||
dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
|
dax_dev = alloc_inode_sb(sb, dax_cache, GFP_KERNEL);
|
||||||
if (!dax_dev)
|
if (!dax_dev)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/memblock.h>
|
#include <linux/memblock.h>
|
||||||
#include <linux/kmemleak.h>
|
#include <linux/kmemleak.h>
|
||||||
|
#include <linux/cma.h>
|
||||||
|
|
||||||
#include "of_private.h"
|
#include "of_private.h"
|
||||||
|
|
||||||
|
@ -116,12 +117,8 @@ static int __init __reserved_mem_alloc_size(unsigned long node,
|
||||||
if (IS_ENABLED(CONFIG_CMA)
|
if (IS_ENABLED(CONFIG_CMA)
|
||||||
&& of_flat_dt_is_compatible(node, "shared-dma-pool")
|
&& of_flat_dt_is_compatible(node, "shared-dma-pool")
|
||||||
&& of_get_flat_dt_prop(node, "reusable", NULL)
|
&& of_get_flat_dt_prop(node, "reusable", NULL)
|
||||||
&& !nomap) {
|
&& !nomap)
|
||||||
unsigned long order =
|
align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
|
||||||
max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
|
|
||||||
|
|
||||||
align = max(align, (phys_addr_t)PAGE_SIZE << order);
|
|
||||||
}
|
|
||||||
|
|
||||||
prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
|
prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
|
||||||
if (prop) {
|
if (prop) {
|
||||||
|
|
|
@ -3088,7 +3088,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx)
|
||||||
{
|
{
|
||||||
struct tty_struct *tty;
|
struct tty_struct *tty;
|
||||||
|
|
||||||
tty = kzalloc(sizeof(*tty), GFP_KERNEL);
|
tty = kzalloc(sizeof(*tty), GFP_KERNEL_ACCOUNT);
|
||||||
if (!tty)
|
if (!tty)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@@ -2476,13 +2476,10 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
 				  VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
 
 	/*
-	 * We want subblocks to span at least MAX_ORDER_NR_PAGES and
-	 * pageblock_nr_pages pages. This:
-	 * - Is required for now for alloc_contig_range() to work reliably -
-	 *   it doesn't properly handle smaller granularity on ZONE_NORMAL.
+	 * TODO: once alloc_contig_range() works reliably with pageblock
+	 * granularity on ZONE_NORMAL, use pageblock_nr_pages instead.
 	 */
-	sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES,
-			pageblock_nr_pages) * PAGE_SIZE;
+	sb_size = PAGE_SIZE * MAX_ORDER_NR_PAGES;
 	sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
 
 	if (sb_size < memory_block_size_bytes() && !force_bbm) {
@ -228,7 +228,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct v9fs_inode *v9inode;
|
struct v9fs_inode *v9inode;
|
||||||
|
|
||||||
v9inode = kmem_cache_alloc(v9fs_inode_cache, GFP_KERNEL);
|
v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL);
|
||||||
if (!v9inode)
|
if (!v9inode)
|
||||||
return NULL;
|
return NULL;
|
||||||
#ifdef CONFIG_9P_FSCACHE
|
#ifdef CONFIG_9P_FSCACHE
|
||||||
|
|
|
@ -220,7 +220,7 @@ static struct kmem_cache *adfs_inode_cachep;
|
||||||
static struct inode *adfs_alloc_inode(struct super_block *sb)
|
static struct inode *adfs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct adfs_inode_info *ei;
|
struct adfs_inode_info *ei;
|
||||||
ei = kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL);
|
ei = alloc_inode_sb(sb, adfs_inode_cachep, GFP_KERNEL);
|
||||||
if (!ei)
|
if (!ei)
|
||||||
return NULL;
|
return NULL;
|
||||||
return &ei->vfs_inode;
|
return &ei->vfs_inode;
|
||||||
|
|
|
@ -100,7 +100,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct affs_inode_info *i;
|
struct affs_inode_info *i;
|
||||||
|
|
||||||
i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
|
i = alloc_inode_sb(sb, affs_inode_cachep, GFP_KERNEL);
|
||||||
if (!i)
|
if (!i)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@ -679,7 +679,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct afs_vnode *vnode;
|
struct afs_vnode *vnode;
|
||||||
|
|
||||||
vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
|
vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL);
|
||||||
if (!vnode)
|
if (!vnode)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct befs_inode_info *bi;
|
struct befs_inode_info *bi;
|
||||||
|
|
||||||
bi = kmem_cache_alloc(befs_inode_cachep, GFP_KERNEL);
|
bi = alloc_inode_sb(sb, befs_inode_cachep, GFP_KERNEL);
|
||||||
if (!bi)
|
if (!bi)
|
||||||
return NULL;
|
return NULL;
|
||||||
return &bi->vfs_inode;
|
return &bi->vfs_inode;
|
||||||
|
|
|
@ -239,7 +239,7 @@ static struct kmem_cache *bfs_inode_cachep;
|
||||||
static struct inode *bfs_alloc_inode(struct super_block *sb)
|
static struct inode *bfs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct bfs_inode_info *bi;
|
struct bfs_inode_info *bi;
|
||||||
bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL);
|
bi = alloc_inode_sb(sb, bfs_inode_cachep, GFP_KERNEL);
|
||||||
if (!bi)
|
if (!bi)
|
||||||
return NULL;
|
return NULL;
|
||||||
return &bi->vfs_inode;
|
return &bi->vfs_inode;
|
||||||
|
|
|
@ -8819,7 +8819,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||||
struct btrfs_inode *ei;
|
struct btrfs_inode *ei;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
|
|
||||||
ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
|
ei = alloc_inode_sb(sb, btrfs_inode_cachep, GFP_KERNEL);
|
||||||
if (!ei)
|
if (!ei)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@@ -1235,16 +1235,18 @@ static void bh_lru_install(struct buffer_head *bh)
 	int i;
 
 	check_irqs_on();
+	bh_lru_lock();
+
 	/*
 	 * the refcount of buffer_head in bh_lru prevents dropping the
 	 * attached page(i.e., try_to_free_buffers) so it could cause
 	 * failing page migration.
 	 * Skip putting upcoming bh into bh_lru until migration is done.
 	 */
-	if (lru_cache_disabled())
+	if (lru_cache_disabled()) {
+		bh_lru_unlock();
 		return;
-
-	bh_lru_lock();
+	}
 
 	b = this_cpu_ptr(&bh_lrus);
 	for (i = 0; i < BH_LRU_SIZE; i++) {
@ -563,7 +563,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
||||||
|
|
||||||
if (atomic_long_inc_return(&fsc->writeback_count) >
|
if (atomic_long_inc_return(&fsc->writeback_count) >
|
||||||
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
|
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
|
||||||
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
|
fsc->write_congested = true;
|
||||||
|
|
||||||
req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
|
req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
|
||||||
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
|
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
|
||||||
|
@ -623,7 +623,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
||||||
|
|
||||||
if (atomic_long_dec_return(&fsc->writeback_count) <
|
if (atomic_long_dec_return(&fsc->writeback_count) <
|
||||||
CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
|
CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
|
||||||
clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
|
fsc->write_congested = false;
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -635,6 +635,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
|
||||||
BUG_ON(!inode);
|
BUG_ON(!inode);
|
||||||
ihold(inode);
|
ihold(inode);
|
||||||
|
|
||||||
|
if (wbc->sync_mode == WB_SYNC_NONE &&
|
||||||
|
ceph_inode_to_client(inode)->write_congested)
|
||||||
|
return AOP_WRITEPAGE_ACTIVATE;
|
||||||
|
|
||||||
wait_on_page_fscache(page);
|
wait_on_page_fscache(page);
|
||||||
|
|
||||||
err = writepage_nounlock(page, wbc);
|
err = writepage_nounlock(page, wbc);
|
||||||
|
@ -707,8 +711,7 @@ static void writepages_finish(struct ceph_osd_request *req)
|
||||||
if (atomic_long_dec_return(&fsc->writeback_count) <
|
if (atomic_long_dec_return(&fsc->writeback_count) <
|
||||||
CONGESTION_OFF_THRESH(
|
CONGESTION_OFF_THRESH(
|
||||||
fsc->mount_options->congestion_kb))
|
fsc->mount_options->congestion_kb))
|
||||||
clear_bdi_congested(inode_to_bdi(inode),
|
fsc->write_congested = false;
|
||||||
BLK_RW_ASYNC);
|
|
||||||
|
|
||||||
ceph_put_snap_context(detach_page_private(page));
|
ceph_put_snap_context(detach_page_private(page));
|
||||||
end_page_writeback(page);
|
end_page_writeback(page);
|
||||||
|
@ -760,6 +763,10 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
bool done = false;
|
bool done = false;
|
||||||
bool caching = ceph_is_cache_enabled(inode);
|
bool caching = ceph_is_cache_enabled(inode);
|
||||||
|
|
||||||
|
if (wbc->sync_mode == WB_SYNC_NONE &&
|
||||||
|
fsc->write_congested)
|
||||||
|
return 0;
|
||||||
|
|
||||||
dout("writepages_start %p (mode=%s)\n", inode,
|
dout("writepages_start %p (mode=%s)\n", inode,
|
||||||
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
|
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
|
||||||
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
|
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
|
||||||
|
@ -954,11 +961,8 @@ get_more_pages:
|
||||||
|
|
||||||
if (atomic_long_inc_return(&fsc->writeback_count) >
|
if (atomic_long_inc_return(&fsc->writeback_count) >
|
||||||
CONGESTION_ON_THRESH(
|
CONGESTION_ON_THRESH(
|
||||||
fsc->mount_options->congestion_kb)) {
|
fsc->mount_options->congestion_kb))
|
||||||
set_bdi_congested(inode_to_bdi(inode),
|
fsc->write_congested = true;
|
||||||
BLK_RW_ASYNC);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
pages[locked_pages++] = page;
|
pages[locked_pages++] = page;
|
||||||
pvec.pages[i] = NULL;
|
pvec.pages[i] = NULL;
|
||||||
|
|
|
@ -447,7 +447,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
|
||||||
struct ceph_inode_info *ci;
|
struct ceph_inode_info *ci;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS);
|
ci = alloc_inode_sb(sb, ceph_inode_cachep, GFP_NOFS);
|
||||||
if (!ci)
|
if (!ci)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@ -802,6 +802,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
|
||||||
fsc->have_copy_from2 = true;
|
fsc->have_copy_from2 = true;
|
||||||
|
|
||||||
atomic_long_set(&fsc->writeback_count, 0);
|
atomic_long_set(&fsc->writeback_count, 0);
|
||||||
|
fsc->write_congested = false;
|
||||||
|
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -121,6 +121,7 @@ struct ceph_fs_client {
|
||||||
struct ceph_mds_client *mdsc;
|
struct ceph_mds_client *mdsc;
|
||||||
|
|
||||||
atomic_long_t writeback_count;
|
atomic_long_t writeback_count;
|
||||||
|
bool write_congested;
|
||||||
|
|
||||||
struct workqueue_struct *inode_wq;
|
struct workqueue_struct *inode_wq;
|
||||||
struct workqueue_struct *cap_wq;
|
struct workqueue_struct *cap_wq;
|
||||||
|
|
|
@ -362,7 +362,7 @@ static struct inode *
|
||||||
cifs_alloc_inode(struct super_block *sb)
|
cifs_alloc_inode(struct super_block *sb)
|
||||||
{
|
{
|
||||||
struct cifsInodeInfo *cifs_inode;
|
struct cifsInodeInfo *cifs_inode;
|
||||||
cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL);
|
cifs_inode = alloc_inode_sb(sb, cifs_inode_cachep, GFP_KERNEL);
|
||||||
if (!cifs_inode)
|
if (!cifs_inode)
|
||||||
return NULL;
|
return NULL;
|
||||||
cifs_inode->cifsAttrs = 0x20; /* default */
|
cifs_inode->cifsAttrs = 0x20; /* default */
|
||||||
|
|
|
@@ -43,7 +43,7 @@ static struct kmem_cache * coda_inode_cachep;
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
 struct coda_inode_info *ei;
-ei = kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
+ei = alloc_inode_sb(sb, coda_inode_cachep, GFP_KERNEL);
 if (!ei)
 return NULL;
 memset(&ei->c_fid, 0, sizeof(struct CodaFid));

@@ -1766,7 +1766,8 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 char *dname;
 int err;

-dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+dentry = kmem_cache_alloc_lru(dentry_cache, &sb->s_dentry_lru,
+GFP_KERNEL);
 if (!dentry)
 return NULL;

@@ -38,7 +38,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
 struct ecryptfs_inode_info *inode_info;
 struct inode *inode = NULL;

-inode_info = kmem_cache_alloc(ecryptfs_inode_info_cache, GFP_KERNEL);
+inode_info = alloc_inode_sb(sb, ecryptfs_inode_info_cache, GFP_KERNEL);
 if (unlikely(!inode_info))
 goto out;
 if (ecryptfs_init_crypt_stat(&inode_info->crypt_stat)) {

@@ -69,7 +69,7 @@ static struct kmem_cache * efs_inode_cachep;
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
 struct efs_inode_info *ei;
-ei = kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL);
+ei = alloc_inode_sb(sb, efs_inode_cachep, GFP_KERNEL);
 if (!ei)
 return NULL;
 return &ei->vfs_inode;

@@ -84,7 +84,7 @@ static void erofs_inode_init_once(void *ptr)
 static struct inode *erofs_alloc_inode(struct super_block *sb)
 {
 struct erofs_inode *vi =
-kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);

 if (!vi)
 return NULL;

@@ -183,7 +183,7 @@ static struct inode *exfat_alloc_inode(struct super_block *sb)
 {
 struct exfat_inode_info *ei;

-ei = kmem_cache_alloc(exfat_inode_cachep, GFP_NOFS);
+ei = alloc_inode_sb(sb, exfat_inode_cachep, GFP_NOFS);
 if (!ei)
 return NULL;

@@ -170,11 +170,6 @@ static void ext2_preread_inode(struct inode *inode)
 unsigned long offset;
 unsigned long block;
 struct ext2_group_desc * gdp;
-struct backing_dev_info *bdi;
-
-bdi = inode_to_bdi(inode);
-if (bdi_rw_congested(bdi))
-return;

 block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
 gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);

@@ -180,7 +180,7 @@ static struct kmem_cache * ext2_inode_cachep;
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
 struct ext2_inode_info *ei;
-ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
+ei = alloc_inode_sb(sb, ext2_inode_cachep, GFP_KERNEL);
 if (!ei)
 return NULL;
 ei->i_block_alloc_info = NULL;

@@ -1316,7 +1316,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 {
 struct ext4_inode_info *ei;

-ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
+ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
 if (!ei)
 return NULL;

@@ -1504,9 +1504,7 @@ continue_unlock:
 if (IS_NOQUOTA(cc->inode))
 return 0;
 ret = 0;
-cond_resched();
-congestion_wait(BLK_RW_ASYNC,
-DEFAULT_IO_TIMEOUT);
+f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
 goto retry_write;
 }
 return ret;

@@ -3041,8 +3041,7 @@ result:
 } else if (ret == -EAGAIN) {
 ret = 0;
 if (wbc->sync_mode == WB_SYNC_ALL) {
-cond_resched();
-congestion_wait(BLK_RW_ASYNC,
+f2fs_io_schedule_timeout(
 DEFAULT_IO_TIMEOUT);
 goto retry_write;
 }

@@ -4538,6 +4538,12 @@ static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi)
 return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK;
 }

+static inline void f2fs_io_schedule_timeout(long timeout)
+{
+set_current_state(TASK_UNINTERRUPTIBLE);
+io_schedule_timeout(timeout);
+}
+
 #define EFSBADCRC EBADMSG /* Bad CRC detected */
 #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
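The helper added in the hunk above is what the remaining f2fs hunks substitute for the old cond_resched(); congestion_wait(BLK_RW_ASYNC, ...) pairs: instead of waiting on per-bdi congestion state, the task now sleeps uninterruptibly for the full timeout. A minimal caller sketch, with issue() as a placeholder for whatever submission path is being retried (not code from the tree):

/* Illustrative retry loop only; issue() is a placeholder callback. */
static int f2fs_retry_with_backoff(struct f2fs_sb_info *sbi,
                                   int (*issue)(struct f2fs_sb_info *))
{
        int ret;

        do {
                ret = issue(sbi);
                if (ret == -EAGAIN)
                        /* was: congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); */
                        f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
        } while (ret == -EAGAIN);

        return ret;
}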
@@ -313,8 +313,7 @@ next:
 skip:
 iput(inode);
 }
-congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
-cond_resched();
+f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
 if (gc_failure) {
 if (++looped >= count)
 return;

@@ -803,8 +802,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
 do {
 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
 if (ret)
-congestion_wait(BLK_RW_ASYNC,
-DEFAULT_IO_TIMEOUT);
+f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
 } while (ret && --count);

 if (ret) {

@@ -3137,7 +3135,7 @@ next:
 blk_finish_plug(&plug);
 mutex_unlock(&dcc->cmd_lock);
 trimmed += __wait_all_discard_cmd(sbi, NULL);
-congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
 goto next;
 }
 skip:

@@ -1345,8 +1345,12 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 {
 struct f2fs_inode_info *fi;

-fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep,
-GFP_F2FS_ZERO, false, F2FS_SB(sb));
+if (time_to_inject(F2FS_SB(sb), FAULT_SLAB_ALLOC)) {
+f2fs_show_injection_info(F2FS_SB(sb), FAULT_SLAB_ALLOC);
+return NULL;
+}
+
+fi = alloc_inode_sb(sb, f2fs_inode_cachep, GFP_F2FS_ZERO);
 if (!fi)
 return NULL;

@@ -2145,8 +2149,7 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
 /* we should flush all the data to keep data consistency */
 do {
 sync_inodes_sb(sbi->sb);
-cond_resched();
-congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
 } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);

 if (unlikely(retry < 0))

@@ -2514,8 +2517,7 @@ retry:
 &page, &fsdata);
 if (unlikely(err)) {
 if (err == -ENOMEM) {
-congestion_wait(BLK_RW_ASYNC,
-DEFAULT_IO_TIMEOUT);
+f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
 goto retry;
 }
 set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);

@@ -745,7 +745,7 @@ static struct kmem_cache *fat_inode_cachep;
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
 struct msdos_inode_info *ei;
-ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
+ei = alloc_inode_sb(sb, fat_inode_cachep, GFP_NOFS);
 if (!ei)
 return NULL;

@@ -124,7 +124,7 @@ static struct inode *vxfs_alloc_inode(struct super_block *sb)
 {
 struct vxfs_inode_info *vi;

-vi = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL);
+vi = alloc_inode_sb(sb, vxfs_inode_cachep, GFP_KERNEL);
 if (!vi)
 return NULL;
 inode_init_once(&vi->vfs_inode);

@@ -893,43 +893,6 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 }
 EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);

-/**
- * inode_congested - test whether an inode is congested
- * @inode: inode to test for congestion (may be NULL)
- * @cong_bits: mask of WB_[a]sync_congested bits to test
- *
- * Tests whether @inode is congested. @cong_bits is the mask of congestion
- * bits to test and the return value is the mask of set bits.
- *
- * If cgroup writeback is enabled for @inode, the congestion state is
- * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
- * associated with @inode is congested; otherwise, the root wb's congestion
- * state is used.
- *
- * @inode is allowed to be NULL as this function is often called on
- * mapping->host which is NULL for the swapper space.
- */
-int inode_congested(struct inode *inode, int cong_bits)
-{
-/*
- * Once set, ->i_wb never becomes NULL while the inode is alive.
- * Start transaction iff ->i_wb is visible.
- */
-if (inode && inode_to_wb_is_valid(inode)) {
-struct bdi_writeback *wb;
-struct wb_lock_cookie lock_cookie = {};
-bool congested;
-
-wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
-congested = wb_congested(wb, cong_bits);
-unlocked_inode_to_wb_end(inode, &lock_cookie);
-return congested;
-}
-
-return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
-}
-EXPORT_SYMBOL_GPL(inode_congested);
-
 /**
  * wb_split_bdi_pages - split nr_pages to write according to bandwidth
  * @wb: target bdi_writeback to split @nr_pages to

@@ -2233,7 +2196,6 @@ void wb_workfn(struct work_struct *work)
 long pages_written;

 set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
-current->flags |= PF_SWAPWRITE;

 if (likely(!current_is_workqueue_rescuer() ||
 !test_bit(WB_registered, &wb->state))) {

@@ -2262,8 +2224,6 @@ void wb_workfn(struct work_struct *work)
 wb_wakeup(wb);
 else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
 wb_wakeup_delayed(wb);
-
-current->flags &= ~PF_SWAPWRITE;
 }

 /*

@@ -164,7 +164,6 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
 {
 unsigned val;
 struct fuse_conn *fc;
-struct fuse_mount *fm;
 ssize_t ret;

 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,

@@ -178,22 +177,6 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
 down_read(&fc->killsb);
 spin_lock(&fc->bg_lock);
 fc->congestion_threshold = val;
-
-/*
- * Get any fuse_mount belonging to this fuse_conn; s_bdi is
- * shared between all of them
- */
-
-if (!list_empty(&fc->mounts)) {
-fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
-if (fc->num_background < fc->congestion_threshold) {
-clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
-clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
-} else {
-set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
-set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
-}
-}
 spin_unlock(&fc->bg_lock);
 up_read(&fc->killsb);
 fuse_conn_put(fc);

@@ -315,10 +315,6 @@ void fuse_request_end(struct fuse_req *req)
 wake_up(&fc->blocked_waitq);
 }

-if (fc->num_background == fc->congestion_threshold && fm->sb) {
-clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
-clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
-}
 fc->num_background--;
 fc->active_background--;
 flush_bg_queue(fc);

@@ -540,10 +536,6 @@ static bool fuse_request_queue_background(struct fuse_req *req)
 fc->num_background++;
 if (fc->num_background == fc->max_background)
 fc->blocked = 1;
-if (fc->num_background == fc->congestion_threshold && fm->sb) {
-set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
-set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
-}
 list_add_tail(&req->list, &fc->bg_queue);
 flush_bg_queue(fc);
 queued = true;

@@ -966,6 +966,14 @@ static void fuse_readahead(struct readahead_control *rac)
 struct fuse_io_args *ia;
 struct fuse_args_pages *ap;

+if (fc->num_background >= fc->congestion_threshold &&
+rac->ra->async_size >= readahead_count(rac))
+/*
+ * Congested and only async pages left, so skip the
+ * rest.
+ */
+break;
+
 nr_pages = readahead_count(rac) - nr_pages;
 if (nr_pages > max_pages)
 nr_pages = max_pages;

@@ -1959,6 +1967,7 @@ err:

 static int fuse_writepage(struct page *page, struct writeback_control *wbc)
 {
+struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
 int err;

 if (fuse_page_is_writeback(page->mapping->host, page->index)) {

@@ -1974,6 +1983,10 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
 return 0;
 }

+if (wbc->sync_mode == WB_SYNC_NONE &&
+fc->num_background >= fc->congestion_threshold)
+return AOP_WRITEPAGE_ACTIVATE;
+
 err = fuse_writepage_locked(page);
 unlock_page(page);

@@ -2227,6 +2240,10 @@ static int fuse_writepages(struct address_space *mapping,
 if (fuse_is_bad(inode))
 goto out;

+if (wbc->sync_mode == WB_SYNC_NONE &&
+fc->num_background >= fc->congestion_threshold)
+return 0;
+
 data.inode = inode;
 data.wpa = NULL;
 data.ff = NULL;
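Taken together, the fuse hunks above drop the shared bdi congestion bits and gate background work on the connection's own counters instead: WB_SYNC_NONE writeback and trailing readahead simply back off while num_background has reached congestion_threshold. A condensed sketch of that gating pattern (the helper name is invented for illustration; the merged hunks open-code the comparison at each call site):

/* Invented helper name; the merged code compares the fields inline. */
static bool fuse_conn_seems_congested(struct fuse_conn *fc)
{
        return fc->num_background >= fc->congestion_threshold;
}

static int example_fuse_writepages(struct address_space *mapping,
                                   struct writeback_control *wbc)
{
        struct fuse_conn *fc = get_fuse_conn(mapping->host);

        /* Opportunistic writeback can wait; data-integrity writeback cannot. */
        if (wbc->sync_mode == WB_SYNC_NONE && fuse_conn_seems_congested(fc))
                return 0;

        /* ... normal writeback path would continue here ... */
        return 0;
}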
@@ -72,7 +72,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 {
 struct fuse_inode *fi;

-fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
+fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
 if (!fi)
 return NULL;

@@ -1425,7 +1425,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 {
 struct gfs2_inode *ip;

-ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
+ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
 if (!ip)
 return NULL;
 ip->i_flags = 0;

@@ -162,7 +162,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 {
 struct hfs_inode_info *i;

-i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL);
+i = alloc_inode_sb(sb, hfs_inode_cachep, GFP_KERNEL);
 return i ? &i->vfs_inode : NULL;
 }

@@ -624,7 +624,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
 struct hfsplus_inode_info *i;

-i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL);
+i = alloc_inode_sb(sb, hfsplus_inode_cachep, GFP_KERNEL);
 return i ? &i->vfs_inode : NULL;
 }

@@ -222,7 +222,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 {
 struct hostfs_inode_info *hi;

-hi = kmem_cache_alloc(hostfs_inode_cache, GFP_KERNEL_ACCOUNT);
+hi = alloc_inode_sb(sb, hostfs_inode_cache, GFP_KERNEL_ACCOUNT);
 if (hi == NULL)
 return NULL;
 hi->fd = -1;

@@ -232,7 +232,7 @@ static struct kmem_cache * hpfs_inode_cachep;
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
 struct hpfs_inode_info *ei;
-ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
+ei = alloc_inode_sb(sb, hpfs_inode_cachep, GFP_NOFS);
 if (!ei)
 return NULL;
 return &ei->vfs_inode;

@@ -1110,7 +1110,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)

 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
 return NULL;
-p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
+p = alloc_inode_sb(sb, hugetlbfs_inode_cachep, GFP_KERNEL);
 if (unlikely(!p)) {
 hugetlbfs_inc_free_inodes(sbinfo);
 return NULL;

@@ -259,7 +259,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 if (ops->alloc_inode)
 inode = ops->alloc_inode(sb);
 else
-inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
+inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

 if (!inode)
 return NULL;

@@ -70,7 +70,7 @@ static struct kmem_cache *isofs_inode_cachep;
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
 struct iso_inode_info *ei;
-ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL);
+ei = alloc_inode_sb(sb, isofs_inode_cachep, GFP_KERNEL);
 if (!ei)
 return NULL;
 return &ei->vfs_inode;

@@ -39,7 +39,7 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
 struct jffs2_inode_info *f;

-f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
+f = alloc_inode_sb(sb, jffs2_inode_cachep, GFP_KERNEL);
 if (!f)
 return NULL;
 return &f->vfs_inode;

@@ -102,7 +102,7 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
 {
 struct jfs_inode_info *jfs_inode;

-jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
+jfs_inode = alloc_inode_sb(sb, jfs_inode_cachep, GFP_NOFS);
 if (!jfs_inode)
 return NULL;
 #ifdef CONFIG_QUOTA

@@ -63,7 +63,7 @@ static struct kmem_cache * minix_inode_cachep;
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
 struct minix_inode_info *ei;
-ei = kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL);
+ei = alloc_inode_sb(sb, minix_inode_cachep, GFP_KERNEL);
 if (!ei)
 return NULL;
 return &ei->vfs_inode;

@@ -2597,6 +2597,7 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
 struct super_block *sb = mnt->mnt_sb;

 if (!__mnt_is_readonly(mnt) &&
+(!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
 (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
 char *buf = (char *)__get_free_page(GFP_KERNEL);
 char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);

@@ -2611,6 +2612,7 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
 tm.tm_year+1900, (unsigned long long)sb->s_time_max);

 free_page((unsigned long)buf);
+sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
 }
 }

@@ -2238,7 +2238,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 struct inode *nfs_alloc_inode(struct super_block *sb)
 {
 struct nfs_inode *nfsi;
-nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
+nfsi = alloc_inode_sb(sb, nfs_inode_cachep, GFP_KERNEL);
 if (!nfsi)
 return NULL;
 nfsi->flags = 0UL;

@@ -417,7 +417,7 @@ static void nfs_set_page_writeback(struct page *page)

 if (atomic_long_inc_return(&nfss->writeback) >
 NFS_CONGESTION_ON_THRESH)
-set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+nfss->write_congested = 1;
 }

 static void nfs_end_page_writeback(struct nfs_page *req)

@@ -433,7 +433,7 @@ static void nfs_end_page_writeback(struct nfs_page *req)

 end_page_writeback(req->wb_page);
 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+nfss->write_congested = 0;
 }

 /*

@@ -672,6 +672,10 @@ static int nfs_writepage_locked(struct page *page,
 struct inode *inode = page_file_mapping(page)->host;
 int err;

+if (wbc->sync_mode == WB_SYNC_NONE &&
+NFS_SERVER(inode)->write_congested)
+return AOP_WRITEPAGE_ACTIVATE;
+
 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 nfs_pageio_init_write(&pgio, inode, 0,
 false, &nfs_async_write_completion_ops);

@@ -719,6 +723,10 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 int priority = 0;
 int err;

+if (wbc->sync_mode == WB_SYNC_NONE &&
+NFS_SERVER(inode)->write_congested)
+return 0;
+
 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

 if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||

@@ -1893,7 +1901,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 }
 nfss = NFS_SERVER(data->inode);
 if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
+nfss->write_congested = 0;

 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
 nfs_commit_end(cinfo.mds);
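NFS moves in the same direction in the hunks above: rather than toggling bdi congestion bits, it derives a per-server write_congested flag from the in-flight writeback count and uses it to skip WB_SYNC_NONE writeback early. A compact sketch of that bookkeeping (the helper is invented for illustration; the merged code updates the flag inline at the call sites shown):

/* Illustrative only; the real sites update nfss->write_congested inline. */
static void nfs_update_write_congestion(struct nfs_server *nfss)
{
        long in_flight = atomic_long_read(&nfss->writeback);

        if (in_flight > NFS_CONGESTION_ON_THRESH)
                nfss->write_congested = 1;
        else if (in_flight < NFS_CONGESTION_OFF_THRESH)
                nfss->write_congested = 0;
}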
@@ -340,18 +340,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
 struct nilfs_write_info *wi)
 {
 struct bio *bio = wi->bio;
-int err;
-
-if (segbuf->sb_nbio > 0 &&
-bdi_write_congested(segbuf->sb_super->s_bdi)) {
-wait_for_completion(&segbuf->sb_bio_event);
-segbuf->sb_nbio--;
-if (unlikely(atomic_read(&segbuf->sb_err))) {
-bio_put(bio);
-err = -EIO;
-goto failed;
-}
-}

 bio->bi_end_io = nilfs_end_bio_write;
 bio->bi_private = segbuf;

@@ -363,10 +351,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
 wi->start = wi->end;
 return 0;
-
-failed:
-wi->bio = NULL;
-return err;
 }

 static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,

@@ -151,7 +151,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
 {
 struct nilfs_inode_info *ii;

-ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS);
+ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS);
 if (!ii)
 return NULL;
 ii->i_bh = NULL;

@@ -310,7 +310,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 ntfs_inode *ni;

 ntfs_debug("Entering.");
-ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS);
+ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
 if (likely(ni != NULL)) {
 ni->state = 0;
 return VFS_I(ni);

@@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi)
 }
 /* Now allocate memory for the attribute list. */
 ni->attr_list_size = (u32)ntfs_attr_size(a);
+if (!ni->attr_list_size) {
+ntfs_error(sb, "Attr_list_size is zero");
+goto put_err_out;
+}
 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
 if (!ni->attr_list) {
 ntfs_error(sb, "Not enough memory to allocate buffer "

@@ -399,7 +399,7 @@ static struct kmem_cache *ntfs_inode_cachep;

 static struct inode *ntfs_alloc_inode(struct super_block *sb)
 {
-struct ntfs_inode *ni = kmem_cache_alloc(ntfs_inode_cachep, GFP_NOFS);
+struct ntfs_inode *ni = alloc_inode_sb(sb, ntfs_inode_cachep, GFP_NOFS);

 if (!ni)
 return NULL;
Some files were not shown because too many files have changed in this diff.