MM patches for 6.2-rc1

Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull MM updates from Andrew Morton:

 - More userfaultfd work from Peter Xu
 - Several convert-to-folios series from Sidhartha Kumar and Huang Ying
 - Some filemap cleanups from Vishal Moola
 - David Hildenbrand added the ability to selftest anon memory COW handling
 - Some cpuset simplifications from Liu Shixin
 - Addition of vmalloc tracing support by Uladzislau Rezki
 - Some pagecache folioifications and simplifications from Matthew Wilcox
 - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use it
 - Miguel Ojeda contributed some cleanups for our use of the
   __no_sanitize_thread__ gcc keyword. This series should have been in the
   non-MM tree, my bad
 - Naoya Horiguchi improved the interaction between memory poisoning and
   memory section removal for huge pages
 - DAMON cleanups and tuneups from SeongJae Park
 - Tony Luck fixed the handling of COW faults against poisoned pages
 - Peter Xu utilized the PTE marker code for handling swapin errors
 - Hugh Dickins reworked compound page mapcount handling, simplifying it
   and making it more efficient
 - Removal of the autonuma savedwrite infrastructure from Nadav Amit and
   David Hildenbrand
 - zram support for multiple compression streams from Sergey Senozhatsky
 - David Hildenbrand reworked the GUP code's R/O long-term pinning so that
   drivers no longer need to use the FOLL_FORCE workaround which didn't
   work very well anyway
 - Mel Gorman altered the page allocator so that local IRQs can remain
   enabled during per-cpu page allocations
 - Vishal Moola removed the try_to_release_page() wrapper
 - Stefan Roesch added some per-BDI sysfs tunables which are used to
   prevent network block devices from dirtying excessive amounts of
   pagecache
 - David Hildenbrand did some cleanup and repair work on KSM COW breaking
 - Nhat Pham and Johannes Weiner have implemented writeback in zswap's
   zsmalloc backend
 - Brian Foster has fixed a longstanding corner-case oddity in
   file[map]_write_and_wait_range()
 - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang Chen
 - Shiyang Ruan has done some work on fsdax, to make its reflink mode work
   better under xfstests. Better, but still not perfect
 - Christoph Hellwig has removed the .writepage() method from several
   filesystems. They only need .writepages()
 - Yosry Ahmed wrote a series which fixes the memcg reclaim target
   beancounting
 - David Hildenbrand has fixed some of our MM selftests for 32-bit machines
 - Many singleton patches, as usual

* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
  mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
  mm: mmu_gather: allow more than one batch of delayed rmaps
  mm: fix typo in struct pglist_data code comment
  kmsan: fix memcpy tests
  mm: add cond_resched() in swapin_walk_pmd_entry()
  mm: do not show fs mm pc for VM_LOCKONFAULT pages
  selftests/vm: ksm_functional_tests: fixes for 32bit
  selftests/vm: cow: fix compile warning on 32bit
  selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
  mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
  mm,thp,rmap: fix races between updates of subpages_mapcount
  mm: memcg: fix swapcached stat accounting
  mm: add nodes= arg to memory.reclaim
  mm: disable top-tier fallback to reclaim on proactive reclaim
  selftests: cgroup: make sure reclaim target memcg is unprotected
  selftests: cgroup: refactor proactive reclaim code to reclaim_until()
  mm: memcg: fix stale protection of reclaim target memcg
  mm/mmap: properly unaccount memory on mas_preallocate() failure
  omfs: remove ->writepage
  jfs: remove ->writepage
  ...
commit e2ca6ba6ba
237 changed files with 9295 additions and 5061 deletions
@@ -137,3 +137,17 @@ Description:
         The writeback_limit file is read-write and specifies the maximum
         amount of writeback ZRAM can do. The limit could be changed
         in run time.
+
+What:          /sys/block/zram<id>/recomp_algorithm
+Date:          November 2022
+Contact:       Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+        The recomp_algorithm file is read-write and allows to set
+        or show secondary compression algorithms.
+
+What:          /sys/block/zram<id>/recompress
+Date:          November 2022
+Contact:       Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+        The recompress file is write-only and triggers re-compression
+        with secondary compression algorithms.
@@ -44,6 +44,21 @@ Description:

         (read-write)

+What:          /sys/class/bdi/<bdi>/min_ratio_fine
+Date:          November 2022
+Contact:       Stefan Roesch <shr@devkernel.io>
+Description:
+        Under normal circumstances each device is given a part of the
+        total write-back cache that relates to its current average
+        writeout speed in relation to the other devices.
+
+        The 'min_ratio_fine' parameter allows assigning a minimum reserve
+        of the write-back cache to a particular device. The value is
+        expressed as part of 1 million. For example, this is useful for
+        providing a minimum QoS.
+
+        (read-write)
+
 What:          /sys/class/bdi/<bdi>/max_ratio
 Date:          January 2008
 Contact:       Peter Zijlstra <a.p.zijlstra@chello.nl>
@@ -55,6 +70,59 @@ Description:
         mount that is prone to get stuck, or a FUSE mount which cannot
         be trusted to play fair.

+        (read-write)
+
+What:          /sys/class/bdi/<bdi>/max_ratio_fine
+Date:          November 2022
+Contact:       Stefan Roesch <shr@devkernel.io>
+Description:
+        Allows limiting a particular device to use not more than the
+        given value of the write-back cache. The value is given as part
+        of 1 million. This is useful in situations where we want to avoid
+        one device taking all or most of the write-back cache. For example
+        in case of an NFS mount that is prone to get stuck, or a FUSE mount
+        which cannot be trusted to play fair.
+
+        (read-write)
+
+What:          /sys/class/bdi/<bdi>/min_bytes
+Date:          October 2022
+Contact:       Stefan Roesch <shr@devkernel.io>
+Description:
+        Under normal circumstances each device is given a part of the
+        total write-back cache that relates to its current average
+        writeout speed in relation to the other devices.
+
+        The 'min_bytes' parameter allows assigning a minimum
+        percentage of the write-back cache to a particular device
+        expressed in bytes.
+        For example, this is useful for providing a minimum QoS.
+
+        (read-write)
+
+What:          /sys/class/bdi/<bdi>/max_bytes
+Date:          October 2022
+Contact:       Stefan Roesch <shr@devkernel.io>
+Description:
+        Allows limiting a particular device to use not more than the
+        given 'max_bytes' of the write-back cache. This is useful in
+        situations where we want to avoid one device taking all or
+        most of the write-back cache. For example in case of an NFS
+        mount that is prone to get stuck, a FUSE mount which cannot be
+        trusted to play fair, or a nbd device.
+
+        (read-write)
+
+What:          /sys/class/bdi/<bdi>/strict_limit
+Date:          October 2022
+Contact:       Stefan Roesch <shr@devkernel.io>
+Description:
+        Forces per-BDI checks for the share of given device in the write-back
+        cache even before the global background dirty limit is reached. This
+        is useful in situations where the global limit is much higher than
+        affordable for given relatively slow (or untrusted) device. Turning
+        strictlimit on has no visible effect if max_ratio is equal to 100%.
+
         (read-write)

 What:          /sys/class/bdi/<bdi>/stable_pages_required
 Date:          January 2008
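A hedged usage sketch (not part of the patch; the bdi directory name "7:0" and all values are made-up): the new knobs can be combined to confine a slow or untrusted device::

        cd /sys/class/bdi/7:0            # hypothetical bdi directory
        echo 100 > min_ratio_fine        # reserve 100 parts-per-million of the write-back cache
        echo 10485760 > max_bytes        # allow at most 10 MiB of the write-back cache
        echo 1 > strict_limit            # enforce the limit before the global threshold is hit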
@@ -27,6 +27,10 @@ Description:   Writing 'on' or 'off' to this file makes the kdamond starts or
                makes the kdamond reads the user inputs in the sysfs files
                except 'state' again. Writing 'update_schemes_stats' to the
                file updates contents of schemes stats files of the kdamond.
+               Writing 'update_schemes_tried_regions' to the file updates
+               contents of 'tried_regions' directory of every scheme directory
+               of this kdamond. Writing 'clear_schemes_tried_regions' to the
+               file removes contents of the 'tried_regions' directory.

 What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
 Date:          Mar 2022
@@ -283,3 +287,31 @@ Date:          Mar 2022
 Contact:       SeongJae Park <sj@kernel.org>
 Description:   Reading this file returns the number of the exceed events of
                the scheme's quotas.
+
+What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/start
+Date:          Oct 2022
+Contact:       SeongJae Park <sj@kernel.org>
+Description:   Reading this file returns the start address of a memory region
+               that corresponding DAMON-based Operation Scheme's action has
+               tried to be applied.
+
+What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/end
+Date:          Oct 2022
+Contact:       SeongJae Park <sj@kernel.org>
+Description:   Reading this file returns the end address of a memory region
+               that corresponding DAMON-based Operation Scheme's action has
+               tried to be applied.
+
+What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/nr_accesses
+Date:          Oct 2022
+Contact:       SeongJae Park <sj@kernel.org>
+Description:   Reading this file returns the 'nr_accesses' of a memory region
+               that corresponding DAMON-based Operation Scheme's action has
+               tried to be applied.
+
+What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/age
+Date:          Oct 2022
+Contact:       SeongJae Park <sj@kernel.org>
+Description:   Reading this file returns the 'age' of a memory region that
+               corresponding DAMON-based Operation Scheme's action has tried
+               to be applied.
@@ -348,8 +348,13 @@ this can be accomplished with::

        echo huge_idle > /sys/block/zramX/writeback

+If a user chooses to writeback only incompressible pages (pages that none of
+algorithms can compress) this can be accomplished with::
+
+       echo incompressible > /sys/block/zramX/writeback
+
 If an admin wants to write a specific page in zram device to the backing device,
-they could write a page index into the interface.
+they could write a page index into the interface::

        echo "page_index=1251" > /sys/block/zramX/writeback

@@ -401,6 +406,87 @@ budget in next setting is user's job.
 If admin wants to measure writeback count in a certain period, they could
 know it via /sys/block/zram0/bd_stat's 3rd column.

+recompression
+-------------
+
+With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative
+(secondary) compression algorithms. The basic idea is that alternative
+compression algorithm can provide better compression ratio at a price of
+(potentially) slower compression/decompression speeds. Alternative compression
+algorithm can, for example, be more successful compressing huge pages (those
+that default algorithm failed to compress). Another application is idle pages
+recompression - pages that are cold and sit in the memory can be recompressed
+using more effective algorithm and, hence, reduce zsmalloc memory usage.
+
+With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms:
+one primary and up to 3 secondary ones. Primary zram compressor is explained
+in "3) Select compression algorithm", secondary algorithms are configured
+using recomp_algorithm device attribute.
+
+Example:::
+
+       #show supported recompression algorithms
+       cat /sys/block/zramX/recomp_algorithm
+       #1: lzo lzo-rle lz4 lz4hc [zstd]
+       #2: lzo lzo-rle lz4 [lz4hc] zstd
+
+Alternative compression algorithms are sorted by priority. In the example
+above, zstd is used as the first alternative algorithm, which has priority
+of 1, while lz4hc is configured as a compression algorithm with priority 2.
+Alternative compression algorithm's priority is provided during algorithms
+configuration:::
+
+       #select zstd recompression algorithm, priority 1
+       echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm
+
+       #select deflate recompression algorithm, priority 2
+       echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm
+
+Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress,
+which controls recompression.
+
+Examples:::
+
+       #IDLE pages recompression is activated by `idle` mode
+       echo "type=idle" > /sys/block/zramX/recompress
+
+       #HUGE pages recompression is activated by `huge` mode
+       echo "type=huge" > /sys/block/zram0/recompress
+
+       #HUGE_IDLE pages recompression is activated by `huge_idle` mode
+       echo "type=huge_idle" > /sys/block/zramX/recompress
+
+The number of idle pages can be significant, so user-space can pass a size
+threshold (in bytes) to the recompress knob: zram will recompress only pages
+of equal or greater size:::
+
+       #recompress all pages larger than 3000 bytes
+       echo "threshold=3000" > /sys/block/zramX/recompress
+
+       #recompress idle pages larger than 2000 bytes
+       echo "type=idle threshold=2000" > /sys/block/zramX/recompress
+
+Recompression of idle pages requires memory tracking.
+
+During re-compression for every page, that matches re-compression criteria,
+ZRAM iterates the list of registered alternative compression algorithms in
+order of their priorities. ZRAM stops either when re-compression was
+successful (re-compressed object is smaller in size than the original one)
+and matches re-compression criteria (e.g. size threshold) or when there are
+no secondary algorithms left to try. If none of the secondary algorithms can
+successfully re-compressed the page such a page is marked as incompressible,
+so ZRAM will not attempt to re-compress it in the future.
+
+This re-compression behaviour, when it iterates through the list of
+registered compression algorithms, increases our chances of finding the
+algorithm that successfully compresses a particular page. Sometimes, however,
+it is convenient (and sometimes even necessary) to limit recompression to
+only one particular algorithm so that it will not try any other algorithms.
+This can be achieved by providing a algo=NAME parameter:::
+
+       #use zstd algorithm only (if registered)
+       echo "type=huge algo=zstd" > /sys/block/zramX/recompress
+
 memory tracking
 ===============

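Tying the knobs above together (a sketch only; the device index, threshold and algorithm choice are arbitrary and combining all three parameters in one write is an assumption), one possible flow is to register two secondary algorithms and then recompress large huge pages with a specific one::

        echo "algo=zstd priority=1" > /sys/block/zram0/recomp_algorithm
        echo "algo=deflate priority=2" > /sys/block/zram0/recomp_algorithm
        echo "type=huge threshold=3000 algo=zstd" > /sys/block/zram0/recompress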
@@ -411,9 +497,11 @@ pages of the process with *pagemap*.
 If you enable the feature, you could see block state via
 /sys/kernel/debug/zram/zram0/block_state". The output is as follows::

-         300    75.033841 .wh.
-         301    63.806904 s...
-         302    63.806919 ..hi
+         300    75.033841 .wh...
+         301    63.806904 s.....
+         302    63.806919 ..hi..
+         303    62.801919 ....r.
+         304   146.781902 ..hi.n

 First column
        zram's block index.
@@ -430,6 +518,10 @@ Third column
        huge page
 i:
        idle page
+r:
+       recompressed page (secondary compression algorithm)
+n:
+       none (including secondary) of algorithms could compress it

 First line of above example says 300th block is accessed at 75.033841sec
 and the block's state is huge so it is written back to the backing
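For instance (an illustrative one-liner, not part of the change), the new 'r' and 'n' flags can be tallied straight from the block_state file with awk::

        awk '{ if ($3 ~ /r/) r++; if ($3 ~ /n/) n++ }
             END { print r+0 " recompressed, " n+0 " incompressible" }' \
            /sys/kernel/debug/zram/zram0/block_state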
@@ -543,7 +543,8 @@ inactive_anon  # of bytes of anonymous and swap cache memory on inactive
                LRU list.
 active_anon    # of bytes of anonymous and swap cache memory on active
                LRU list.
-inactive_file  # of bytes of file-backed memory on inactive LRU list.
+inactive_file  # of bytes of file-backed memory and MADV_FREE anonymous memory(
+               LazyFree pages) on inactive LRU list.
 active_file    # of bytes of file-backed memory on active LRU list.
 unevictable    # of bytes of memory that cannot be reclaimed (mlocked etc).
 =============== ===============================================================
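As a quick way to read the counters described above (a sketch; the cgroup v1 mount point and group name are assumptions), the file/LazyFree accounting is visible in memory.stat::

        grep -E '^(in)?active_file ' /sys/fs/cgroup/memory/mygroup/memory.stat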
@@ -1245,17 +1245,13 @@ PAGE_SIZE multiple when read back.
          This is a simple interface to trigger memory reclaim in the
          target cgroup.

-         This file accepts a single key, the number of bytes to reclaim.
-         No nested keys are currently supported.
+         This file accepts a string which contains the number of bytes to
+         reclaim.

          Example::

            echo "1G" > memory.reclaim

-         The interface can be later extended with nested keys to
-         configure the reclaim behavior. For example, specify the
-         type of memory to reclaim from (anon, file, ..).
-
          Please note that the kernel can over or under reclaim from
          the target cgroup. If less bytes are reclaimed than the
          specified amount, -EAGAIN is returned.
@@ -1267,6 +1263,13 @@ PAGE_SIZE multiple when read back.
          This means that the networking layer will not adapt based on
          reclaim induced by memory.reclaim.

+         This file also allows the user to specify the nodes to reclaim from,
+         via the 'nodes=' key, for example::
+
+           echo "1G nodes=0,1" > memory.reclaim
+
+         The above instructs the kernel to reclaim memory from nodes 0,1.
+
  memory.peak
        A read-only single value file which exists on non-root
        cgroups.
@@ -1488,12 +1491,18 @@ PAGE_SIZE multiple when read back.
          pgscan_direct (npn)
                Amount of scanned pages directly (in an inactive LRU list)

+         pgscan_khugepaged (npn)
+               Amount of scanned pages by khugepaged (in an inactive LRU list)
+
          pgsteal_kswapd (npn)
                Amount of reclaimed pages by kswapd

          pgsteal_direct (npn)
                Amount of reclaimed pages directly

+         pgsteal_khugepaged (npn)
+               Amount of reclaimed pages by khugepaged
+
          pgfault (npn)
                Total number of page faults incurred

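A small usage sketch building on the nodes= example above (the cgroup path is hypothetical): a failed write indicates that less than the requested amount was reclaimed::

        cd /sys/fs/cgroup/workload
        if ! echo "512M nodes=0" > memory.reclaim; then
                echo "kernel reclaimed less than 512M (-EAGAIN)"
        fi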
@@ -88,6 +88,9 @@ comma (","). ::
     │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
     │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
     │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
+    │ │ │ │ │ │ │ tried_regions/
+    │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
+    │ │ │ │ │ │ │ │ ...
     │ │ │ │ │ │ ...
     │ │ │ │ ...
     │ │ ...
@@ -125,7 +128,14 @@ in the state. Writing ``commit`` to the ``state`` file makes kdamond reads the
 user inputs in the sysfs files except ``state`` file again. Writing
 ``update_schemes_stats`` to ``state`` file updates the contents of stats files
 for each DAMON-based operation scheme of the kdamond. For details of the
-stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.
+stats, please refer to :ref:`stats section <sysfs_schemes_stats>`. Writing
+``update_schemes_tried_regions`` to ``state`` file updates the DAMON-based
+operation scheme action tried regions directory for each DAMON-based operation
+scheme of the kdamond. Writing ``clear_schemes_tried_regions`` to ``state``
+file clears the DAMON-based operating scheme action tried regions directory for
+each DAMON-based operation scheme of the kdamond. For details of the
+DAMON-based operation scheme action tried regions directory, please refer to
+:ref:`tried_regions section <sysfs_schemes_tried_regions>`.

 If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.

@@ -166,6 +176,8 @@ You can set and get what type of monitoring operations DAMON will use for the
 context by writing one of the keywords listed in ``avail_operations`` file and
 reading from the ``operations`` file.

+.. _sysfs_monitoring_attrs:
+
 contexts/<N>/monitoring_attrs/
 ------------------------------

@@ -235,6 +247,9 @@ In each region directory, you will find two files (``start`` and ``end``). You
 can set and get the start and end addresses of the initial monitoring target
 region by writing to and reading from the files, respectively.

+Each region should not overlap with others. ``end`` of directory ``N`` should
+be equal or smaller than ``start`` of directory ``N+1``.
+
 contexts/<N>/schemes/
 ---------------------

@@ -252,8 +267,9 @@ to ``N-1``. Each directory represents each DAMON-based operation scheme.
 schemes/<N>/
 ------------

-In each scheme directory, four directories (``access_pattern``, ``quotas``,
-``watermarks``, and ``stats``) and one file (``action``) exist.
+In each scheme directory, five directories (``access_pattern``, ``quotas``,
+``watermarks``, ``stats``, and ``tried_regions``) and one file (``action``)
+exist.

 The ``action`` file is for setting and getting what action you want to apply to
 memory regions having specific access pattern of the interest. The keywords
@@ -348,6 +364,32 @@ should ask DAMON sysfs interface to updte the content of the files for the
 stats by writing a special keyword, ``update_schemes_stats`` to the relevant
 ``kdamonds/<N>/state`` file.

+.. _sysfs_schemes_tried_regions:
+
+schemes/<N>/tried_regions/
+--------------------------
+
+When a special keyword, ``update_schemes_tried_regions``, is written to the
+relevant ``kdamonds/<N>/state`` file, DAMON creates directories named integer
+starting from ``0`` under this directory. Each directory contains files
+exposing detailed information about each of the memory region that the
+corresponding scheme's ``action`` has tried to be applied under this directory,
+during next :ref:`aggregation interval <sysfs_monitoring_attrs>`. The
+information includes address range, ``nr_accesses``, and ``age`` of the
+region.
+
+The directories will be removed when another special keyword,
+``clear_schemes_tried_regions``, is written to the relevant
+``kdamonds/<N>/state`` file.
+
+tried_regions/<N>/
+------------------
+
+In each region directory, you will find four files (``start``, ``end``,
+``nr_accesses``, and ``age``). Reading the files will show the start and end
+addresses, ``nr_accesses``, and ``age`` of the region that corresponding
+DAMON-based operation scheme ``action`` has tried to be applied.
+
 Example
 ~~~~~~~

@@ -465,8 +507,9 @@ regions in case of physical memory monitoring. Therefore, users should set the
 monitoring target regions by themselves.

 In such cases, users can explicitly set the initial monitoring target regions
-as they want, by writing proper values to the ``init_regions`` file. Each line
-of the input should represent one region in below form.::
+as they want, by writing proper values to the ``init_regions`` file. The input
+should be a sequence of three integers separated by white spaces that represent
+one region in below form.::

     <target idx> <start address> <end address>

@@ -481,9 +524,9 @@ ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
     # cd <debugfs>/damon
     # cat target_ids
     42 4242
-    # echo "0 1 100
-    0 100 200
-    1 20 40
+    # echo "0 1 100 \
+    0 100 200 \
+    1 20 40 \
     1 50 100" > init_regions

 Note that this sets the initial monitoring target regions only. In case of
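As a usage sketch over the files documented above (kdamond, context and scheme index 0 are assumptions), the tried regions of a scheme can be dumped and then cleared like this::

        echo update_schemes_tried_regions > /sys/kernel/mm/damon/admin/kdamonds/0/state
        cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/schemes/0/tried_regions
        for r in [0-9]*; do
                echo "$(cat $r/start)-$(cat $r/end) nr_accesses=$(cat $r/nr_accesses) age=$(cat $r/age)"
        done
        echo clear_schemes_tried_regions > /sys/kernel/mm/damon/admin/kdamonds/0/state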
@@ -428,14 +428,16 @@ with the memory region, as the case would be with BSS (uninitialized data).
 The "pathname" shows the name associated file for this mapping. If the mapping
 is not associated with a file:

- =============              ====================================
+ ===================        ===========================================
  [heap]                     the heap of the program
  [stack]                    the stack of the main process
  [vdso]                     the "virtual dynamic shared object",
                             the kernel system call handler
- [anon:<name>]              an anonymous mapping that has been
+ [anon:<name>]              a private anonymous mapping that has been
                             named by userspace
- =============              ====================================
+ [anon_shmem:<name>]        an anonymous shared memory mapping that has
+                            been named by userspace
+ ===================        ===========================================

 or if empty, the mapping is anonymous.

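For example (an illustrative command, not part of the patch), both kinds of named anonymous mappings can be listed for the current shell with::

        grep -E '\[anon(_shmem)?:' /proc/$$/maps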
@@ -94,7 +94,7 @@ PMD Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pmd_trans_huge            | Tests a Transparent Huge Page (THP) at PMD       |
 +---------------------------+--------------------------------------------------+
-| pmd_present               | Tests a valid mapped PMD                         |
+| pmd_present               | Tests whether pmd_page() points to valid memory  |
 +---------------------------+--------------------------------------------------+
 | pmd_young                 | Tests a young PMD                                |
 +---------------------------+--------------------------------------------------+
@@ -117,31 +117,15 @@ pages:
 - ->_refcount in tail pages is always zero: get_page_unless_zero() never
   succeeds on tail pages.

- - map/unmap of the pages with PTE entry increment/decrement ->_mapcount
-   on relevant sub-page of the compound page.
+ - map/unmap of PMD entry for the whole compound page increment/decrement
+   ->compound_mapcount, stored in the first tail page of the compound page;
+   and also increment/decrement ->subpages_mapcount (also in the first tail)
+   by COMPOUND_MAPPED when compound_mapcount goes from -1 to 0 or 0 to -1.

- - map/unmap of the whole compound page is accounted for in compound_mapcount
-   (stored in first tail page). For file huge pages, we also increment
-   ->_mapcount of all sub-pages in order to have race-free detection of
-   last unmap of subpages.
+ - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount
+   on relevant sub-page of the compound page, and also increment/decrement
+   ->subpages_mapcount, stored in first tail page of the compound page, when
+   _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE.

-PageDoubleMap() indicates that the page is *possibly* mapped with PTEs.
-
-For anonymous pages, PageDoubleMap() also indicates ->_mapcount in all
-subpages is offset up by one. This additional reference is required to
-get race-free detection of unmap of subpages when we have them mapped with
-both PMDs and PTEs.
-
-This optimization is required to lower the overhead of per-subpage mapcount
-tracking. The alternative is to alter ->_mapcount in all subpages on each
-map/unmap of the whole compound page.
-
-For anonymous pages, we set PG_double_map when a PMD of the page is split
-for the first time, but still have a PMD mapping. The additional references
-go away with the last compound_mapcount.
-
-File pages get PG_double_map set on the first map of the page with PTE and
-goes away when the page gets evicted from the page cache.
-
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
MAINTAINERS (12 changed lines)
@@ -13399,10 +13399,20 @@ F: include/linux/memory_hotplug.h
 F:     include/linux/mm.h
 F:     include/linux/mmzone.h
 F:     include/linux/pagewalk.h
-F:     include/linux/vmalloc.h
 F:     mm/
 F:     tools/testing/selftests/vm/

+VMALLOC
+M:     Andrew Morton <akpm@linux-foundation.org>
+R:     Uladzislau Rezki <urezki@gmail.com>
+R:     Christoph Hellwig <hch@infradead.org>
+L:     linux-mm@kvack.org
+S:     Maintained
+W:     http://www.linux-mm.org
+T:     git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:     include/linux/vmalloc.h
+F:     mm/vmalloc.c
+
 MEMORY HOT(UN)PLUG
 M:     David Hildenbrand <david@redhat.com>
 M:     Oscar Salvador <osalvador@suse.de>
@@ -313,8 +313,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)  ((pte_t) { (x).val })

-#define kern_addr_valid(addr)  (1)
-
 #define pte_ERROR(e) \
        printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
@@ -120,8 +120,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 #define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)  ((pte_t) { (x).val })

-#define kern_addr_valid(addr)  (1)
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #include <asm/hugepage.h>
 #endif
@@ -21,8 +21,6 @@
 #define pgd_none(pgd)          (0)
 #define pgd_bad(pgd)           (0)
 #define pgd_clear(pgdp)
-#define kern_addr_valid(addr)  (1)
-/* FIXME */
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
@@ -300,10 +300,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-/* FIXME: this is not correct */
-#define kern_addr_valid(addr)  (1)
-
 /*
  * We provide our own arch_get_unmapped_area to cope with VIPT caches.
  */
@@ -1020,8 +1020,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
  */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

-extern int kern_addr_valid(unsigned long addr);
-
 #ifdef CONFIG_ARM64_MTE

 #define __HAVE_ARCH_PREPARE_TO_SWAP
@@ -814,53 +814,6 @@ void __init paging_init(void)
        create_idmap();
 }

-/*
- * Check whether a kernel address is valid (derived from arch/x86/).
- */
-int kern_addr_valid(unsigned long addr)
-{
-       pgd_t *pgdp;
-       p4d_t *p4dp;
-       pud_t *pudp, pud;
-       pmd_t *pmdp, pmd;
-       pte_t *ptep, pte;
-
-       addr = arch_kasan_reset_tag(addr);
-       if ((((long)addr) >> VA_BITS) != -1UL)
-               return 0;
-
-       pgdp = pgd_offset_k(addr);
-       if (pgd_none(READ_ONCE(*pgdp)))
-               return 0;
-
-       p4dp = p4d_offset(pgdp, addr);
-       if (p4d_none(READ_ONCE(*p4dp)))
-               return 0;
-
-       pudp = pud_offset(p4dp, addr);
-       pud = READ_ONCE(*pudp);
-       if (pud_none(pud))
-               return 0;
-
-       if (pud_sect(pud))
-               return pfn_valid(pud_pfn(pud));
-
-       pmdp = pmd_offset(pudp, addr);
-       pmd = READ_ONCE(*pmdp);
-       if (pmd_none(pmd))
-               return 0;
-
-       if (pmd_sect(pmd))
-               return pfn_valid(pmd_pfn(pmd));
-
-       ptep = pte_offset_kernel(pmdp, addr);
-       pte = READ_ONCE(*ptep);
-       if (pte_none(pte))
-               return 0;
-
-       return pfn_valid(pte_pfn(pte));
-}
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 static void free_hotplug_page_range(struct page *page, size_t size,
                                    struct vmem_altmap *altmap)
@@ -1184,53 +1137,28 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
 }
 #endif

+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+                              unsigned long addr, unsigned long next)
+{
+       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+                               unsigned long addr, unsigned long next)
+{
+       vmemmap_verify((pte_t *)pmdp, node, addr, next);
+       return 1;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap)
 {
-       unsigned long addr = start;
-       unsigned long next;
-       pgd_t *pgdp;
-       p4d_t *p4dp;
-       pud_t *pudp;
-       pmd_t *pmdp;
-
        WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));

        if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES))
                return vmemmap_populate_basepages(start, end, node, altmap);
-
-       do {
-               next = pmd_addr_end(addr, end);
-
-               pgdp = vmemmap_pgd_populate(addr, node);
-               if (!pgdp)
-                       return -ENOMEM;
-
-               p4dp = vmemmap_p4d_populate(pgdp, addr, node);
-               if (!p4dp)
-                       return -ENOMEM;
-
-               pudp = vmemmap_pud_populate(p4dp, addr, node);
-               if (!pudp)
-                       return -ENOMEM;
-
-               pmdp = pmd_offset(pudp, addr);
-               if (pmd_none(READ_ONCE(*pmdp))) {
-                       void *p = NULL;
-
-                       p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-                       if (!p) {
-                               if (vmemmap_populate_basepages(addr, next, node, altmap))
-                                       return -ENOMEM;
-                               continue;
-                       }
-
-                       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
-               } else
-                       vmemmap_verify((pte_t *)pmdp, node, addr, next);
-       } while (addr = next, addr != end);
-
-       return 0;
+       else
+               return vmemmap_populate_hugepages(start, end, node, altmap);
 }

 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -202,8 +202,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)

 /*
  * This function is used to determine if a linear map page has been marked as
- * not-valid. Walk the page table and check the PTE_VALID bit. This is based
- * on kern_addr_valid(), which almost does what we need.
+ * not-valid. Walk the page table and check the PTE_VALID bit.
  *
  * Because this is only called on the kernel linear map, p?d_sect() implies
  * p?d_present(). When debug_pagealloc is enabled, sections mappings are
@@ -249,9 +249,6 @@ extern void paging_init(void);
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
                      pte_t *pte);

-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr)  (1)
-
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
        remap_pfn_range(vma, vaddr, pfn, size, prot)

@@ -131,13 +131,6 @@ static inline void clear_page(void *page)

 #define page_to_virt(page)     __va(page_to_phys(page))

-/*
- * For port to Hexagon Virtual Machine, MAYBE we check for attempts
- * to reference reserved HVM space, but in any case, the VM will be
- * protected.
- */
-#define kern_addr_valid(addr)  (1)
-
 #include <asm/mem-layout.h>
 #include <asm-generic/memory_model.h>
 /* XXX Todo: implement assembly-optimized version of getorder. */
@@ -181,22 +181,6 @@ ia64_phys_addr_valid (unsigned long addr)
        return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
 }

-/*
- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
- * memory. For the return value to be meaningful, ADDR must be >=
- * PAGE_OFFSET. This operation can be relatively expensive (e.g.,
- * require a hash-, or multi-level tree-lookup or something of that
- * sort) but it guarantees to return TRUE only if accessing the page
- * at that address does not cause an error. Note that there may be
- * addresses for which kern_addr_valid() returns FALSE even though an
- * access would not cause an error (e.g., this is typically true for
- * memory mapped I/O regions.
- *
- * XXX Need to implement this for IA-64.
- */
-#define kern_addr_valid(addr)  (1)
-
-
 /*
  * Now come the defines and routines to manage and access the three-level
  * page table.
@@ -91,21 +91,6 @@ int prepare_hugepage_range(struct file *file,
        return 0;
 }

-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
-{
-       struct page *page;
-       pte_t *ptep;
-
-       if (REGION_NUMBER(addr) != RGN_HPAGE)
-               return ERR_PTR(-EINVAL);
-
-       ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
-       if (!ptep || pte_none(*ptep))
-               return NULL;
-       page = pte_page(*ptep);
-       page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
-       return page;
-}
 int pmd_huge(pmd_t pmd)
 {
        return 0;
@ -53,6 +53,7 @@ config LOONGARCH
|
||||||
select ARCH_USE_QUEUED_RWLOCKS
|
select ARCH_USE_QUEUED_RWLOCKS
|
||||||
select ARCH_USE_QUEUED_SPINLOCKS
|
select ARCH_USE_QUEUED_SPINLOCKS
|
||||||
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
|
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
|
||||||
|
select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
|
||||||
select ARCH_WANT_LD_ORPHAN_WARN
|
select ARCH_WANT_LD_ORPHAN_WARN
|
||||||
select ARCH_WANTS_NO_INSTR
|
select ARCH_WANTS_NO_INSTR
|
||||||
select BUILDTIME_TABLE_SORT
|
select BUILDTIME_TABLE_SORT
|
||||||
|
@ -488,6 +489,7 @@ config ARCH_FLATMEM_ENABLE
|
||||||
|
|
||||||
config ARCH_SPARSEMEM_ENABLE
|
config ARCH_SPARSEMEM_ENABLE
|
||||||
def_bool y
|
def_bool y
|
||||||
|
select SPARSEMEM_VMEMMAP_ENABLE
|
||||||
help
|
help
|
||||||
Say Y to support efficient handling of sparse physical memory,
|
Say Y to support efficient handling of sparse physical memory,
|
||||||
for architectures which are either NUMA (Non-Uniform Memory Access)
|
for architectures which are either NUMA (Non-Uniform Memory Access)
|
||||||
|
|
|
@ -42,15 +42,6 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
|
||||||
|
|
||||||
extern void pagetable_init(void);
|
extern void pagetable_init(void);
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize a new pmd table with invalid pointers.
|
|
||||||
*/
|
|
||||||
extern void pmd_init(unsigned long page, unsigned long pagetable);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize a new pgd / pmd table with invalid pointers.
|
|
||||||
*/
|
|
||||||
extern void pgd_init(unsigned long page);
|
|
||||||
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
||||||
|
|
||||||
#define __pte_free_tlb(tlb, pte, address) \
|
#define __pte_free_tlb(tlb, pte, address) \
|
||||||
|
@ -76,7 +67,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||||
}
|
}
|
||||||
|
|
||||||
pmd = (pmd_t *)page_address(pg);
|
pmd = (pmd_t *)page_address(pg);
|
||||||
pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
|
pmd_init(pmd);
|
||||||
return pmd;
|
return pmd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +83,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||||
|
|
||||||
pud = (pud_t *) __get_free_page(GFP_KERNEL);
|
pud = (pud_t *) __get_free_page(GFP_KERNEL);
|
||||||
if (pud)
|
if (pud)
|
||||||
pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
|
pud_init(pud);
|
||||||
return pud;
|
return pud;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <linux/compiler.h>
|
#include <linux/compiler.h>
|
||||||
#include <asm/addrspace.h>
|
#include <asm/addrspace.h>
|
||||||
|
#include <asm/page.h>
|
||||||
#include <asm/pgtable-bits.h>
|
#include <asm/pgtable-bits.h>
|
||||||
|
|
||||||
#if CONFIG_PGTABLE_LEVELS == 2
|
#if CONFIG_PGTABLE_LEVELS == 2
|
||||||
|
@ -59,6 +60,7 @@
|
||||||
#include <linux/mm_types.h>
|
#include <linux/mm_types.h>
|
||||||
#include <linux/mmzone.h>
|
#include <linux/mmzone.h>
|
||||||
#include <asm/fixmap.h>
|
#include <asm/fixmap.h>
|
||||||
|
#include <asm/sparsemem.h>
|
||||||
|
|
||||||
struct mm_struct;
|
struct mm_struct;
|
||||||
struct vm_area_struct;
|
struct vm_area_struct;
|
||||||
|
@ -86,7 +88,10 @@ extern unsigned long zero_page_mask;
|
||||||
#define VMALLOC_START MODULES_END
|
#define VMALLOC_START MODULES_END
|
||||||
#define VMALLOC_END \
|
#define VMALLOC_END \
|
||||||
(vm_map_base + \
|
(vm_map_base + \
|
||||||
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
|
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
|
||||||
|
|
||||||
|
#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
|
||||||
|
#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
|
||||||
|
|
||||||
#define pte_ERROR(e) \
|
#define pte_ERROR(e) \
|
||||||
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
|
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
|
||||||
|
@ -237,11 +242,11 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm
|
||||||
#define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
|
#define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize a new pgd / pmd table with invalid pointers.
|
* Initialize a new pgd / pud / pmd table with invalid pointers.
|
||||||
*/
|
*/
|
||||||
extern void pgd_init(unsigned long page);
|
extern void pgd_init(void *addr);
|
||||||
extern void pud_init(unsigned long page, unsigned long pagetable);
|
extern void pud_init(void *addr);
|
||||||
extern void pmd_init(unsigned long page, unsigned long pagetable);
|
extern void pmd_init(void *addr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Non-present pages: high 40 bits are offset, next 8 bits type,
|
* Non-present pages: high 40 bits are offset, next 8 bits type,
|
||||||
|
@ -425,8 +430,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
|
||||||
__update_tlb(vma, address, (pte_t *)pmdp);
|
__update_tlb(vma, address, (pte_t *)pmdp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define kern_addr_valid(addr) (1)
|
|
||||||
|
|
||||||
static inline unsigned long pmd_pfn(pmd_t pmd)
|
static inline unsigned long pmd_pfn(pmd_t pmd)
|
||||||
{
|
{
|
||||||
return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
|
return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
|
||||||
|
|
|
@@ -11,8 +11,16 @@
 #define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */
 #define MAX_PHYSMEM_BITS 48
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_SIZE (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
+#endif
+
 #endif /* CONFIG_SPARSEMEM */
 
+#ifndef VMEMMAP_SIZE
+#define VMEMMAP_SIZE 0 /* 1, For FLATMEM; 2, For SPARSEMEM without VMEMMAP. */
+#endif
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 int memory_add_physaddr_to_nid(u64 addr);
 #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
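For a sense of scale, the sketch below works through the VMEMMAP_SIZE formula above with assumed example values (a 56-byte struct page, 16 KiB pages, cpu_pabits = 47, a 64-bit host). The real values are determined by the build and the running CPU, so treat this only as an illustration of the arithmetic, not as the kernel's numbers.

#include <stdio.h>

/* Assumed example values -- the kernel derives the real ones itself. */
#define PAGE_SHIFT	14	/* 16 KiB pages */
#define STRUCT_PAGE_SZ	56UL	/* a typical sizeof(struct page) */
#define CPU_PABITS	47	/* physical address bits minus one */

int main(void)
{
	/* One struct page per possible page frame in the physical space. */
	unsigned long nr_pfns = 1UL << (CPU_PABITS + 1 - PAGE_SHIFT);
	unsigned long vmemmap_size = STRUCT_PAGE_SZ * nr_pfns;

	printf("page frames covered : %lu\n", nr_pfns);
	printf("VMEMMAP_SIZE        : %lu bytes (%lu GiB of virtual space)\n",
	       vmemmap_size, vmemmap_size >> 30);
	return 0;
}

With these assumptions the reservation is about 896 GiB of virtual address space, which is why VMALLOC_END above is pulled down by VMEMMAP_SIZE.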
@@ -78,7 +78,7 @@ void __init pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		pgd_populate(&init_mm, pgd, new);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+		pud_init(new);
 #endif
 	}
 
@@ -89,7 +89,7 @@ void __init pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		pud_populate(&init_mm, pud, new);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+		pmd_init(new);
 #endif
 	}
 
@@ -22,7 +22,7 @@
 #include <linux/pfn.h>
 #include <linux/hardirq.h>
 #include <linux/gfp.h>
-#include <linux/initrd.h>
+#include <linux/hugetlb.h>
 #include <linux/mmzone.h>
 
 #include <asm/asm-offsets.h>
@@ -152,6 +152,45 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 #endif
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
+{
+	pmd_t entry;
+
+	entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+	pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+	set_pmd_at(&init_mm, addr, pmd, entry);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
+				unsigned long addr, unsigned long next)
+{
+	int huge = pmd_val(*pmd) & _PAGE_HUGE;
+
+	if (huge)
+		vmemmap_verify((pte_t *)pmd, node, addr, next);
+
+	return huge;
+}
+
+int __meminit vmemmap_populate(unsigned long start, unsigned long end,
+			       int node, struct vmem_altmap *altmap)
+{
+#if CONFIG_PGTABLE_LEVELS == 2
+	return vmemmap_populate_basepages(start, end, node, NULL);
+#else
+	return vmemmap_populate_hugepages(start, end, node, NULL);
+#endif
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
+{
+}
+#endif
+#endif
+
 static pte_t *fixmap_pte(unsigned long addr)
 {
 	pgd_t *pgd;
@@ -168,7 +207,7 @@ static pte_t *fixmap_pte(unsigned long addr)
 		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		pgd_populate(&init_mm, pgd, new);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+		pud_init(new);
 #endif
 	}
 
@@ -179,7 +218,7 @@ static pte_t *fixmap_pte(unsigned long addr)
 		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		pud_populate(&init_mm, pud, new);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+		pmd_init(new);
 #endif
 	}
 
@@ -16,7 +16,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	ret = (pgd_t *) __get_free_page(GFP_KERNEL);
 	if (ret) {
 		init = pgd_offset(&init_mm, 0UL);
-		pgd_init((unsigned long)ret);
+		pgd_init(ret);
 		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
@@ -25,7 +25,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(pgd_alloc);
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
 	unsigned long *p, *end;
 	unsigned long entry;
@@ -38,7 +38,7 @@ void pgd_init(unsigned long page)
 	entry = (unsigned long)invalid_pte_table;
 #endif
 
-	p = (unsigned long *) page;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PGD;
 
 	do {
@@ -56,9 +56,10 @@ void pgd_init(unsigned long page)
 EXPORT_SYMBOL_GPL(pgd_init);
 
 #ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PMD;
@@ -79,9 +80,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
 #ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PUD;
@@ -98,6 +100,7 @@ void pud_init(unsigned long addr, unsigned long pagetable)
 		p[-1] = pagetable;
 	} while (p != end);
 }
+EXPORT_SYMBOL_GPL(pud_init);
 #endif
 
 pmd_t mk_pmd(struct page *page, pgprot_t prot)
@@ -119,12 +122,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 void __init pagetable_init(void)
 {
 	/* Initialize the entire pgd. */
-	pgd_init((unsigned long)swapper_pg_dir);
-	pgd_init((unsigned long)invalid_pg_dir);
+	pgd_init(swapper_pg_dir);
+	pgd_init(invalid_pg_dir);
 #ifndef __PAGETABLE_PUD_FOLDED
-	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+	pud_init(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
-	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+	pmd_init(invalid_pmd_table);
 #endif
 }
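The same prototype change repeats across LoongArch and MIPS below. A user-space sketch of the new convention follows; it is a simplified model only (the table sizes and the invalid-table placeholders are stand-ins, not the kernel's real objects), but it shows how the fill value is now derived inside p?d_init() instead of being passed by every caller.

#include <stdio.h>

#define PTRS_PER_PGD 512	/* example table geometry */
#define PTRS_PER_PMD 512

/* Stand-ins for the kernel's invalid_pte_table / invalid_pmd_table pages. */
static unsigned long invalid_pte_table[PTRS_PER_PMD];
static unsigned long invalid_pmd_table[PTRS_PER_PGD];

/* New-style initializers: the fill value is implied by the level. */
static void pmd_init(void *addr)
{
	unsigned long *p = addr, *end = p + PTRS_PER_PMD;
	unsigned long pagetable = (unsigned long)invalid_pte_table;

	while (p != end)
		*p++ = pagetable;
}

static void pgd_init(void *addr)
{
	unsigned long *p = addr, *end = p + PTRS_PER_PGD;
	unsigned long pagetable = (unsigned long)invalid_pmd_table;

	while (p != end)
		*p++ = pagetable;
}

int main(void)
{
	static unsigned long pgd[PTRS_PER_PGD], pmd[PTRS_PER_PMD];

	pgd_init(pgd);		/* callers no longer pass the fill value */
	pmd_init(pmd);
	printf("pgd[0] -> %p, pmd[0] -> %p\n", (void *)pgd[0], (void *)pmd[0]);
	return 0;
}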
@@ -145,8 +145,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #endif /* !__ASSEMBLY__ */
 
-#define kern_addr_valid(addr) (1)
-
 /* MMU-specific headers */
 
 #ifdef CONFIG_SUN3
@@ -20,7 +20,6 @@
 #define pgd_none(pgd) (0)
 #define pgd_bad(pgd) (0)
 #define pgd_clear(pgdp)
-#define kern_addr_valid(addr) (1)
 #define pmd_offset(a, b) ((void *)0)
 
 #define PAGE_NONE __pgprot(0)
@@ -416,9 +416,6 @@ extern unsigned long iopa(unsigned long addr);
 #define IOMAP_NOCACHE_NONSER 2
 #define IOMAP_NO_COPYBACK 3
 
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr) (1)
-
 void do_page_fault(struct pt_regs *regs, unsigned long address,
 		   unsigned long error_code);
 
@@ -33,7 +33,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Initialize a new pmd table with invalid pointers.
  */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pmd_init(void *addr);
 
 #ifndef __PAGETABLE_PMD_FOLDED
 
@@ -44,9 +44,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #endif
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
+extern void pgd_init(void *addr);
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -77,7 +77,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 	}
 
 	pmd = (pmd_t *)page_address(pg);
-	pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+	pmd_init(pmd);
 	return pmd;
 }
 
@@ -93,7 +93,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_TABLE_ORDER);
 	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+		pud_init(pud);
 	return pud;
 }
 
@@ -313,11 +313,11 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 #endif
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);
 
 /*
  * Non-present pages: high 40 bits are offset, next 8 bits type,
@@ -550,8 +550,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 	__update_tlb(vma, address, pte);
 }
 
-#define kern_addr_valid(addr) (1)
-
 /*
  * Allow physical addresses to be fixed up to help 36-bit peripherals.
  */
@@ -122,8 +122,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 		if (!cache)
 			return NULL;
 		new_pmd = kvm_mmu_memory_cache_alloc(cache);
-		pmd_init((unsigned long)new_pmd,
-			 (unsigned long)invalid_pte_table);
+		pmd_init(new_pmd);
 		pud_populate(NULL, pud, new_pmd);
 	}
 	pmd = pmd_offset(pud, addr);
 
@@ -13,9 +13,9 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
-	unsigned long *p = (unsigned long *) page;
+	unsigned long *p = (unsigned long *)addr;
 	int i;
 
 	for (i = 0; i < USER_PTRS_PER_PGD; i+=8) {
@@ -61,9 +61,8 @@ void __init pagetable_init(void)
 #endif
 
 	/* Initialize the entire pgd. */
-	pgd_init((unsigned long)swapper_pg_dir);
-	pgd_init((unsigned long)swapper_pg_dir
-		 + sizeof(pgd_t) * USER_PTRS_PER_PGD);
+	pgd_init(swapper_pg_dir);
+	pgd_init(&swapper_pg_dir[USER_PTRS_PER_PGD]);
 
 	pgd_base = swapper_pg_dir;
 
@@ -13,7 +13,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
 	unsigned long *p, *end;
 	unsigned long entry;
@@ -26,7 +26,7 @@ void pgd_init(unsigned long page)
 	entry = (unsigned long)invalid_pte_table;
 #endif
 
-	p = (unsigned long *) page;
+	p = (unsigned long *) addr;
 	end = p + PTRS_PER_PGD;
 
 	do {
@@ -43,9 +43,10 @@ void pgd_init(unsigned long page)
 }
 
 #ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PMD;
@@ -66,9 +67,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
 #ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PUD;
@@ -108,12 +110,12 @@ void __init pagetable_init(void)
 	pgd_t *pgd_base;
 
 	/* Initialize the entire pgd. */
-	pgd_init((unsigned long)swapper_pg_dir);
+	pgd_init(swapper_pg_dir);
 #ifndef __PAGETABLE_PUD_FOLDED
-	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+	pud_init(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
-	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+	pmd_init(invalid_pmd_table);
 #endif
 	pgd_base = swapper_pg_dir;
 	/*
@@ -15,7 +15,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
 	if (ret) {
 		init = pgd_offset(&init_mm, 0UL);
-		pgd_init((unsigned long)ret);
+		pgd_init(ret);
 		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
@@ -26,11 +26,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 	set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
 }
 
-/*
- * Initialize a new pmd table with invalid pointers.
- */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
-
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 #define __pte_free_tlb(tlb, pte, addr) \
@@ -249,8 +249,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 
-#define kern_addr_valid(addr) (1)
-
 extern void __init paging_init(void);
 extern void __init mmu_init(void);
 
@@ -50,9 +50,6 @@ struct thread_struct {
 	unsigned long kpsr;
 };
 
-#define INIT_MMAP \
-{ &init_mm, (0), (0), __pgprot(0x0), VM_READ | VM_WRITE | VM_EXEC }
-
 # define INIT_THREAD { \
 	.kregs = NULL, \
 	.ksp = 0, \
@@ -395,8 +395,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
 
-#define kern_addr_valid(addr) (1)
-
 typedef pte_t *pte_addr_t;
 
 #endif /* __ASSEMBLY__ */
@@ -23,21 +23,6 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-/*
- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
- * memory. For the return value to be meaningful, ADDR must be >=
- * PAGE_OFFSET. This operation can be relatively expensive (e.g.,
- * require a hash-, or multi-level tree-lookup or something of that
- * sort) but it guarantees to return TRUE only if accessing the page
- * at that address does not cause an error. Note that there may be
- * addresses for which kern_addr_valid() returns FALSE even though an
- * access would not cause an error (e.g., this is typically true for
- * memory mapped I/O regions.
- *
- * XXX Need to implement this for parisc.
- */
-#define kern_addr_valid(addr) (1)
-
 /* This is for the serialization of PxTLB broadcasts. At least on the N class
  * systems, only one PxTLB inter processor broadcast can be active at any one
  * time on the Merced bus. */
@@ -18,8 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/initrd.h>
 #include <linux/pgtable.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
+#include <linux/mm.h>
 
 #include <asm/pdc.h>
 #include <asm/pdcpat.h>
@@ -232,7 +231,7 @@ void __init pdc_pdt_init(void)
 
 			/* mark memory page bad */
 			memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
-			num_poisoned_pages_inc();
+			num_poisoned_pages_inc(addr >> PAGE_SHIFT);
 		}
 	}
 
@@ -401,35 +401,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
 #define pmdp_clear_flush_young pmdp_test_and_clear_young
 
-static inline int __pte_write(pte_t pte)
-{
-	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
-}
-
-#ifdef CONFIG_NUMA_BALANCING
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
-{
-	/*
-	 * Saved write ptes are prot none ptes that doesn't have
-	 * privileged bit sit. We mark prot none as one which has
-	 * present and pviliged bit set and RWX cleared. To mark
-	 * protnone which used to have _PAGE_WRITE set we clear
-	 * the privileged bit.
-	 */
-	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
-}
-#else
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
-{
-	return false;
-}
-#endif
-
 static inline int pte_write(pte_t pte)
 {
-	return __pte_write(pte) || pte_savedwrite(pte);
+	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
 }
 
 static inline int pte_read(pte_t pte)
@@ -441,24 +415,16 @@ static inline int pte_read(pte_t pte)
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	if (__pte_write(*ptep))
+	if (pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
-	else if (unlikely(pte_savedwrite(*ptep)))
-		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 
 #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	/*
-	 * We should not find protnone for hugetlb, but this complete the
-	 * interface.
-	 */
-	if (__pte_write(*ptep))
+	if (pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
-	else if (unlikely(pte_savedwrite(*ptep)))
-		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
@@ -535,36 +501,6 @@ static inline int pte_protnone(pte_t pte)
 	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
 		cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
 }
-
-#define pte_mk_savedwrite pte_mk_savedwrite
-static inline pte_t pte_mk_savedwrite(pte_t pte)
-{
-	/*
-	 * Used by Autonuma subsystem to preserve the write bit
-	 * while marking the pte PROT_NONE. Only allow this
-	 * on PROT_NONE pte
-	 */
-	VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
-		  cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
-	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
-}
-
-#define pte_clear_savedwrite pte_clear_savedwrite
-static inline pte_t pte_clear_savedwrite(pte_t pte)
-{
-	/*
-	 * Used by KSM subsystem to make a protnone pte readonly.
-	 */
-	VM_BUG_ON(!pte_protnone(pte));
-	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
-}
-#else
-#define pte_clear_savedwrite pte_clear_savedwrite
-static inline pte_t pte_clear_savedwrite(pte_t pte)
-{
-	VM_WARN_ON(1);
-	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
-}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline bool pte_hw_valid(pte_t pte)
@@ -641,8 +577,6 @@ static inline unsigned long pte_pfn(pte_t pte)
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	if (unlikely(pte_savedwrite(pte)))
-		return pte_clear_savedwrite(pte);
 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
 }
 
@@ -1139,8 +1073,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-#define pmd_mk_savedwrite(pmd) pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
-#define pmd_clear_savedwrite(pmd) pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
@@ -1162,8 +1094,6 @@ static inline int pmd_protnone(pmd_t pmd)
 #endif /* CONFIG_NUMA_BALANCING */
 
 #define pmd_write(pmd) pte_write(pmd_pte(pmd))
-#define __pmd_write(pmd) __pte_write(pmd_pte(pmd))
-#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd))
 
 #define pmd_access_permitted pmd_access_permitted
 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
@@ -1241,10 +1171,8 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
-	if (__pmd_write((*pmdp)))
+	if (pmd_write(*pmdp))
 		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
-	else if (unlikely(pmd_savedwrite(*pmdp)))
-		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
 /*
@@ -81,13 +81,6 @@ void poking_init(void);
 extern unsigned long ioremap_bot;
 extern const pgprot_t protection_map[16];
 
-/*
- * kern_addr_valid is intended to indicate whether an address is a valid
- * kernel address. Most 32-bit archs define it as always true (like this)
- * but most 64-bit archs actually perform a test. What should we do here?
- */
-#define kern_addr_valid(addr) (1)
-
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_large(pmd) 0
 #endif
@@ -265,7 +265,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	}
 	pte = kvmppc_read_update_linux_pte(ptep, writing);
 	if (pte_present(pte) && !pte_protnone(pte)) {
-		if (writing && !__pte_write(pte))
+		if (writing && !pte_write(pte))
 			/* make the actual HPTE be read-only */
 			ptel = hpte_make_readonly(ptel);
 		is_ci = pte_ci(pte);
@@ -506,43 +506,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	} while (addr = next, addr != end);
 }
 
-struct page *follow_huge_pd(struct vm_area_struct *vma,
-			    unsigned long address, hugepd_t hpd,
-			    int flags, int pdshift)
-{
-	pte_t *ptep;
-	spinlock_t *ptl;
-	struct page *page = NULL;
-	unsigned long mask;
-	int shift = hugepd_shift(hpd);
-	struct mm_struct *mm = vma->vm_mm;
-
-retry:
-	/*
-	 * hugepage directory entries are protected by mm->page_table_lock
-	 * Use this instead of huge_pte_lockptr
-	 */
-	ptl = &mm->page_table_lock;
-	spin_lock(ptl);
-
-	ptep = hugepte_offset(hpd, address, pdshift);
-	if (pte_present(*ptep)) {
-		mask = (1UL << shift) - 1;
-		page = pte_page(*ptep);
-		page += ((address & mask) >> PAGE_SHIFT);
-		if (flags & FOLL_GET)
-			get_page(page);
-	} else {
-		if (is_hugetlb_entry_migration(*ptep)) {
-			spin_unlock(ptl);
-			__migration_entry_wait(mm, ptep, ptl);
-			goto retry;
-		}
-	}
-	spin_unlock(ptl);
-	return page;
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	int shift = __ffs(size);
@@ -802,8 +802,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 
 #endif /* !CONFIG_MMU */
 
-#define kern_addr_valid(addr) (1) /* FIXME */
-
 extern char _start[];
 extern void *_dtb_early_va;
 extern uintptr_t _dtb_early_pa;
@@ -1774,8 +1774,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
 
-#define kern_addr_valid(addr) (1)
-
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern void vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
@@ -25,7 +25,8 @@
 void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size);
+					  struct encoded_page *page,
+					  int page_size);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -40,11 +41,15 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * Release the page cache reference for a pte removed by
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
+ *
+ * s390 doesn't delay rmap removal, so there is nothing encoded in
+ * the page pointer.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size)
+					  struct encoded_page *page,
+					  int page_size)
 {
-	free_page_and_swap_cache(page);
+	free_page_and_swap_cache(encoded_page_ptr(page));
 	return false;
 }
 
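For background on the signature change above: in this release the generic mmu_gather batches struct encoded_page pointers, which may carry a flag (used for the new delayed rmap removal) in the otherwise-unused low bits of the page pointer, so s390's private __tlb_remove_page_size() has to unwrap it with encoded_page_ptr(). The stand-alone sketch below only illustrates the low-bit pointer-tagging idea; it is not the kernel's encoded_page API, and the flag name is invented for the example.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: stash one flag in the low bit of an aligned pointer. */
#define FLAG_DELAY_RMAP 0x1UL

struct page { int dummy; };

static void *encode(struct page *p, unsigned long flags)
{
	assert(((uintptr_t)p & FLAG_DELAY_RMAP) == 0);	/* pointer is aligned */
	return (void *)((uintptr_t)p | flags);
}

static struct page *decode_ptr(void *enc)
{
	return (struct page *)((uintptr_t)enc & ~FLAG_DELAY_RMAP);
}

static unsigned long decode_flags(void *enc)
{
	return (uintptr_t)enc & FLAG_DELAY_RMAP;
}

int main(void)
{
	static struct page pg;
	void *enc = encode(&pg, FLAG_DELAY_RMAP);

	printf("page %p, flags %lu\n", (void *)decode_ptr(enc), decode_flags(enc));
	return 0;
}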
@@ -336,12 +336,11 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 static unsigned long __gmap_segment_gaddr(unsigned long *entry)
 {
 	struct page *page;
-	unsigned long offset, mask;
+	unsigned long offset;
 
 	offset = (unsigned long) entry / sizeof(unsigned long);
 	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
-	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
-	page = virt_to_page((void *)((unsigned long) entry & mask));
+	page = pmd_pgtable_page((pmd_t *) entry);
 	return page->index + offset;
 }
 
@@ -92,8 +92,6 @@ static inline unsigned long phys_addr_mask(void)
 
 typedef pte_t *pte_addr_t;
 
-#define kern_addr_valid(addr) (1)
-
 #define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
 
 struct vm_area_struct;
@@ -368,12 +368,6 @@ __get_iospace (unsigned long addr)
 	}
 }
 
-extern unsigned long *sparc_valid_addr_bitmap;
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr) \
-	(test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap))
-
 /*
 * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
 * its high 4 bits. These macros/functions put it there or get it from there.
@@ -37,8 +37,7 @@
 
 #include "mm_32.h"
 
-unsigned long *sparc_valid_addr_bitmap;
-EXPORT_SYMBOL(sparc_valid_addr_bitmap);
+static unsigned long *sparc_valid_addr_bitmap;
 
 unsigned long phys_base;
 EXPORT_SYMBOL(phys_base);
@@ -1667,7 +1667,6 @@ bool kern_addr_valid(unsigned long addr)
 
 	return pfn_valid(pte_pfn(*pte));
 }
-EXPORT_SYMBOL(kern_addr_valid);
 
 static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
 					      unsigned long vend,
@@ -298,8 +298,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
 	((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
 #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
 
-#define kern_addr_valid(addr) (1)
-
 /* Clear a kernel PTE and flush it from the TLB */
 #define kpte_clear_flush(ptep, vaddr) \
 do { \
@@ -292,7 +292,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
 static inline int pte_uffd_wp(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_UFFD_WP;
+	bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
+
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Having write bit for wr-protect-marked present ptes is fatal,
+	 * because it means the uffd-wp bit will be ignored and write will
+	 * just go through.
+	 *
+	 * Use any chance of pgtable walking to verify this (e.g., when
+	 * page swapped out or being migrated for all purposes). It means
+	 * something is already wrong. Tell the admin even before the
+	 * process crashes. We also nail it with wrong pgtable setup.
+	 */
+	WARN_ON_ONCE(wp && pte_write(pte));
+#endif
+
+	return wp;
 }
 
 static inline pte_t pte_mkuffd_wp(pte_t pte)
@@ -47,15 +47,6 @@ do { \
 
 #endif /* !__ASSEMBLY__ */
 
-/*
- * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM
- */
-#ifdef CONFIG_FLATMEM
-#define kern_addr_valid(addr) (1)
-#else
-#define kern_addr_valid(kaddr) (0)
-#endif
-
 /*
  * This is used to calculate the .brk reservation for initial pagetables.
  * Enough space is reserved to allocate pagetables sufficient to cover all
@@ -240,7 +240,6 @@ static inline void native_pgd_clear(pgd_t *pgd)
 #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
 #define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
 
-extern int kern_addr_valid(unsigned long addr);
 extern void cleanup_highmap(void);
 
 #define HAVE_ARCH_UNMAPPED_AREA
@@ -268,7 +268,7 @@ static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
 						       unsigned long addr,
 						       unsigned long vm_flags)
 {
-	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
 	struct sgx_encl_page *entry;
 
 	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
@@ -502,7 +502,7 @@ static void sgx_vma_open(struct vm_area_struct *vma)
 int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
 		     unsigned long end, unsigned long vm_flags)
 {
-	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
 	struct sgx_encl_page *page;
 	unsigned long count = 0;
 	int ret = 0;
@@ -1416,47 +1416,6 @@ void mark_rodata_ro(void)
 	debug_checkwx();
 }
 
-int kern_addr_valid(unsigned long addr)
-{
-	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	if (above != 0 && above != -1UL)
-		return 0;
-
-	pgd = pgd_offset_k(addr);
-	if (pgd_none(*pgd))
-		return 0;
-
-	p4d = p4d_offset(pgd, addr);
-	if (!p4d_present(*p4d))
-		return 0;
-
-	pud = pud_offset(p4d, addr);
-	if (!pud_present(*pud))
-		return 0;
-
-	if (pud_large(*pud))
-		return pfn_valid(pud_pfn(*pud));
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		return 0;
-
-	if (pmd_large(*pmd))
-		return pfn_valid(pmd_pfn(*pmd));
-
-	pte = pte_offset_kernel(pmd, addr);
-	if (pte_none(*pte))
-		return 0;
-
-	return pfn_valid(pte_pfn(*pte));
-}
-
 /*
  * Block size is the minimum amount of memory which can be hotplugged or
  * hotremoved. It must be power of two and must be equal or larger than
@@ -1533,37 +1492,9 @@ static long __meminitdata addr_start, addr_end;
 static void __meminitdata *p_start, *p_end;
 static int __meminitdata node_start;
 
-static int __meminit vmemmap_populate_hugepages(unsigned long start,
-		unsigned long end, int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
 {
-	unsigned long addr;
-	unsigned long next;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	for (addr = start; addr < end; addr = next) {
-		next = pmd_addr_end(addr, end);
-
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
-			return -ENOMEM;
-
-		p4d = vmemmap_p4d_populate(pgd, addr, node);
-		if (!p4d)
-			return -ENOMEM;
-
-		pud = vmemmap_pud_populate(p4d, addr, node);
-		if (!pud)
-			return -ENOMEM;
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			void *p;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-			if (p) {
 	pte_t entry;
 
 	entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
@@ -1586,19 +1517,19 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 	if (!IS_ALIGNED(addr, PMD_SIZE) ||
 	    !IS_ALIGNED(next, PMD_SIZE))
 		vmemmap_use_new_sub_pmd(addr, next);
+}
 
-			continue;
-		} else if (altmap)
-			return -ENOMEM; /* no fallback */
-		} else if (pmd_large(*pmd)) {
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
+				unsigned long addr, unsigned long next)
+{
+	int large = pmd_large(*pmd);
+
+	if (pmd_large(*pmd)) {
 		vmemmap_verify((pte_t *)pmd, node, addr, next);
 		vmemmap_use_sub_pmd(addr, next);
-			continue;
 	}
-		if (vmemmap_populate_basepages(addr, next, node, NULL))
-			return -ENOMEM;
-	}
-	return 0;
+
+	return large;
 }
 
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
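The hunk above converts x86 to the now-generic vmemmap_populate_hugepages() and keeps only the vmemmap_set_pmd()/vmemmap_check_pmd() hooks (LoongArch gains the same pair earlier in this series). The stand-alone sketch below mimics that split -- a shared walker that owns the loop and calls two small per-"architecture" hooks -- with invented names and sizes; it is a structural illustration, not the mm/sparse-vmemmap.c code.

#include <stdbool.h>
#include <stdio.h>

#define NCHUNKS 4

static bool mapped[NCHUNKS];

/* "arch" hook #1: is this slot already populated with a huge mapping? */
static bool check_chunk(int idx)
{
	return mapped[idx];
}

/* "arch" hook #2: install a huge mapping for this slot. */
static void set_chunk(int idx)
{
	mapped[idx] = true;
	printf("chunk %d: installed huge mapping\n", idx);
}

/* Generic code: one loop shared by every "architecture". */
static void populate_range(void)
{
	for (int idx = 0; idx < NCHUNKS; idx++) {
		if (check_chunk(idx)) {
			printf("chunk %d: already mapped, reuse\n", idx);
			continue;
		}
		set_chunk(idx);
	}
}

int main(void)
{
	mapped[2] = true;	/* pretend one chunk was mapped earlier */
	populate_range();
	return 0;
}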
@@ -386,8 +386,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 #else
 
-#define kern_addr_valid(addr) (1)
-
 extern void update_mmu_cache(struct vm_area_struct * vma,
 			     unsigned long address, pte_t *ptep);
 
@@ -780,11 +780,6 @@ static int hmat_callback(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block hmat_callback_nb = {
-	.notifier_call = hmat_callback,
-	.priority = 2,
-};
-
 static __init void hmat_free_structures(void)
 {
 	struct memory_target *target, *tnext;
@@ -867,7 +862,7 @@ static __init int hmat_init(void)
 	hmat_register_targets();
 
 	/* Keep the table and structures if the notifier may use them */
-	if (!register_hotmemory_notifier(&hmat_callback_nb))
+	if (!hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
 		return 0;
 out_put:
 	hmat_free_structures();
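The hmat conversion above drops the open-coded notifier_block and registers the callback through hotplug_memory_notifier() with a named priority. A kernel-style sketch of that registration pattern follows; the callback body, priority value and initcall level are invented for illustration, it assumes the 6.2-era API, and it is not meant to build outside a kernel tree.

#include <linux/memory.h>
#include <linux/module.h>
#include <linux/notifier.h>

/* Example priority; real users define their own (hmat uses HMAT_CALLBACK_PRI). */
#define EXAMPLE_CALLBACK_PRI	2

static int example_mem_callback(struct notifier_block *self,
				unsigned long action, void *arg)
{
	switch (action) {
	case MEM_ONLINE:
		pr_info("memory block went online\n");
		break;
	case MEM_OFFLINE:
		pr_info("memory block went offline\n");
		break;
	}
	return NOTIFY_OK;
}

static int __init example_init(void)
{
	/* Registers a hidden notifier_block with the given priority. */
	return hotplug_memory_notifier(example_mem_callback, EXAMPLE_CALLBACK_PRI);
}
device_initcall(example_init);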
@@ -175,6 +175,15 @@ int memory_notify(unsigned long val, void *v)
 	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+static unsigned long memblk_nr_poison(struct memory_block *mem);
+#else
+static inline unsigned long memblk_nr_poison(struct memory_block *mem)
+{
+	return 0;
+}
+#endif
+
 static int memory_block_online(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -183,6 +192,9 @@ static int memory_block_online(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
+	if (memblk_nr_poison(mem))
+		return -EHWPOISON;
+
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
@@ -864,6 +876,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
 			continue;
+		num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
 		unregister_memory_block_under_nodes(mem);
 		remove_memory_block(mem);
 	}
@@ -1164,3 +1177,28 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 	}
 	return ret;
 }
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+static unsigned long memblk_nr_poison(struct memory_block *mem)
+{
+	return atomic_long_read(&mem->nr_hwpoison);
+}
+#endif
@@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING
 	  /sys/kernel/debug/zram/zramX/block_state.
 
 	  See Documentation/admin-guide/blockdev/zram.rst for more information.
+
+config ZRAM_MULTI_COMP
+	bool "Enable multiple compression streams"
+	depends on ZRAM
+	help
+	  This will enable multi-compression streams, so that ZRAM can
+	  re-compress pages using a potentially slower but more effective
+	  compression algorithm. Note, that IDLE page recompression
+	  requires ZRAM_MEMORY_TRACKING.
@@ -206,7 +206,7 @@ void zcomp_destroy(struct zcomp *comp)
  * case of allocation error, or any other error potentially
  * returned by zcomp_init().
  */
-struct zcomp *zcomp_create(const char *compress)
+struct zcomp *zcomp_create(const char *alg)
 {
 	struct zcomp *comp;
 	int error;
@@ -216,14 +216,14 @@ struct zcomp *zcomp_create(const char *compress)
 	 * is not loaded yet. We must do it here, otherwise we are about to
 	 * call /sbin/modprobe under CPU hot-plug lock.
 	 */
-	if (!zcomp_available_algorithm(compress))
+	if (!zcomp_available_algorithm(alg))
 		return ERR_PTR(-EINVAL);
 
 	comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
 	if (!comp)
 		return ERR_PTR(-ENOMEM);
 
-	comp->name = compress;
+	comp->name = alg;
 	error = zcomp_init(comp);
 	if (error) {
 		kfree(comp);
@@ -27,7 +27,7 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node);
 ssize_t zcomp_available_show(const char *comp, char *buf);
 bool zcomp_available_algorithm(const char *comp);
 
-struct zcomp *zcomp_create(const char *comp);
+struct zcomp *zcomp_create(const char *alg);
 void zcomp_destroy(struct zcomp *comp);
 
 struct zcomp_strm *zcomp_stream_get(struct zcomp *comp);
@@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec)
 }
 #endif
 
+static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
+{
+	prio &= ZRAM_COMP_PRIORITY_MASK;
+	/*
+	 * Clear previous priority value first, in case if we recompress
+	 * further an already recompressed page
+	 */
+	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
+				      ZRAM_COMP_PRIORITY_BIT1);
+	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
+}
+
+static inline u32 zram_get_priority(struct zram *zram, u32 index)
+{
+	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
+
+	return prio & ZRAM_COMP_PRIORITY_MASK;
+}
+
 /*
  * Check if request is within bounds and aligned on zram logical blocks.
  */
@@ -188,16 +207,13 @@ static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
 static inline void update_used_max(struct zram *zram,
 				   const unsigned long pages)
 {
-	unsigned long old_max, cur_max;
-
-	old_max = atomic_long_read(&zram->stats.max_used_pages);
+	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
 
 	do {
-		cur_max = old_max;
-		if (pages > cur_max)
-			old_max = atomic_long_cmpxchg(
-				&zram->stats.max_used_pages, cur_max, pages);
-	} while (old_max != cur_max);
+		if (cur_max >= pages)
+			return;
+	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
+					  &cur_max, pages));
 }
 
 static inline void zram_fill_page(void *ptr, unsigned long len,
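The update_used_max() rewrite above replaces an open-coded cmpxchg loop with atomic_long_try_cmpxchg(), which refreshes the expected value on failure and lets the function bail out once the recorded maximum is already large enough. A user-space analogue of that racy-maximum pattern, using C11 atomics rather than the kernel API, is sketched below for illustration only.

#include <stdatomic.h>
#include <stdio.h>

static atomic_long max_used;

/* Raise max_used to 'pages' unless another thread already stored more. */
static void update_used_max(long pages)
{
	long cur_max = atomic_load(&max_used);

	do {
		if (cur_max >= pages)
			return;
		/* On failure, cur_max is reloaded with the current value. */
	} while (!atomic_compare_exchange_weak(&max_used, &cur_max, pages));
}

int main(void)
{
	update_used_max(10);
	update_used_max(7);	/* no effect: 10 is already larger */
	update_used_max(42);
	printf("max_used = %ld\n", atomic_load(&max_used));
	return 0;
}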
|
@@ -632,7 +648,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 #define PAGE_WRITEBACK			0
 #define HUGE_WRITEBACK			(1<<0)
 #define IDLE_WRITEBACK			(1<<1)
+#define INCOMPRESSIBLE_WRITEBACK	(1<<2)

 static ssize_t writeback_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
@@ -653,6 +669,8 @@ static ssize_t writeback_store(struct device *dev,
 		mode = HUGE_WRITEBACK;
 	else if (sysfs_streq(buf, "huge_idle"))
 		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
+	else if (sysfs_streq(buf, "incompressible"))
+		mode = INCOMPRESSIBLE_WRITEBACK;
 	else {
 		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
 			return -EINVAL;
@@ -720,6 +738,10 @@ static ssize_t writeback_store(struct device *dev,
 		if (mode & HUGE_WRITEBACK &&
 		    !zram_test_flag(zram, index, ZRAM_HUGE))
 			goto next;
+		if (mode & INCOMPRESSIBLE_WRITEBACK &&
+		    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+			goto next;
+
 		/*
 		 * Clearing ZRAM_UNDER_WB is duty of caller.
 		 * IOW, zram_free_page never clear it.
@@ -753,8 +775,12 @@ static ssize_t writeback_store(struct device *dev,
 			zram_clear_flag(zram, index, ZRAM_IDLE);
 			zram_slot_unlock(zram, index);
 			/*
-			 * Return last IO error unless every IO were
-			 * not suceeded.
+			 * BIO errors are not fatal, we continue and simply
+			 * attempt to writeback the remaining objects (pages).
+			 * At the same time we need to signal user-space that
+			 * some writes (at least one, but also could be all of
+			 * them) were not successful and we do so by returning
+			 * the most recent BIO error.
 			 */
 			ret = err;
 			continue;
@@ -920,13 +946,16 @@ static ssize_t read_block_state(struct file *file, char __user *buf,

 		ts = ktime_to_timespec64(zram->table[index].ac_time);
 		copied = snprintf(kbuf + written, count,
-			"%12zd %12lld.%06lu %c%c%c%c\n",
+			"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
 			index, (s64)ts.tv_sec,
 			ts.tv_nsec / NSEC_PER_USEC,
 			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
 			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
-			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
+			zram_get_priority(zram, index) ? 'r' : '.',
+			zram_test_flag(zram, index,
+				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

 		if (count <= copied) {
 			zram_slot_unlock(zram, index);
@@ -1000,47 +1029,144 @@ static ssize_t max_comp_streams_store(struct device *dev,
 	return len;
 }

-static ssize_t comp_algorithm_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
 {
-	size_t sz;
-	struct zram *zram = dev_to_zram(dev);
+	/* Do not free statically defined compression algorithms */
+	if (zram->comp_algs[prio] != default_compressor)
+		kfree(zram->comp_algs[prio]);
+
+	zram->comp_algs[prio] = alg;
+}
+
+static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
+{
+	ssize_t sz;

 	down_read(&zram->init_lock);
-	sz = zcomp_available_show(zram->compressor, buf);
+	sz = zcomp_available_show(zram->comp_algs[prio], buf);
 	up_read(&zram->init_lock);

 	return sz;
 }

-static ssize_t comp_algorithm_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t len)
+static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
 {
-	struct zram *zram = dev_to_zram(dev);
-	char compressor[ARRAY_SIZE(zram->compressor)];
+	char *compressor;
 	size_t sz;

-	strscpy(compressor, buf, sizeof(compressor));
+	sz = strlen(buf);
+	if (sz >= CRYPTO_MAX_ALG_NAME)
+		return -E2BIG;
+
+	compressor = kstrdup(buf, GFP_KERNEL);
+	if (!compressor)
+		return -ENOMEM;
+
 	/* ignore trailing newline */
-	sz = strlen(compressor);
 	if (sz > 0 && compressor[sz - 1] == '\n')
 		compressor[sz - 1] = 0x00;

-	if (!zcomp_available_algorithm(compressor))
+	if (!zcomp_available_algorithm(compressor)) {
+		kfree(compressor);
 		return -EINVAL;
+	}

 	down_write(&zram->init_lock);
 	if (init_done(zram)) {
 		up_write(&zram->init_lock);
+		kfree(compressor);
 		pr_info("Can't change algorithm for initialized device\n");
 		return -EBUSY;
 	}

-	strcpy(zram->compressor, compressor);
+	comp_algorithm_set(zram, prio, compressor);
 	up_write(&zram->init_lock);
-	return len;
+	return 0;
 }

+static ssize_t comp_algorithm_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+
+	return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
+}
+
+static ssize_t comp_algorithm_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf,
+				    size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	int ret;
+
+	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
+	return ret ? ret : len;
+}
+
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static ssize_t recomp_algorithm_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	ssize_t sz = 0;
+	u32 prio;
+
+	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+		if (!zram->comp_algs[prio])
+			continue;
+
+		sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
+		sz += __comp_algorithm_show(zram, prio, buf + sz);
+	}
+
+	return sz;
+}
+
+static ssize_t recomp_algorithm_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf,
+				      size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	int prio = ZRAM_SECONDARY_COMP;
+	char *args, *param, *val;
+	char *alg = NULL;
+	int ret;
+
+	args = skip_spaces(buf);
+	while (*args) {
+		args = next_arg(args, &param, &val);
+
+		if (!*val)
+			return -EINVAL;
+
+		if (!strcmp(param, "algo")) {
+			alg = val;
+			continue;
+		}
+
+		if (!strcmp(param, "priority")) {
+			ret = kstrtoint(val, 10, &prio);
+			if (ret)
+				return ret;
+			continue;
+		}
+	}
+
+	if (!alg)
+		return -EINVAL;
+
+	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
+		return -EINVAL;
+
+	ret = __comp_algorithm_store(zram, prio, alg);
+	return ret ? ret : len;
+}
+#endif
+
 static ssize_t compact_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
|
||||||
atomic64_dec(&zram->stats.huge_pages);
|
atomic64_dec(&zram->stats.huge_pages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
|
||||||
|
zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
|
||||||
|
|
||||||
|
zram_set_priority(zram, index, 0);
|
||||||
|
|
||||||
if (zram_test_flag(zram, index, ZRAM_WB)) {
|
if (zram_test_flag(zram, index, ZRAM_WB)) {
|
||||||
zram_clear_flag(zram, index, ZRAM_WB);
|
zram_clear_flag(zram, index, ZRAM_WB);
|
||||||
free_block_bdev(zram, zram_get_element(zram, index));
|
free_block_bdev(zram, zram_get_element(zram, index));
|
||||||
|
@@ -1242,32 +1373,37 @@ out:
 		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
 }

-static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
-				struct bio *bio, bool partial_io)
+/*
+ * Reads a page from the writeback devices. Corresponding ZRAM slot
+ * should be unlocked.
+ */
+static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
+				    u32 index, struct bio *bio, bool partial_io)
+{
+	struct bio_vec bvec = {
+		.bv_page = page,
+		.bv_len = PAGE_SIZE,
+		.bv_offset = 0,
+	};
+
+	return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
+			      partial_io);
+}
+
+/*
+ * Reads (decompresses if needed) a page from zspool (zsmalloc).
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_read_from_zspool(struct zram *zram, struct page *page,
+				 u32 index)
 {
 	struct zcomp_strm *zstrm;
 	unsigned long handle;
 	unsigned int size;
 	void *src, *dst;
+	u32 prio;
 	int ret;

-	zram_slot_lock(zram, index);
-	if (zram_test_flag(zram, index, ZRAM_WB)) {
-		struct bio_vec bvec;
-
-		zram_slot_unlock(zram, index);
-		/* A null bio means rw_page was used, we must fallback to bio */
-		if (!bio)
-			return -EOPNOTSUPP;
-
-		bvec.bv_page = page;
-		bvec.bv_len = PAGE_SIZE;
-		bvec.bv_offset = 0;
-		return read_from_bdev(zram, &bvec,
-				zram_get_element(zram, index),
-				bio, partial_io);
-	}
-
 	handle = zram_get_handle(zram, index);
 	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
 		unsigned long value;
@@ -1277,14 +1413,15 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		mem = kmap_atomic(page);
 		zram_fill_page(mem, PAGE_SIZE, value);
 		kunmap_atomic(mem);
-		zram_slot_unlock(zram, index);
 		return 0;
 	}

 	size = zram_get_obj_size(zram, index);

-	if (size != PAGE_SIZE)
-		zstrm = zcomp_stream_get(zram->comp);
+	if (size != PAGE_SIZE) {
+		prio = zram_get_priority(zram, index);
+		zstrm = zcomp_stream_get(zram->comps[prio]);
+	}

 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
@@ -1296,13 +1433,36 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		dst = kmap_atomic(page);
 		ret = zcomp_decompress(zstrm, src, size, dst);
 		kunmap_atomic(dst);
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[prio]);
 	}
 	zs_unmap_object(zram->mem_pool, handle);
+	return ret;
+}
+
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+			    struct bio *bio, bool partial_io)
+{
+	int ret;
+
+	zram_slot_lock(zram, index);
+	if (!zram_test_flag(zram, index, ZRAM_WB)) {
+		/* Slot should be locked through out the function call */
+		ret = zram_read_from_zspool(zram, page, index);
+		zram_slot_unlock(zram, index);
+	} else {
+		/* Slot should be unlocked before the function call */
 		zram_slot_unlock(zram, index);

+		/* A null bio means rw_page was used, we must fallback to bio */
+		if (!bio)
+			return -EOPNOTSUPP;
+
+		ret = zram_bvec_read_from_bdev(zram, page, index, bio,
+					       partial_io);
+	}
+
 	/* Should NEVER happen. Return bio error if it does. */
-	if (WARN_ON(ret))
+	if (WARN_ON(ret < 0))
 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

 	return ret;
@@ -1363,13 +1523,13 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 	kunmap_atomic(mem);

 compress_again:
-	zstrm = zcomp_stream_get(zram->comp);
+	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
 	src = kmap_atomic(page);
 	ret = zcomp_compress(zstrm, src, &comp_len);
 	kunmap_atomic(src);

 	if (unlikely(ret)) {
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		pr_err("Compression failed! err=%d\n", ret);
 		zs_free(zram->mem_pool, handle);
 		return ret;
@@ -1390,19 +1550,19 @@ compress_again:
 	 * if we have a 'non-null' handle here then we are coming
 	 * from the slow path and handle has already been allocated.
 	 */
-	if (IS_ERR((void *)handle))
+	if (IS_ERR_VALUE(handle))
 		handle = zs_malloc(zram->mem_pool, comp_len,
 				__GFP_KSWAPD_RECLAIM |
 				__GFP_NOWARN |
 				__GFP_HIGHMEM |
 				__GFP_MOVABLE);
-	if (IS_ERR((void *)handle)) {
-		zcomp_stream_put(zram->comp);
+	if (IS_ERR_VALUE(handle)) {
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		atomic64_inc(&zram->stats.writestall);
 		handle = zs_malloc(zram->mem_pool, comp_len,
 				GFP_NOIO | __GFP_HIGHMEM |
 				__GFP_MOVABLE);
-		if (IS_ERR((void *)handle))
+		if (IS_ERR_VALUE(handle))
 			return PTR_ERR((void *)handle);

 		if (comp_len != PAGE_SIZE)
@@ -1414,14 +1574,14 @@ compress_again:
 		 * zstrm buffer back. It is necessary that the dereferencing
 		 * of the zstrm variable below occurs correctly.
 		 */
-		zstrm = zcomp_stream_get(zram->comp);
+		zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
 	}

 	alloced_pages = zs_get_total_pages(zram->mem_pool);
 	update_used_max(zram, alloced_pages);

 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		zs_free(zram->mem_pool, handle);
 		return -ENOMEM;
 	}
@@ -1435,7 +1595,7 @@ compress_again:
 	if (comp_len == PAGE_SIZE)
 		kunmap_atomic(src);

-	zcomp_stream_put(zram->comp);
+	zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 	zs_unmap_object(zram->mem_pool, handle);
 	atomic64_add(comp_len, &zram->stats.compr_data_size);
 out:
@@ -1504,6 +1664,274 @@ out:
 	return ret;
 }

+#ifdef CONFIG_ZRAM_MULTI_COMP
+/*
+ * This function will decompress (unless it's ZRAM_HUGE) the page and then
+ * attempt to compress it using provided compression algorithm priority
+ * (which is potentially more effective).
+ *
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_recompress(struct zram *zram, u32 index, struct page *page,
+			   u32 threshold, u32 prio, u32 prio_max)
+{
+	struct zcomp_strm *zstrm = NULL;
+	unsigned long handle_old;
+	unsigned long handle_new;
+	unsigned int comp_len_old;
+	unsigned int comp_len_new;
+	unsigned int class_index_old;
+	unsigned int class_index_new;
+	u32 num_recomps = 0;
+	void *src, *dst;
+	int ret;
+
+	handle_old = zram_get_handle(zram, index);
+	if (!handle_old)
+		return -EINVAL;
+
+	comp_len_old = zram_get_obj_size(zram, index);
+	/*
+	 * Do not recompress objects that are already "small enough".
+	 */
+	if (comp_len_old < threshold)
+		return 0;
+
+	ret = zram_read_from_zspool(zram, page, index);
+	if (ret)
+		return ret;
+
+	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
+	/*
+	 * Iterate the secondary comp algorithms list (in order of priority)
+	 * and try to recompress the page.
+	 */
+	for (; prio < prio_max; prio++) {
+		if (!zram->comps[prio])
+			continue;
+
+		/*
+		 * Skip if the object is already re-compressed with a higher
+		 * priority algorithm (or same algorithm).
+		 */
+		if (prio <= zram_get_priority(zram, index))
+			continue;
+
+		num_recomps++;
+		zstrm = zcomp_stream_get(zram->comps[prio]);
+		src = kmap_atomic(page);
+		ret = zcomp_compress(zstrm, src, &comp_len_new);
+		kunmap_atomic(src);
+
+		if (ret) {
+			zcomp_stream_put(zram->comps[prio]);
+			return ret;
+		}
+
+		class_index_new = zs_lookup_class_index(zram->mem_pool,
+							comp_len_new);
+
+		/* Continue until we make progress */
+		if (class_index_new >= class_index_old ||
+		    (threshold && comp_len_new >= threshold)) {
+			zcomp_stream_put(zram->comps[prio]);
+			continue;
+		}
+
+		/* Recompression was successful so break out */
+		break;
+	}
+
+	/*
+	 * We did not try to recompress, e.g. when we have only one
+	 * secondary algorithm and the page is already recompressed
+	 * using that algorithm
+	 */
+	if (!zstrm)
+		return 0;
+
+	if (class_index_new >= class_index_old) {
+		/*
+		 * Secondary algorithms failed to re-compress the page
+		 * in a way that would save memory, mark the object as
+		 * incompressible so that we will not try to compress
+		 * it again.
+		 *
+		 * We need to make sure that all secondary algorithms have
+		 * failed, so we test if the number of recompressions matches
+		 * the number of active secondary algorithms.
+		 */
+		if (num_recomps == zram->num_active_comps - 1)
+			zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+		return 0;
+	}
+
+	/* Successful recompression but above threshold */
+	if (threshold && comp_len_new >= threshold)
+		return 0;
+
+	/*
+	 * No direct reclaim (slow path) for handle allocation and no
+	 * re-compression attempt (unlike in __zram_bvec_write()) since
+	 * we already have stored that object in zsmalloc. If we cannot
+	 * alloc memory for recompressed object then we bail out and
+	 * simply keep the old (existing) object in zsmalloc.
+	 */
+	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
+			       __GFP_KSWAPD_RECLAIM |
+			       __GFP_NOWARN |
+			       __GFP_HIGHMEM |
+			       __GFP_MOVABLE);
+	if (IS_ERR_VALUE(handle_new)) {
+		zcomp_stream_put(zram->comps[prio]);
+		return PTR_ERR((void *)handle_new);
+	}
+
+	dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
+	memcpy(dst, zstrm->buffer, comp_len_new);
+	zcomp_stream_put(zram->comps[prio]);
+
+	zs_unmap_object(zram->mem_pool, handle_new);
+
+	zram_free_page(zram, index);
+	zram_set_handle(zram, index, handle_new);
+	zram_set_obj_size(zram, index, comp_len_new);
+	zram_set_priority(zram, index, prio);
+
+	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
+	atomic64_inc(&zram->stats.pages_stored);
+
+	return 0;
+}
+
+#define RECOMPRESS_IDLE		(1 << 0)
+#define RECOMPRESS_HUGE		(1 << 1)
+
+static ssize_t recompress_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
+	struct zram *zram = dev_to_zram(dev);
+	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+	char *args, *param, *val, *algo = NULL;
+	u32 mode = 0, threshold = 0;
+	unsigned long index;
+	struct page *page;
+	ssize_t ret;
+
+	args = skip_spaces(buf);
+	while (*args) {
+		args = next_arg(args, &param, &val);
+
+		if (!*val)
+			return -EINVAL;
+
+		if (!strcmp(param, "type")) {
+			if (!strcmp(val, "idle"))
+				mode = RECOMPRESS_IDLE;
+			if (!strcmp(val, "huge"))
+				mode = RECOMPRESS_HUGE;
+			if (!strcmp(val, "huge_idle"))
+				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
+			continue;
+		}
+
+		if (!strcmp(param, "threshold")) {
+			/*
+			 * We will re-compress only idle objects equal or
+			 * greater in size than watermark.
+			 */
+			ret = kstrtouint(val, 10, &threshold);
+			if (ret)
+				return ret;
+			continue;
+		}
+
+		if (!strcmp(param, "algo")) {
+			algo = val;
+			continue;
+		}
+	}
+
+	if (threshold >= PAGE_SIZE)
+		return -EINVAL;
+
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		ret = -EINVAL;
+		goto release_init_lock;
+	}
+
+	if (algo) {
+		bool found = false;
+
+		for (; prio < ZRAM_MAX_COMPS; prio++) {
+			if (!zram->comp_algs[prio])
+				continue;
+
+			if (!strcmp(zram->comp_algs[prio], algo)) {
+				prio_max = min(prio + 1, ZRAM_MAX_COMPS);
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			ret = -EINVAL;
+			goto release_init_lock;
+		}
+	}
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		ret = -ENOMEM;
+		goto release_init_lock;
+	}
+
+	ret = len;
+	for (index = 0; index < nr_pages; index++) {
+		int err = 0;
+
+		zram_slot_lock(zram, index);
+
+		if (!zram_allocated(zram, index))
+			goto next;
+
+		if (mode & RECOMPRESS_IDLE &&
+		    !zram_test_flag(zram, index, ZRAM_IDLE))
+			goto next;
+
+		if (mode & RECOMPRESS_HUGE &&
+		    !zram_test_flag(zram, index, ZRAM_HUGE))
+			goto next;
+
+		if (zram_test_flag(zram, index, ZRAM_WB) ||
+		    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+		    zram_test_flag(zram, index, ZRAM_SAME) ||
+		    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+			goto next;
+
+		err = zram_recompress(zram, index, page, threshold,
+				      prio, prio_max);
+next:
+		zram_slot_unlock(zram, index);
+		if (err) {
+			ret = err;
+			break;
+		}
+
+		cond_resched();
+	}
+
+	__free_page(page);
+
+release_init_lock:
+	up_read(&zram->init_lock);
+	return ret;
+}
+#endif
+
 /*
  * zram_bio_discard - handler on discard request
  * @index: physical block index in PAGE_SIZE units
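recompress_store() accepts a space-separated list of "param=value" tokens (type, threshold, algo) parsed with next_arg(). A rough userspace approximation of that parsing loop, using strtok() instead of the kernel helper, for input such as "type=huge_idle threshold=3000 algo=zstd":

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char buf[] = "type=huge_idle threshold=3000 algo=zstd";
	unsigned int threshold = 0;
	char *algo = NULL, *type = NULL;

	for (char *tok = strtok(buf, " "); tok; tok = strtok(NULL, " ")) {
		char *val = strchr(tok, '=');

		if (!val || !*(val + 1))
			return 1;	/* would be -EINVAL in the kernel code */
		*val++ = '\0';		/* split "param=value" in place */

		if (!strcmp(tok, "type"))
			type = val;
		else if (!strcmp(tok, "threshold"))
			threshold = strtoul(val, NULL, 10);
		else if (!strcmp(tok, "algo"))
			algo = val;
	}

	printf("type=%s threshold=%u algo=%s\n", type, threshold, algo);
	return 0;
}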
@@ -1553,11 +1981,9 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 	int ret;

 	if (!op_is_write(op)) {
-		atomic64_inc(&zram->stats.num_reads);
 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
 		flush_dcache_page(bvec->bv_page);
 	} else {
-		atomic64_inc(&zram->stats.num_writes);
 		ret = zram_bvec_write(zram, bvec, index, offset, bio);
 	}

@@ -1710,6 +2136,21 @@ out:
 	return ret;
 }

+static void zram_destroy_comps(struct zram *zram)
+{
+	u32 prio;
+
+	for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+		struct zcomp *comp = zram->comps[prio];
+
+		zram->comps[prio] = NULL;
+		if (!comp)
+			continue;
+		zcomp_destroy(comp);
+		zram->num_active_comps--;
+	}
+}
+
 static void zram_reset_device(struct zram *zram)
 {
 	down_write(&zram->init_lock);
@@ -1727,11 +2168,11 @@ static void zram_reset_device(struct zram *zram)
 	/* I/O operation under all of CPU are done so let's free */
 	zram_meta_free(zram, zram->disksize);
 	zram->disksize = 0;
+	zram_destroy_comps(zram);
 	memset(&zram->stats, 0, sizeof(zram->stats));
-	zcomp_destroy(zram->comp);
-	zram->comp = NULL;
 	reset_bdev(zram);

+	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
 	up_write(&zram->init_lock);
 }

@@ -1742,6 +2183,7 @@ static ssize_t disksize_store(struct device *dev,
 	struct zcomp *comp;
 	struct zram *zram = dev_to_zram(dev);
 	int err;
+	u32 prio;

 	disksize = memparse(buf, NULL);
 	if (!disksize)
@@ -1760,22 +2202,29 @@ static ssize_t disksize_store(struct device *dev,
 		goto out_unlock;
 	}

-	comp = zcomp_create(zram->compressor);
+	for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+		if (!zram->comp_algs[prio])
+			continue;
+
+		comp = zcomp_create(zram->comp_algs[prio]);
 		if (IS_ERR(comp)) {
 			pr_err("Cannot initialise %s compressing backend\n",
-				zram->compressor);
+			       zram->comp_algs[prio]);
 			err = PTR_ERR(comp);
-			goto out_free_meta;
+			goto out_free_comps;
 		}

-	zram->comp = comp;
+		zram->comps[prio] = comp;
+		zram->num_active_comps++;
+	}
 	zram->disksize = disksize;
 	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
 	up_write(&zram->init_lock);

 	return len;

-out_free_meta:
+out_free_comps:
+	zram_destroy_comps(zram);
 	zram_meta_free(zram, disksize);
 out_unlock:
 	up_write(&zram->init_lock);
@@ -1860,6 +2309,10 @@ static DEVICE_ATTR_WO(writeback);
 static DEVICE_ATTR_RW(writeback_limit);
 static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static DEVICE_ATTR_RW(recomp_algorithm);
+static DEVICE_ATTR_WO(recompress);
+#endif

 static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_disksize.attr,
@@ -1883,6 +2336,10 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_bd_stat.attr,
 #endif
 	&dev_attr_debug_stat.attr,
+#ifdef CONFIG_ZRAM_MULTI_COMP
+	&dev_attr_recomp_algorithm.attr,
+	&dev_attr_recompress.attr,
+#endif
 	NULL,
 };

@@ -1962,7 +2419,7 @@ static int zram_add(void)
 	if (ret)
 		goto out_cleanup_disk;

-	strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));
+	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

 	zram_debugfs_register(zram);
 	pr_info("Added device: %s\n", zram->disk->disk_name);
@@ -40,6 +40,9 @@
  */
 #define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1)

+/* Only 2 bits are allowed for comp priority index */
+#define ZRAM_COMP_PRIORITY_MASK	0x3
+
 /* Flags for zram pages (table[page_no].flags) */
 enum zram_pageflags {
 	/* zram slot is locked */
@@ -49,6 +52,10 @@ enum zram_pageflags {
 	ZRAM_UNDER_WB,	/* page is under writeback */
 	ZRAM_HUGE,	/* Incompressible page */
 	ZRAM_IDLE,	/* not accessed page since last idle marking */
+	ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */
+
+	ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */
+	ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */

 	__NR_ZRAM_PAGEFLAGS,
 };
@@ -69,8 +76,6 @@ struct zram_table_entry {

 struct zram_stats {
 	atomic64_t compr_data_size;	/* compressed size of pages stored */
-	atomic64_t num_reads;	/* failed + successful */
-	atomic64_t num_writes;	/* --do-- */
 	atomic64_t failed_reads;	/* can happen when memory is too low */
 	atomic64_t failed_writes;	/* can happen when memory is too low */
 	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
@@ -89,10 +94,20 @@ struct zram_stats {
 #endif
 };

+#ifdef CONFIG_ZRAM_MULTI_COMP
+#define ZRAM_PRIMARY_COMP	0U
+#define ZRAM_SECONDARY_COMP	1U
+#define ZRAM_MAX_COMPS	4U
+#else
+#define ZRAM_PRIMARY_COMP	0U
+#define ZRAM_SECONDARY_COMP	0U
+#define ZRAM_MAX_COMPS	1U
+#endif
+
 struct zram {
 	struct zram_table_entry *table;
 	struct zs_pool *mem_pool;
-	struct zcomp *comp;
+	struct zcomp *comps[ZRAM_MAX_COMPS];
 	struct gendisk *disk;
 	/* Prevent concurrent execution of device init */
 	struct rw_semaphore init_lock;
@@ -107,7 +122,8 @@ struct zram {
 	 * we can store in a disk.
 	 */
 	u64 disksize;	/* bytes */
-	char compressor[CRYPTO_MAX_ALG_NAME];
+	const char *comp_algs[ZRAM_MAX_COMPS];
+	s8 num_active_comps;
 	/*
 	 * zram is claimed so open request will be failed
 	 */
@@ -256,7 +256,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
 	 * becoming writable and makes is_cow_mapping(vm_flags) false.
 	 */
 	if (is_cow_mapping(vma->vm_flags) &&
-	    !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+	    !(vma->vm_flags & VM_ACCESS_FLAGS))
 		vma->vm_flags &= ~VM_MAYWRITE;

 	return drm_gem_ttm_mmap(obj, vma);
@@ -643,6 +643,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 	struct page **pvec = NULL;
 	struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr;
 	int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+	unsigned int gup_flags = FOLL_LONGTERM;

 	might_lock_read(&current->mm->mmap_lock);

@@ -653,14 +654,15 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 	if (!pvec)
 		return -ENOMEM;

+	if (!userptr->ro)
+		gup_flags |= FOLL_WRITE;
+
 	do {
 		unsigned num_pages = npages - pinned;
 		uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE;
 		struct page **pages = pvec + pinned;

-		ret = pin_user_pages_fast(ptr, num_pages,
-					  FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM,
-					  pages);
+		ret = pin_user_pages_fast(ptr, num_pages, gup_flags, pages);
 		if (ret < 0) {
 			unpin_user_pages(pvec, pinned);
 			kvfree(pvec);
@@ -477,7 +477,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data *g2d,
 	}

 	ret = pin_user_pages_fast(start, npages,
-				  FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+				  FOLL_WRITE | FOLL_LONGTERM,
 				  g2d_userptr->pages);
 	if (ret != npages) {
 		DRM_DEV_ERROR(g2d->dev,
@@ -156,7 +156,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	struct mm_struct *mm;
 	unsigned long npages;
 	int pinned, ret;
-	unsigned int gup_flags = FOLL_WRITE;
+	unsigned int gup_flags = FOLL_LONGTERM;

 	/*
 	 * If the combination of the addr and size requested for this memory
@@ -210,8 +210,8 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,

 	cur_base = addr & PAGE_MASK;

-	if (!umem->writable)
-		gup_flags |= FOLL_FORCE;
+	if (umem->writable)
+		gup_flags |= FOLL_WRITE;

 	while (npages) {
 		cond_resched();
@@ -219,7 +219,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE /
 					   sizeof(struct page *)),
-				     gup_flags | FOLL_LONGTERM, page_list);
+				     gup_flags, page_list);
 		if (pinned < 0) {
 			ret = pinned;
 			goto umem_release;
@@ -110,7 +110,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 	for (got = 0; got < num_pages; got += ret) {
 		ret = pin_user_pages(start_page + got * PAGE_SIZE,
 				     num_pages - got,
-				     FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
+				     FOLL_LONGTERM | FOLL_WRITE,
 				     p + got, NULL);
 		if (ret < 0) {
 			mmap_read_unlock(current->mm);
@@ -85,6 +85,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 			    int dmasync, struct usnic_uiom_reg *uiomr)
 {
 	struct list_head *chunk_list = &uiomr->chunk_list;
+	unsigned int gup_flags = FOLL_LONGTERM;
 	struct page **page_list;
 	struct scatterlist *sg;
 	struct usnic_uiom_chunk *chunk;
@@ -96,7 +97,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	int off;
 	int i;
 	dma_addr_t pa;
-	unsigned int gup_flags;
 	struct mm_struct *mm;

 	/*
@@ -131,8 +131,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 		goto out;
 	}

-	gup_flags = FOLL_WRITE;
-	gup_flags |= (writable) ? 0 : FOLL_FORCE;
+	if (writable)
+		gup_flags |= FOLL_WRITE;
 	cur_base = addr & PAGE_MASK;
 	ret = 0;

@@ -140,8 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 		ret = pin_user_pages(cur_base,
 				     min_t(unsigned long, npages,
 				     PAGE_SIZE / sizeof(struct page *)),
-				     gup_flags | FOLL_LONGTERM,
-				     page_list, NULL);
+				     gup_flags, page_list, NULL);

 		if (ret < 0)
 			goto out;
@@ -368,7 +368,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 	struct mm_struct *mm_s;
 	u64 first_page_va;
 	unsigned long mlock_limit;
-	unsigned int foll_flags = FOLL_WRITE;
+	unsigned int foll_flags = FOLL_LONGTERM;
 	int num_pages, num_chunks, i, rv = 0;

 	if (!can_do_mlock())
@@ -391,8 +391,8 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)

 	mmgrab(mm_s);

-	if (!writable)
-		foll_flags |= FOLL_FORCE;
+	if (writable)
+		foll_flags |= FOLL_WRITE;

 	mmap_read_lock(mm_s);

@@ -423,8 +423,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 		while (nents) {
 			struct page **plist = &umem->page_chunk[i].plist[got];

-			rv = pin_user_pages(first_page_va, nents,
-					    foll_flags | FOLL_LONGTERM,
+			rv = pin_user_pages(first_page_va, nents, foll_flags,
 					    plist, NULL);
 			if (rv < 0)
 				goto out_sem_up;
@@ -37,7 +37,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, bool write,
 		     struct frame_vector *vec)
 {
 	int ret;
-	unsigned int gup_flags = FOLL_FORCE | FOLL_LONGTERM;
+	unsigned int gup_flags = FOLL_LONGTERM;

 	if (nr_frames == 0)
 		return 0;
@@ -115,7 +115,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,

 	/* Pin user pages for DMA Xfer */
 	err = pin_user_pages_unlocked(user_dma.uaddr, user_dma.page_count,
-			dma->map, FOLL_FORCE);
+			dma->map, 0);

 	if (user_dma.page_count != err) {
 		IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
@@ -63,12 +63,11 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,

 	/* Pin user pages for DMA Xfer */
 	y_pages = pin_user_pages_unlocked(y_dma.uaddr,
-			y_dma.page_count, &dma->map[0], FOLL_FORCE);
+			y_dma.page_count, &dma->map[0], 0);
 	uv_pages = 0; /* silence gcc. value is set and consumed only if: */
 	if (y_pages == y_dma.page_count) {
 		uv_pages = pin_user_pages_unlocked(uv_dma.uaddr,
-				uv_dma.page_count, &dma->map[y_pages],
-				FOLL_FORCE);
+				uv_dma.page_count, &dma->map[y_pages], 0);
 	}

 	if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) {
@@ -151,17 +151,16 @@ static void videobuf_dma_init(struct videobuf_dmabuf *dma)
 static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 			int direction, unsigned long data, unsigned long size)
 {
+	unsigned int gup_flags = FOLL_LONGTERM;
 	unsigned long first, last;
-	int err, rw = 0;
-	unsigned int flags = FOLL_FORCE;
+	int err;

 	dma->direction = direction;
 	switch (dma->direction) {
 	case DMA_FROM_DEVICE:
-		rw = READ;
+		gup_flags |= FOLL_WRITE;
 		break;
 	case DMA_TO_DEVICE:
-		rw = WRITE;
 		break;
 	default:
 		BUG();
@@ -177,14 +176,11 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 	if (NULL == dma->pages)
 		return -ENOMEM;

-	if (rw == READ)
-		flags |= FOLL_WRITE;
-
 	dprintk(1, "init user [0x%lx+0x%lx => %lu pages]\n",
 		data, size, dma->nr_pages);

-	err = pin_user_pages(data & PAGE_MASK, dma->nr_pages,
-			     flags | FOLL_LONGTERM, dma->pages, NULL);
+	err = pin_user_pages(data & PAGE_MASK, dma->nr_pages, gup_flags,
+			     dma->pages, NULL);

 	if (err != dma->nr_pages) {
 		dma->nr_pages = (err >= 0) ? err : 0;
@@ -2312,8 +2312,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 	if (!userptr->pages)
 		return -ENOMEM;

-	rc = pin_user_pages_fast(start, npages,
-				 FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+	rc = pin_user_pages_fast(start, npages, FOLL_WRITE | FOLL_LONGTERM,
 				 userptr->pages);

 	if (rc != npages) {
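The driver hunks above all follow the same conversion: start gup_flags from FOLL_LONGTERM, add FOLL_WRITE only when the mapping is writable, and drop FOLL_FORCE entirely now that R/O long-term pins no longer need it. A condensed, kernel-style sketch of the resulting call shape (not a drop-in for any particular driver; the helper name and surrounding context are assumptions for illustration):

/* Sketch only: mirrors the converted callers above. */
static int pin_userptr_pages(unsigned long start, int npages,
			     bool writable, struct page **pages)
{
	unsigned int gup_flags = FOLL_LONGTERM;

	if (writable)
		gup_flags |= FOLL_WRITE;	/* no FOLL_FORCE for R/O pins */

	return pin_user_pages_fast(start, npages, gup_flags, pages);
}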
fs/dax.c (201 changed lines)

@@ -334,35 +334,41 @@ static unsigned long dax_end_pfn(void *entry)
 	for (pfn = dax_to_pfn(entry); \
 			pfn < dax_end_pfn(entry); pfn++)

-static inline bool dax_mapping_is_cow(struct address_space *mapping)
+static inline bool dax_page_is_shared(struct page *page)
 {
-	return (unsigned long)mapping == PAGE_MAPPING_DAX_COW;
+	return page->mapping == PAGE_MAPPING_DAX_SHARED;
 }

 /*
- * Set the page->mapping with FS_DAX_MAPPING_COW flag, increase the refcount.
+ * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the
+ * refcount.
  */
-static inline void dax_mapping_set_cow(struct page *page)
+static inline void dax_page_share_get(struct page *page)
 {
-	if ((uintptr_t)page->mapping != PAGE_MAPPING_DAX_COW) {
+	if (page->mapping != PAGE_MAPPING_DAX_SHARED) {
 		/*
 		 * Reset the index if the page was already mapped
 		 * regularly before.
 		 */
 		if (page->mapping)
-			page->index = 1;
-		page->mapping = (void *)PAGE_MAPPING_DAX_COW;
+			page->share = 1;
+		page->mapping = PAGE_MAPPING_DAX_SHARED;
 	}
-	page->index++;
+	page->share++;
+}
+
+static inline unsigned long dax_page_share_put(struct page *page)
+{
+	return --page->share;
 }

 /*
- * When it is called in dax_insert_entry(), the cow flag will indicate that
+ * When it is called in dax_insert_entry(), the shared flag will indicate that
  * whether this entry is shared by multiple files. If so, set the page->mapping
- * FS_DAX_MAPPING_COW, and use page->index as refcount.
+ * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount.
  */
 static void dax_associate_entry(void *entry, struct address_space *mapping,
-		struct vm_area_struct *vma, unsigned long address, bool cow)
+		struct vm_area_struct *vma, unsigned long address, bool shared)
 {
 	unsigned long size = dax_entry_size(entry), pfn, index;
 	int i = 0;
@@ -374,8 +380,8 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
 	for_each_mapped_pfn(entry, pfn) {
 		struct page *page = pfn_to_page(pfn);

-		if (cow) {
-			dax_mapping_set_cow(page);
+		if (shared) {
+			dax_page_share_get(page);
 		} else {
 			WARN_ON_ONCE(page->mapping);
 			page->mapping = mapping;
@@ -396,9 +402,9 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 		struct page *page = pfn_to_page(pfn);

 		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
-		if (dax_mapping_is_cow(page->mapping)) {
-			/* keep the CoW flag if this page is still shared */
-			if (page->index-- > 0)
+		if (dax_page_is_shared(page)) {
+			/* keep the shared flag if this page is still shared */
+			if (dax_page_share_put(page) > 0)
 				continue;
 		} else
 			WARN_ON_ONCE(page->mapping && page->mapping != mapping);
@@ -840,12 +846,6 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
 		(iter->iomap.flags & IOMAP_F_DIRTY);
 }

-static bool dax_fault_is_cow(const struct iomap_iter *iter)
-{
-	return (iter->flags & IOMAP_WRITE) &&
-		(iter->iomap.flags & IOMAP_F_SHARED);
-}
-
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -859,13 +859,14 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	void *new_entry = dax_make_entry(pfn, flags);
-	bool dirty = !dax_fault_is_synchronous(iter, vmf->vma);
-	bool cow = dax_fault_is_cow(iter);
+	bool write = iter->flags & IOMAP_WRITE;
+	bool dirty = write && !dax_fault_is_synchronous(iter, vmf->vma);
+	bool shared = iter->iomap.flags & IOMAP_F_SHARED;

 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

-	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
+	if (shared || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
 		unsigned long index = xas->xa_index;
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
@@ -877,12 +878,12 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,

 	xas_reset(xas);
 	xas_lock_irq(xas);
-	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+	if (shared || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		void *old;

 		dax_disassociate_entry(entry, mapping, false);
 		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
-				cow);
+				shared);
 		/*
 		 * Only swap our new entry into the page cache if the current
 		 * entry is a zero page or an empty entry. If a normal PTE or
@@ -902,7 +903,7 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 	if (dirty)
 		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

-	if (cow)
+	if (write && shared)
 		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);

 	xas_unlock_irq(xas);
@@ -1086,7 +1087,8 @@ out:
 }

 /**
- * dax_iomap_cow_copy - Copy the data from source to destination before write
+ * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page
+ * by copying the data before and after the range to be written.
  * @pos: address to do copy from.
  * @length: size of copy operation.
  * @align_size: aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE)
@@ -1095,48 +1097,70 @@ out:
  *
  * This can be called from two places. Either during DAX write fault (page
  * aligned), to copy the length size data to daddr. Or, while doing normal DAX
- * write operation, dax_iomap_actor() might call this to do the copy of either
+ * write operation, dax_iomap_iter() might call this to do the copy of either
  * start or end unaligned address. In the latter case the rest of the copy of
- * aligned ranges is taken care by dax_iomap_actor() itself.
+ * aligned ranges is taken care by dax_iomap_iter() itself.
+ * If the srcmap contains invalid data, such as HOLE and UNWRITTEN, zero the
+ * area to make sure no old data remains.
  */
-static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
+static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
 		const struct iomap *srcmap, void *daddr)
 {
 	loff_t head_off = pos & (align_size - 1);
 	size_t size = ALIGN(head_off + length, align_size);
 	loff_t end = pos + length;
 	loff_t pg_end = round_up(end, align_size);
+	/* copy_all is usually in page fault case */
 	bool copy_all = head_off == 0 && end == pg_end;
+	/* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
+	bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
+			 srcmap->type == IOMAP_UNWRITTEN;
 	void *saddr = 0;
 	int ret = 0;

+	if (!zero_edge) {
 		ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
 		if (ret)
 			return ret;
+	}

 	if (copy_all) {
+		if (zero_edge)
+			memset(daddr, 0, size);
+		else
 			ret = copy_mc_to_kernel(daddr, saddr, length);
-		return ret ? -EIO : 0;
+		goto out;
 	}

 	/* Copy the head part of the range */
 	if (head_off) {
+		if (zero_edge)
+			memset(daddr, 0, head_off);
+		else {
 			ret = copy_mc_to_kernel(daddr, saddr, head_off);
 			if (ret)
 				return -EIO;
 		}
+	}

 	/* Copy the tail part of the range */
 	if (end < pg_end) {
 		loff_t tail_off = head_off + length;
 		loff_t tail_len = pg_end - end;

-		ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off,
-				tail_len);
+		if (zero_edge)
+			memset(daddr + tail_off, 0, tail_len);
+		else {
+			ret = copy_mc_to_kernel(daddr + tail_off,
+					saddr + tail_off, tail_len);
 			if (ret)
 				return -EIO;
 		}
-	return 0;
+	}
+out:
+	if (zero_edge)
+		dax_flush(srcmap->dax_dev, daddr, size);
+	return ret ? -EIO : 0;
 }

 /*
@@ -1221,6 +1245,58 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #endif /* CONFIG_FS_DAX_PMD */

+static s64 dax_unshare_iter(struct iomap_iter *iter)
+{
+	struct iomap *iomap = &iter->iomap;
+	const struct iomap *srcmap = iomap_iter_srcmap(iter);
+	loff_t pos = iter->pos;
+	loff_t length = iomap_length(iter);
+	int id = 0;
|
||||||
|
s64 ret = 0;
|
||||||
|
void *daddr = NULL, *saddr = NULL;
|
||||||
|
|
||||||
|
/* don't bother with blocks that are not shared to start with */
|
||||||
|
if (!(iomap->flags & IOMAP_F_SHARED))
|
||||||
|
return length;
|
||||||
|
/* don't bother with holes or unwritten extents */
|
||||||
|
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||||
|
return length;
|
||||||
|
|
||||||
|
id = dax_read_lock();
|
||||||
|
ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
ret = copy_mc_to_kernel(daddr, saddr, length);
|
||||||
|
if (ret)
|
||||||
|
ret = -EIO;
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
dax_read_unlock(id);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
||||||
|
const struct iomap_ops *ops)
|
||||||
|
{
|
||||||
|
struct iomap_iter iter = {
|
||||||
|
.inode = inode,
|
||||||
|
.pos = pos,
|
||||||
|
.len = len,
|
||||||
|
.flags = IOMAP_WRITE | IOMAP_UNSHARE | IOMAP_DAX,
|
||||||
|
};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||||
|
iter.processed = dax_unshare_iter(&iter);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(dax_file_unshare);
|
||||||
|
|
||||||
static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
|
static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
|
||||||
{
|
{
|
||||||
const struct iomap *iomap = &iter->iomap;
|
const struct iomap *iomap = &iter->iomap;
|
||||||
|
@ -1235,13 +1311,10 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
memset(kaddr + offset, 0, size);
|
memset(kaddr + offset, 0, size);
|
||||||
if (srcmap->addr != iomap->addr) {
|
if (iomap->flags & IOMAP_F_SHARED)
|
||||||
ret = dax_iomap_cow_copy(pos, size, PAGE_SIZE, srcmap,
|
ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
|
||||||
kaddr);
|
kaddr);
|
||||||
if (ret < 0)
|
else
|
||||||
return ret;
|
|
||||||
dax_flush(iomap->dax_dev, kaddr, PAGE_SIZE);
|
|
||||||
} else
|
|
||||||
dax_flush(iomap->dax_dev, kaddr + offset, size);
|
dax_flush(iomap->dax_dev, kaddr + offset, size);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -1258,6 +1331,15 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||||
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||||
return length;
|
return length;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* invalidate the pages whose sharing state is to be changed
|
||||||
|
* because of CoW.
|
||||||
|
*/
|
||||||
|
if (iomap->flags & IOMAP_F_SHARED)
|
||||||
|
invalidate_inode_pages2_range(iter->inode->i_mapping,
|
||||||
|
pos >> PAGE_SHIFT,
|
||||||
|
(pos + length - 1) >> PAGE_SHIFT);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
unsigned offset = offset_in_page(pos);
|
unsigned offset = offset_in_page(pos);
|
||||||
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
||||||
|
@ -1318,12 +1400,13 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||||
struct iov_iter *iter)
|
struct iov_iter *iter)
|
||||||
{
|
{
|
||||||
const struct iomap *iomap = &iomi->iomap;
|
const struct iomap *iomap = &iomi->iomap;
|
||||||
const struct iomap *srcmap = &iomi->srcmap;
|
const struct iomap *srcmap = iomap_iter_srcmap(iomi);
|
||||||
loff_t length = iomap_length(iomi);
|
loff_t length = iomap_length(iomi);
|
||||||
loff_t pos = iomi->pos;
|
loff_t pos = iomi->pos;
|
||||||
struct dax_device *dax_dev = iomap->dax_dev;
|
struct dax_device *dax_dev = iomap->dax_dev;
|
||||||
loff_t end = pos + length, done = 0;
|
loff_t end = pos + length, done = 0;
|
||||||
bool write = iov_iter_rw(iter) == WRITE;
|
bool write = iov_iter_rw(iter) == WRITE;
|
||||||
|
bool cow = write && iomap->flags & IOMAP_F_SHARED;
|
||||||
ssize_t ret = 0;
|
ssize_t ret = 0;
|
||||||
size_t xfer;
|
size_t xfer;
|
||||||
int id;
|
int id;
|
||||||
|
@ -1350,7 +1433,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||||
* into page tables. We have to tear down these mappings so that data
|
* into page tables. We have to tear down these mappings so that data
|
||||||
* written by write(2) is visible in mmap.
|
* written by write(2) is visible in mmap.
|
||||||
*/
|
*/
|
||||||
if (iomap->flags & IOMAP_F_NEW) {
|
if (iomap->flags & IOMAP_F_NEW || cow) {
|
||||||
invalidate_inode_pages2_range(iomi->inode->i_mapping,
|
invalidate_inode_pages2_range(iomi->inode->i_mapping,
|
||||||
pos >> PAGE_SHIFT,
|
pos >> PAGE_SHIFT,
|
||||||
(end - 1) >> PAGE_SHIFT);
|
(end - 1) >> PAGE_SHIFT);
|
||||||
|
@ -1384,10 +1467,9 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (write &&
|
if (cow) {
|
||||||
srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
|
ret = dax_iomap_copy_around(pos, length, PAGE_SIZE,
|
||||||
ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap,
|
srcmap, kaddr);
|
||||||
kaddr);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1532,7 +1614,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
|
||||||
struct xa_state *xas, void **entry, bool pmd)
|
struct xa_state *xas, void **entry, bool pmd)
|
||||||
{
|
{
|
||||||
const struct iomap *iomap = &iter->iomap;
|
const struct iomap *iomap = &iter->iomap;
|
||||||
const struct iomap *srcmap = &iter->srcmap;
|
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||||
size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
|
size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
|
||||||
loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
|
loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
|
||||||
bool write = iter->flags & IOMAP_WRITE;
|
bool write = iter->flags & IOMAP_WRITE;
|
||||||
|
@ -1563,9 +1645,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
|
||||||
|
|
||||||
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags);
|
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags);
|
||||||
|
|
||||||
if (write &&
|
if (write && iomap->flags & IOMAP_F_SHARED) {
|
||||||
srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
|
err = dax_iomap_copy_around(pos, size, size, srcmap, kaddr);
|
||||||
err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr);
|
|
||||||
if (err)
|
if (err)
|
||||||
return dax_fault_return(err);
|
return dax_fault_return(err);
|
||||||
}
|
}
|
||||||
|
@ -1936,15 +2017,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
|
||||||
.len = len,
|
.len = len,
|
||||||
.flags = IOMAP_DAX,
|
.flags = IOMAP_DAX,
|
||||||
};
|
};
|
||||||
int ret;
|
int ret, compared = 0;
|
||||||
|
|
||||||
while ((ret = iomap_iter(&src_iter, ops)) > 0) {
|
while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
|
||||||
while ((ret = iomap_iter(&dst_iter, ops)) > 0) {
|
(ret = iomap_iter(&dst_iter, ops)) > 0) {
|
||||||
dst_iter.processed = dax_range_compare_iter(&src_iter,
|
compared = dax_range_compare_iter(&src_iter, &dst_iter, len,
|
||||||
&dst_iter, len, same);
|
same);
|
||||||
}
|
if (compared < 0)
|
||||||
if (ret <= 0)
|
return ret;
|
||||||
src_iter.processed = ret;
|
src_iter.processed = dst_iter.processed = compared;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@@ -345,11 +345,6 @@ static void exfat_readahead(struct readahead_control *rac)
     mpage_readahead(rac, exfat_get_block);
 }
 
-static int exfat_writepage(struct page *page, struct writeback_control *wbc)
-{
-    return block_write_full_page(page, exfat_get_block, wbc);
-}
-
 static int exfat_writepages(struct address_space *mapping,
         struct writeback_control *wbc)
 {
@@ -473,12 +468,12 @@ static const struct address_space_operations exfat_aops = {
     .invalidate_folio = block_invalidate_folio,
     .read_folio = exfat_read_folio,
     .readahead = exfat_readahead,
-    .writepage = exfat_writepage,
     .writepages = exfat_writepages,
     .write_begin = exfat_write_begin,
     .write_end = exfat_write_end,
     .direct_IO = exfat_direct_IO,
-    .bmap = exfat_aop_bmap
+    .bmap = exfat_aop_bmap,
+    .migrate_folio = buffer_migrate_folio,
 };
 
 static inline unsigned long exfat_hash(loff_t i_pos)
@@ -253,6 +253,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 {
     struct inode *orig_inode = file_inode(o_filp);
     struct page *pagep[2] = {NULL, NULL};
+    struct folio *folio[2] = {NULL, NULL};
     handle_t *handle;
     ext4_lblk_t orig_blk_offset, donor_blk_offset;
     unsigned long blocksize = orig_inode->i_sb->s_blocksize;
@@ -313,6 +314,13 @@ again:
     * hold page's lock, if it is still the case data copy is not
     * necessary, just swap data blocks between orig and donor.
     */
+    folio[0] = page_folio(pagep[0]);
+    folio[1] = page_folio(pagep[1]);
+
+    VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
+    VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
+    VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);
+
     if (unwritten) {
         ext4_double_down_write_data_sem(orig_inode, donor_inode);
         /* If any of extents in range became initialized we have to
@@ -331,10 +339,10 @@ again:
             ext4_double_up_write_data_sem(orig_inode, donor_inode);
             goto data_copy;
         }
-        if ((page_has_private(pagep[0]) &&
-             !try_to_release_page(pagep[0], 0)) ||
-            (page_has_private(pagep[1]) &&
-             !try_to_release_page(pagep[1], 0))) {
+        if ((folio_has_private(folio[0]) &&
+             !filemap_release_folio(folio[0], 0)) ||
+            (folio_has_private(folio[1]) &&
+             !filemap_release_folio(folio[1], 0))) {
             *err = -EBUSY;
             goto drop_data_sem;
         }
@@ -344,19 +352,21 @@ again:
                 block_len_in_page, 1, err);
     drop_data_sem:
         ext4_double_up_write_data_sem(orig_inode, donor_inode);
-        goto unlock_pages;
+        goto unlock_folios;
     }
 data_copy:
-    *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
+    *err = mext_page_mkuptodate(&folio[0]->page, from, from + replaced_size);
     if (*err)
-        goto unlock_pages;
+        goto unlock_folios;
 
     /* At this point all buffers in range are uptodate, old mapping layout
     * is no longer required, try to drop it now. */
-    if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
-        (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
+    if ((folio_has_private(folio[0]) &&
+         !filemap_release_folio(folio[0], 0)) ||
+        (folio_has_private(folio[1]) &&
+         !filemap_release_folio(folio[1], 0))) {
         *err = -EBUSY;
-        goto unlock_pages;
+        goto unlock_folios;
     }
     ext4_double_down_write_data_sem(orig_inode, donor_inode);
     replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
@@ -369,13 +379,13 @@ data_copy:
             replaced_size =
                 block_len_in_page << orig_inode->i_blkbits;
         } else
-            goto unlock_pages;
+            goto unlock_folios;
     }
     /* Perform all necessary steps similar write_begin()/write_end()
     * but keeping in mind that i_size will not change */
-    if (!page_has_buffers(pagep[0]))
-        create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
-    bh = page_buffers(pagep[0]);
+    if (!folio_buffers(folio[0]))
+        create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
+    bh = folio_buffers(folio[0]);
     for (i = 0; i < data_offset_in_page; i++)
         bh = bh->b_this_page;
     for (i = 0; i < block_len_in_page; i++) {
@@ -385,7 +395,7 @@ data_copy:
         bh = bh->b_this_page;
     }
     if (!*err)
-        *err = block_commit_write(pagep[0], from, from + replaced_size);
+        *err = block_commit_write(&folio[0]->page, from, from + replaced_size);
 
     if (unlikely(*err < 0))
         goto repair_branches;
@@ -395,11 +405,11 @@ data_copy:
     *err = ext4_jbd2_inode_add_write(handle, orig_inode,
             (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
 
-unlock_pages:
-    unlock_page(pagep[0]);
-    put_page(pagep[0]);
-    unlock_page(pagep[1]);
-    put_page(pagep[1]);
+unlock_folios:
+    folio_unlock(folio[0]);
+    folio_put(folio[0]);
+    folio_unlock(folio[1]);
+    folio_put(folio[1]);
 stop_journal:
     ext4_journal_stop(handle);
     if (*err == -ENOSPC &&
@@ -430,7 +440,7 @@ repair_branches:
         *err = -EIO;
     }
     replaced_count = 0;
-    goto unlock_pages;
+    goto unlock_folios;
 }
 
 /**
@@ -194,11 +194,6 @@ static int fat_get_block(struct inode *inode, sector_t iblock,
     return 0;
 }
 
-static int fat_writepage(struct page *page, struct writeback_control *wbc)
-{
-    return block_write_full_page(page, fat_get_block, wbc);
-}
-
 static int fat_writepages(struct address_space *mapping,
         struct writeback_control *wbc)
 {
@@ -346,12 +341,12 @@ static const struct address_space_operations fat_aops = {
     .invalidate_folio = block_invalidate_folio,
     .read_folio = fat_read_folio,
     .readahead = fat_readahead,
-    .writepage = fat_writepage,
     .writepages = fat_writepages,
     .write_begin = fat_write_begin,
     .write_end = fat_write_end,
     .direct_IO = fat_direct_IO,
-    .bmap = _fat_bmap
+    .bmap = _fat_bmap,
+    .migrate_folio = buffer_migrate_folio,
 };
 
 /*
@@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
     return ncpy;
 }
 
-static int fuse_check_page(struct page *page)
+static int fuse_check_folio(struct folio *folio)
 {
-    if (page_mapcount(page) ||
-        page->mapping != NULL ||
-        (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+    if (folio_mapped(folio) ||
+        folio->mapping != NULL ||
+        (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
          ~(1 << PG_locked |
            1 << PG_referenced |
            1 << PG_uptodate |
@@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page)
           1 << PG_reclaim |
           1 << PG_waiters |
           LRU_GEN_MASK | LRU_REFS_MASK))) {
-        dump_page(page, "fuse: trying to steal weird page");
+        dump_page(&folio->page, "fuse: trying to steal weird page");
         return 1;
     }
     return 0;
@@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page)
 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 {
     int err;
-    struct page *oldpage = *pagep;
-    struct page *newpage;
+    struct folio *oldfolio = page_folio(*pagep);
+    struct folio *newfolio;
     struct pipe_buffer *buf = cs->pipebufs;
 
-    get_page(oldpage);
+    folio_get(oldfolio);
     err = unlock_request(cs->req);
     if (err)
         goto out_put_old;
@@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
     if (!pipe_buf_try_steal(cs->pipe, buf))
         goto out_fallback;
 
-    newpage = buf->page;
+    newfolio = page_folio(buf->page);
 
-    if (!PageUptodate(newpage))
-        SetPageUptodate(newpage);
+    if (!folio_test_uptodate(newfolio))
+        folio_mark_uptodate(newfolio);
 
-    ClearPageMappedToDisk(newpage);
+    folio_clear_mappedtodisk(newfolio);
 
-    if (fuse_check_page(newpage) != 0)
+    if (fuse_check_folio(newfolio) != 0)
         goto out_fallback_unlock;
 
     /*
      * This is a new and locked page, it shouldn't be mapped or
      * have any special flags on it
      */
-    if (WARN_ON(page_mapped(oldpage)))
+    if (WARN_ON(folio_mapped(oldfolio)))
         goto out_fallback_unlock;
-    if (WARN_ON(page_has_private(oldpage)))
+    if (WARN_ON(folio_has_private(oldfolio)))
         goto out_fallback_unlock;
-    if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+    if (WARN_ON(folio_test_dirty(oldfolio) ||
+                folio_test_writeback(oldfolio)))
         goto out_fallback_unlock;
-    if (WARN_ON(PageMlocked(oldpage)))
+    if (WARN_ON(folio_test_mlocked(oldfolio)))
         goto out_fallback_unlock;
 
-    replace_page_cache_page(oldpage, newpage);
+    replace_page_cache_folio(oldfolio, newfolio);
 
-    get_page(newpage);
+    folio_get(newfolio);
 
     if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-        lru_cache_add(newpage);
+        folio_add_lru(newfolio);
 
     /*
      * Release while we have extra ref on stolen page. Otherwise
@@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
     if (test_bit(FR_ABORTED, &cs->req->flags))
         err = -ENOENT;
     else
-        *pagep = newpage;
+        *pagep = &newfolio->page;
     spin_unlock(&cs->req->waitq.lock);
 
     if (err) {
-        unlock_page(newpage);
-        put_page(newpage);
+        folio_unlock(newfolio);
+        folio_put(newfolio);
         goto out_put_old;
     }
 
-    unlock_page(oldpage);
+    folio_unlock(oldfolio);
     /* Drop ref for ap->pages[] array */
-    put_page(oldpage);
+    folio_put(oldfolio);
     cs->len = 0;
 
     err = 0;
 out_put_old:
     /* Drop ref obtained in this function */
-    put_page(oldpage);
+    folio_put(oldfolio);
     return err;
 
 out_fallback_unlock:
-    unlock_page(newpage);
+    folio_unlock(newfolio);
 out_fallback:
     cs->pg = buf->page;
     cs->offset = buf->offset;
@@ -173,12 +173,12 @@ const struct address_space_operations hfs_aops = {
     .dirty_folio = block_dirty_folio,
     .invalidate_folio = block_invalidate_folio,
     .read_folio = hfs_read_folio,
-    .writepage = hfs_writepage,
     .write_begin = hfs_write_begin,
     .write_end = generic_write_end,
     .bmap = hfs_bmap,
     .direct_IO = hfs_direct_IO,
     .writepages = hfs_writepages,
+    .migrate_folio = buffer_migrate_folio,
 };
 
 /*
@@ -170,12 +170,12 @@ const struct address_space_operations hfsplus_aops = {
     .dirty_folio = block_dirty_folio,
     .invalidate_folio = block_invalidate_folio,
     .read_folio = hfsplus_read_folio,
-    .writepage = hfsplus_writepage,
     .write_begin = hfsplus_write_begin,
     .write_end = generic_write_end,
     .bmap = hfsplus_bmap,
     .direct_IO = hfsplus_direct_IO,
     .writepages = hfsplus_writepages,
+    .migrate_folio = buffer_migrate_folio,
 };
 
 const struct dentry_operations hfsplus_dentry_operations = {
@@ -163,11 +163,6 @@ static int hpfs_read_folio(struct file *file, struct folio *folio)
     return mpage_read_folio(folio, hpfs_get_block);
 }
 
-static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-    return block_write_full_page(page, hpfs_get_block, wbc);
-}
-
 static void hpfs_readahead(struct readahead_control *rac)
 {
     mpage_readahead(rac, hpfs_get_block);
@@ -248,12 +243,12 @@ const struct address_space_operations hpfs_aops = {
     .dirty_folio = block_dirty_folio,
     .invalidate_folio = block_invalidate_folio,
     .read_folio = hpfs_read_folio,
-    .writepage = hpfs_writepage,
     .readahead = hpfs_readahead,
     .writepages = hpfs_writepages,
     .write_begin = hpfs_write_begin,
     .write_end = hpfs_write_end,
-    .bmap = _hpfs_bmap
+    .bmap = _hpfs_bmap,
+    .migrate_folio = buffer_migrate_folio,
 };
 
 const struct file_operations hpfs_file_ops =
@@ -370,11 +370,11 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
     return -EINVAL;
 }
 
-static void hugetlb_delete_from_page_cache(struct page *page)
+static void hugetlb_delete_from_page_cache(struct folio *folio)
 {
-    ClearPageDirty(page);
-    ClearPageUptodate(page);
-    delete_from_page_cache(page);
+    folio_clear_dirty(folio);
+    folio_clear_uptodate(folio);
+    filemap_remove_folio(folio);
 }
 
 /*
@@ -580,8 +580,8 @@ static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
     * map could fail. Correspondingly, the subpool and global
     * reserve usage count can need to be adjusted.
     */
-    VM_BUG_ON(HPageRestoreReserve(&folio->page));
-    hugetlb_delete_from_page_cache(&folio->page);
+    VM_BUG_ON_FOLIO(folio_test_hugetlb_restore_reserve(folio), folio);
+    hugetlb_delete_from_page_cache(folio);
     ret = true;
     if (!truncate_op) {
         if (unlikely(hugetlb_unreserve_pages(inode, index,
@@ -1097,10 +1097,10 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
     if (rc != MIGRATEPAGE_SUCCESS)
         return rc;
 
-    if (hugetlb_page_subpool(&src->page)) {
-        hugetlb_set_page_subpool(&dst->page,
-                hugetlb_page_subpool(&src->page));
-        hugetlb_set_page_subpool(&src->page, NULL);
+    if (hugetlb_folio_subpool(src)) {
+        hugetlb_set_folio_subpool(dst,
+                hugetlb_folio_subpool(src));
+        hugetlb_set_folio_subpool(src, NULL);
     }
 
     if (mode != MIGRATE_SYNC_NO_COPY)
@@ -1279,7 +1279,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 
 static void init_once(void *foo)
 {
-    struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
+    struct hugetlbfs_inode_info *ei = foo;
 
     inode_init_once(&ei->vfs_inode);
 }
@@ -1377,7 +1377,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
     case Opt_size:
         /* memparse() will accept a K/M/G without a digit */
-        if (!isdigit(param->string[0]))
+        if (!param->string || !isdigit(param->string[0]))
             goto bad_val;
         ctx->max_size_opt = memparse(param->string, &rest);
         ctx->max_val_type = SIZE_STD;
@@ -1387,7 +1387,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
     case Opt_nr_inodes:
         /* memparse() will accept a K/M/G without a digit */
-        if (!isdigit(param->string[0]))
+        if (!param->string || !isdigit(param->string[0]))
             goto bad_val;
         ctx->nr_inodes = memparse(param->string, &rest);
         return 0;
@@ -1403,7 +1403,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
     case Opt_min_size:
         /* memparse() will accept a K/M/G without a digit */
-        if (!isdigit(param->string[0]))
+        if (!param->string || !isdigit(param->string[0]))
             goto bad_val;
         ctx->min_size_opt = memparse(param->string, &rest);
         ctx->min_val_type = SIZE_STD;
@@ -264,11 +264,6 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
     return rc;
 }
 
-static int jfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-    return block_write_full_page(page, jfs_get_block, wbc);
-}
-
 static int jfs_writepages(struct address_space *mapping,
         struct writeback_control *wbc)
 {
@@ -355,12 +350,12 @@ const struct address_space_operations jfs_aops = {
     .invalidate_folio = block_invalidate_folio,
     .read_folio = jfs_read_folio,
     .readahead = jfs_readahead,
-    .writepage = jfs_writepage,
     .writepages = jfs_writepages,
     .write_begin = jfs_write_begin,
     .write_end = jfs_write_end,
     .bmap = jfs_bmap,
     .direct_IO = jfs_direct_IO,
+    .migrate_folio = buffer_migrate_folio,
 };
 
 /*
@@ -294,11 +294,6 @@ static void omfs_readahead(struct readahead_control *rac)
     mpage_readahead(rac, omfs_get_block);
 }
 
-static int omfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-    return block_write_full_page(page, omfs_get_block, wbc);
-}
-
 static int
 omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
@@ -375,10 +370,10 @@ const struct address_space_operations omfs_aops = {
     .invalidate_folio = block_invalidate_folio,
     .read_folio = omfs_read_folio,
     .readahead = omfs_readahead,
-    .writepage = omfs_writepage,
     .writepages = omfs_writepages,
     .write_begin = omfs_write_begin,
     .write_end = generic_write_end,
     .bmap = omfs_bmap,
+    .migrate_folio = buffer_migrate_folio,
 };
 
@@ -18,7 +18,6 @@
 #include <linux/capability.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
-#include <linux/notifier.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/printk.h>
@@ -541,13 +540,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
             fallthrough;
         case KCORE_VMEMMAP:
         case KCORE_TEXT:
-            if (kern_addr_valid(start)) {
             /*
              * Using bounce buffer to bypass the
              * hardened user copy kernel text checks.
              */
-            if (copy_from_kernel_nofault(buf, (void *)start,
-                    tsz)) {
+            if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
                 if (clear_user(buffer, tsz)) {
                     ret = -EFAULT;
                     goto out;
@@ -558,12 +555,6 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
                     goto out;
                 }
             }
-            } else {
-                if (clear_user(buffer, tsz)) {
-                    ret = -EFAULT;
-                    goto out;
-                }
-            }
             break;
         default:
             pr_warn_once("Unhandled KCORE type: %d\n", m->type);
@@ -638,10 +629,6 @@ static int __meminit kcore_callback(struct notifier_block *self,
     return NOTIFY_OK;
 }
 
-static struct notifier_block kcore_callback_nb __meminitdata = {
-    .notifier_call = kcore_callback,
-    .priority = 0,
-};
 
 static struct kcore_list kcore_vmalloc;
 
@@ -694,7 +681,7 @@ static int __init proc_kcore_init(void)
     add_modules_range();
     /* Store direct-map area from physical memory map */
     kcore_update_ram();
-    register_hotmemory_notifier(&kcore_callback_nb);
+    hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI);
 
     return 0;
 }
@@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m,
 static void
 show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 {
+    struct anon_vma_name *anon_name = NULL;
     struct mm_struct *mm = vma->vm_mm;
     struct file *file = vma->vm_file;
     vm_flags_t flags = vma->vm_flags;
@@ -293,6 +294,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
     start = vma->vm_start;
     end = vma->vm_end;
     show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
+    if (mm)
+        anon_name = anon_vma_name(vma);
 
     /*
      * Print the dentry name for named mappings, and a
@@ -300,6 +303,13 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
      */
     if (file) {
         seq_pad(m, ' ');
+        /*
+         * If user named this anon shared memory via
+         * prctl(PR_SET_VMA ..., use the provided name.
+         */
+        if (anon_name)
+            seq_printf(m, "[anon_shmem:%s]", anon_name->name);
+        else
             seq_file_path(m, file, "\n");
         goto done;
     }
@@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 
     name = arch_vma_name(vma);
     if (!name) {
-        struct anon_vma_name *anon_name;
-
         if (!mm) {
             name = "[vdso]";
             goto done;
@@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
             goto done;
         }
 
-        anon_name = anon_vma_name(vma);
         if (anon_name) {
             seq_pad(m, ' ');
             seq_printf(m, "[anon:%s]", anon_name->name);
@@ -667,6 +674,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
         [ilog2(VM_RAND_READ)] = "rr",
         [ilog2(VM_DONTCOPY)] = "dc",
         [ilog2(VM_DONTEXPAND)] = "de",
+        [ilog2(VM_LOCKONFAULT)] = "lf",
         [ilog2(VM_ACCOUNT)] = "ac",
         [ilog2(VM_NORESERVE)] = "nr",
         [ilog2(VM_HUGETLB)] = "ht",
@@ -1138,10 +1138,6 @@ xfs_ioctl_setattr_xflags(
     if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
         ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
 
-    /* Don't allow us to set DAX mode for a reflinked file for now. */
-    if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
-        return -EINVAL;
-
     /* diflags2 only valid for v3 inodes. */
     i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
     if (i_flags2 && !xfs_has_v3inodes(mp))
@@ -1215,7 +1215,7 @@ xfs_read_iomap_begin(
         return error;
     error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
             &nimaps, 0);
-    if (!error && (flags & IOMAP_REPORT))
+    if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
         error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
     xfs_iunlock(ip, lockmode);
 
@@ -1370,7 +1370,7 @@ xfs_zero_range(
 
     if (IS_DAX(inode))
         return dax_zero_range(inode, pos, len, did_zero,
-                &xfs_direct_write_iomap_ops);
+                &xfs_dax_write_iomap_ops);
     return iomap_zero_range(inode, pos, len, did_zero,
             &xfs_buffered_write_iomap_ops);
 }
@@ -1385,7 +1385,7 @@ xfs_truncate_page(
 
     if (IS_DAX(inode))
         return dax_truncate_page(inode, pos, did_zero,
-                &xfs_direct_write_iomap_ops);
+                &xfs_dax_write_iomap_ops);
     return iomap_truncate_page(inode, pos, did_zero,
             &xfs_buffered_write_iomap_ops);
 }
@@ -1187,10 +1187,6 @@ xfs_inode_supports_dax(
     if (!S_ISREG(VFS_I(ip)->i_mode))
         return false;
 
-    /* Only supported on non-reflinked files. */
-    if (xfs_is_reflink_inode(ip))
-        return false;
-
     /* Block size must match page size */
     if (mp->m_sb.sb_blocksize != PAGE_SIZE)
         return false;
@@ -1693,6 +1693,10 @@ xfs_reflink_unshare(
 
     inode_dio_wait(inode);
 
+    if (IS_DAX(inode))
+        error = dax_file_unshare(inode, offset, len,
+                &xfs_dax_write_iomap_ops);
+    else
         error = iomap_file_unshare(inode, offset, len,
                 &xfs_buffered_write_iomap_ops);
     if (error)
Some files were not shown because too many files have changed in this diff.