mirror of https://gitee.com/bianbu-linux/linux-6.6
Merge tag 'x86-cpu-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cache flush updates from Thomas Gleixner:
 "A reworked version of the opt-in L1D flush mechanism.

  This is a stop gap for potential future speculation related hardware
  vulnerabilities and a mechanism for truly security paranoid
  applications. It allows a task to request that the L1D cache is
  flushed when the kernel switches to a different mm. This can be
  requested via prctl().

  Changes vs the previous versions:

   - Get rid of the software flush fallback

   - Make the handling consistent with other mitigations

   - Kill the task when it ends up on a SMT enabled core which defeats
     the purpose of L1D flushing obviously"

* tag 'x86-cpu-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Documentation: Add L1D flushing Documentation
  x86, prctl: Hook L1D flushing in via prctl
  x86/mm: Prepare for opt-in based L1D flush in switch_mm()
  x86/process: Make room for TIF_SPEC_L1D_FLUSH
  sched: Add task_work callback for paranoid L1D flush
  x86/mm: Refactor cond_ibpb() to support other use cases
  x86/smp: Add a per-cpu view of SMT state
commit 0a096f240a

15 changed files with 284 additions and 31 deletions
Documentation/admin-guide/hw-vuln/index.rst

@@ -16,3 +16,4 @@ are configurable at compile, boot or run time.
    multihit.rst
    special-register-buffer-data-sampling.rst
    core-scheduling.rst
+   l1d_flush.rst
Documentation/admin-guide/hw-vuln/l1d_flush.rst (new file, 69 lines)

@@ -0,0 +1,69 @@
+L1D Flushing
+============
+
+With an increasing number of vulnerabilities being reported around data
+leaks from the Level 1 Data cache (L1D) the kernel provides an opt-in
+mechanism to flush the L1D cache on context switch.
+
+This mechanism can be used to address e.g. CVE-2020-0550. For applications
+the mechanism keeps them safe from vulnerabilities, related to leaks
+(snooping of) from the L1D cache.
+
+
+Related CVEs
+------------
+The following CVEs can be addressed by this
+mechanism
+
+    =============       ========================     ==================
+    CVE-2020-0550       Improper Data Forwarding     OS related aspects
+    =============       ========================     ==================
+
+Usage Guidelines
+----------------
+
+Please see document: :ref:`Documentation/userspace-api/spec_ctrl.rst
+<set_spec_ctrl>` for details.
+
+**NOTE**: The feature is disabled by default, applications need to
+specifically opt into the feature to enable it.
+
+Mitigation
+----------
+
+When PR_SET_L1D_FLUSH is enabled for a task a flush of the L1D cache is
+performed when the task is scheduled out and the incoming task belongs to a
+different process and therefore to a different address space.
+
+If the underlying CPU supports L1D flushing in hardware, the hardware
+mechanism is used, software fallback for the mitigation, is not supported.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the L1D flush mitigations at boot
+time with the option "l1d_flush=". The valid arguments for this option are:
+
+  ============  =============================================================
+  on            Enables the prctl interface, applications trying to use
+                the prctl() will fail with an error if l1d_flush is not
+                enabled
+  ============  =============================================================
+
+By default the mechanism is disabled.
+
+Limitations
+-----------
+
+The mechanism does not mitigate L1D data leaks between tasks belonging to
+different processes which are concurrently executing on sibling threads of
+a physical CPU core when SMT is enabled on the system.
+
+This can be addressed by controlled placement of processes on physical CPU
+cores or by disabling SMT. See the relevant chapter in the L1TF mitigation
+document: :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+**NOTE** : The opt-in of a task for L1D flushing works only when the task's
+affinity is limited to cores running in non-SMT mode. If a task which
+requested L1D flushing is scheduled on a SMT-enabled core the kernel sends
+a SIGBUS to the task.
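For orientation, a minimal userspace sketch of the opt-in described above (an illustration only, not part of this merge; it assumes the kernel was booted with "l1d_flush=on", that the CPU has hardware L1D flush support, and that the task's affinity is restricted to non-SMT cores, otherwise the prctl() fails or the task is later killed with SIGBUS):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>

/* Fallback defines in case the installed uapi headers predate this series */
#ifndef PR_SET_SPECULATION_CTRL
#define PR_SET_SPECULATION_CTRL	53
#endif
#ifndef PR_SPEC_L1D_FLUSH
#define PR_SPEC_L1D_FLUSH	2
#endif
#ifndef PR_SPEC_ENABLE
#define PR_SPEC_ENABLE		(1UL << 1)
#endif

int main(void)
{
	/* Ask for an L1D flush whenever the kernel switches to another mm */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH,
		  PR_SPEC_ENABLE, 0, 0) != 0) {
		/* EPERM: l1d_flush= not enabled or no hardware flush support */
		fprintf(stderr, "L1D flush opt-in failed: %s\n", strerror(errno));
		return 1;
	}

	/* ... run the security sensitive work of this task here ... */
	return 0;
}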
Documentation/admin-guide/kernel-parameters.txt

@@ -2421,6 +2421,23 @@
 			feature (tagged TLBs) on capable Intel chips.
 			Default is 1 (enabled)
 
+	l1d_flush=	[X86,INTEL]
+			Control mitigation for L1D based snooping vulnerability.
+
+			Certain CPUs are vulnerable to an exploit against CPU
+			internal buffers which can forward information to a
+			disclosure gadget under certain conditions.
+
+			In vulnerable processors, the speculatively
+			forwarded data can be used in a cache side channel
+			attack, to access data to which the attacker does
+			not have direct access.
+
+			This parameter controls the mitigation. The
+			options are:
+
+			on         - enable the interface for the mitigation
+
 	l1tf=		[X86] Control mitigation of the L1TF vulnerability on
 			affected CPUs
Documentation/userspace-api/spec_ctrl.rst

@@ -106,3 +106,11 @@ Speculation misfeature controls
   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_L1D_FLUSH: Flush L1D Cache on context switch out of the task
+                        (works only when tasks run on non SMT cores)
+
+  Invocations:
+   * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, 0, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, PR_SPEC_ENABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, PR_SPEC_DISABLE, 0, 0);
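A complementary sketch (again only an illustration, not taken from this merge) of querying the per-task state via PR_GET_SPECULATION_CTRL; the kernel returns PR_SPEC_FORCE_DISABLE when it was not booted with "l1d_flush=on", otherwise PR_SPEC_PRCTL combined with the current enable/disable state:

#include <stdio.h>
#include <sys/prctl.h>

/* Fallback defines in case the installed uapi headers predate this series */
#ifndef PR_GET_SPECULATION_CTRL
#define PR_GET_SPECULATION_CTRL	52
#endif
#ifndef PR_SPEC_L1D_FLUSH
#define PR_SPEC_L1D_FLUSH	2
#endif
#ifndef PR_SPEC_PRCTL
#define PR_SPEC_PRCTL		(1UL << 0)
#endif
#ifndef PR_SPEC_ENABLE
#define PR_SPEC_ENABLE		(1UL << 1)
#endif

int main(void)
{
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, 0, 0, 0);

	if (state < 0)
		perror("PR_GET_SPECULATION_CTRL");	/* e.g. ENODEV on older kernels */
	else if (!(state & PR_SPEC_PRCTL))
		puts("L1D flush control not available (kernel not booted with l1d_flush=on?)");
	else if (state & PR_SPEC_ENABLE)
		puts("L1D flush on context switch is enabled for this task");
	else
		puts("L1D flush control available, currently disabled");

	return 0;
}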
arch/Kconfig

@@ -1282,6 +1282,9 @@ config ARCH_SPLIT_ARG64
 config ARCH_HAS_ELFCORE_COMPAT
 	bool
 
+config ARCH_HAS_PARANOID_L1D_FLUSH
+	bool
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
arch/x86/Kconfig

@@ -119,6 +119,7 @@ config X86
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_THP_SWAP		if X86_64
+	select ARCH_HAS_PARANOID_L1D_FLUSH
 	select BUILDTIME_TABLE_SORT
 	select CLKEVT_I8253
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
arch/x86/include/asm/nospec-branch.h

@@ -252,6 +252,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 DECLARE_STATIC_KEY_FALSE(mds_user_clear);
 DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
 
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
 #include <asm/segment.h>
 
 /**
arch/x86/include/asm/processor.h

@@ -136,6 +136,8 @@ struct cpuinfo_x86 {
 	u16			logical_die_id;
 	/* Index into per_cpu list: */
 	u16			cpu_index;
+	/*  Is SMT active on this core? */
+	bool			smt_active;
 	u32			microcode;
 	/* Address space bits used by the cache internally */
 	u8			x86_cache_bits;
arch/x86/include/asm/thread_info.h

@@ -81,7 +81,7 @@ struct thread_info {
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
 #define TIF_SSBD		5	/* Speculative store bypass disable */
 #define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE	10	/* Force speculation MSR update in context switch */
+#define TIF_SPEC_L1D_FLUSH	10	/* Flush L1D on mm switches (processes) */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_PATCH_PENDING	13	/* pending live patching update */
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE	23	/* Force speculation MSR update in context switch */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
@@ -104,7 +105,7 @@ struct thread_info {
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_SSBD		(1 << TIF_SSBD)
 #define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
-#define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
+#define _TIF_SPEC_L1D_FLUSH	(1 << TIF_SPEC_L1D_FLUSH)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
@@ -115,6 +116,7 @@ struct thread_info {
 #define _TIF_SLD		(1 << TIF_SLD)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
arch/x86/include/asm/tlbflush.h

@@ -83,7 +83,7 @@ struct tlb_state {
 	/* Last user mm for optimizing IBPB */
 	union {
 		struct mm_struct	*last_user_mm;
-		unsigned long		last_user_mm_ibpb;
+		unsigned long		last_user_mm_spec;
 	};
 
 	u16 loaded_mm_asid;
arch/x86/kernel/cpu/bugs.c

@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
 static void __init mds_print_mitigation(void);
 static void __init taa_select_mitigation(void);
 static void __init srbds_select_mitigation(void);
+static void __init l1d_flush_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
 u64 x86_spec_ctrl_base;
@@ -76,6 +77,13 @@ EXPORT_SYMBOL_GPL(mds_user_clear);
 DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
 EXPORT_SYMBOL_GPL(mds_idle_clear);
 
+/*
+ * Controls whether l1d flush based mitigations are enabled,
+ * based on hw features and admin setting via boot parameter
+ * defaults to false
+ */
+DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -111,6 +119,7 @@ void __init check_bugs(void)
 	mds_select_mitigation();
 	taa_select_mitigation();
 	srbds_select_mitigation();
+	l1d_flush_select_mitigation();
 
 	/*
 	 * As MDS and TAA mitigations are inter-related, print MDS
@@ -491,6 +500,34 @@ static int __init srbds_parse_cmdline(char *str)
 }
 early_param("srbds", srbds_parse_cmdline);
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"L1D Flush : " fmt
+
+enum l1d_flush_mitigations {
+	L1D_FLUSH_OFF = 0,
+	L1D_FLUSH_ON,
+};
+
+static enum l1d_flush_mitigations l1d_flush_mitigation __initdata = L1D_FLUSH_OFF;
+
+static void __init l1d_flush_select_mitigation(void)
+{
+	if (!l1d_flush_mitigation || !boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+		return;
+
+	static_branch_enable(&switch_mm_cond_l1d_flush);
+	pr_info("Conditional flush on switch_mm() enabled\n");
+}
+
+static int __init l1d_flush_parse_cmdline(char *str)
+{
+	if (!strcmp(str, "on"))
+		l1d_flush_mitigation = L1D_FLUSH_ON;
+
+	return 0;
+}
+early_param("l1d_flush", l1d_flush_parse_cmdline);
+
 #undef pr_fmt
 #define pr_fmt(fmt)	"Spectre V1 : " fmt
 
@@ -1215,6 +1252,24 @@ static void task_update_spec_tif(struct task_struct *tsk)
 	speculation_ctrl_update_current();
 }
 
+static int l1d_flush_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+
+	if (!static_branch_unlikely(&switch_mm_cond_l1d_flush))
+		return -EPERM;
+
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		set_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+		return 0;
+	case PR_SPEC_DISABLE:
+		clear_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+		return 0;
+	default:
+		return -ERANGE;
+	}
+}
+
 static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
 	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
@@ -1324,6 +1379,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 		return ssb_prctl_set(task, ctrl);
 	case PR_SPEC_INDIRECT_BRANCH:
 		return ib_prctl_set(task, ctrl);
+	case PR_SPEC_L1D_FLUSH:
+		return l1d_flush_prctl_set(task, ctrl);
 	default:
 		return -ENODEV;
 	}
@@ -1340,6 +1397,17 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
 }
 #endif
 
+static int l1d_flush_prctl_get(struct task_struct *task)
+{
+	if (!static_branch_unlikely(&switch_mm_cond_l1d_flush))
+		return PR_SPEC_FORCE_DISABLE;
+
+	if (test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH))
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	else
+		return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+}
+
 static int ssb_prctl_get(struct task_struct *task)
 {
 	switch (ssb_mode) {
@@ -1390,6 +1458,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 		return ssb_prctl_get(task);
 	case PR_SPEC_INDIRECT_BRANCH:
 		return ib_prctl_get(task);
+	case PR_SPEC_L1D_FLUSH:
+		return l1d_flush_prctl_get(task);
 	default:
 		return -ENODEV;
 	}
arch/x86/kernel/smpboot.c

@@ -610,6 +610,9 @@ void set_cpu_sibling_map(int cpu)
 	if (threads > __max_smt_threads)
 		__max_smt_threads = threads;
 
+	for_each_cpu(i, topology_sibling_cpumask(cpu))
+		cpu_data(i).smt_active = threads > 1;
+
 	/*
 	 * This needs a separate iteration over the cpus because we rely on all
 	 * topology_sibling_cpumask links to be set-up.
@@ -1552,8 +1555,13 @@ static void remove_siblinginfo(int cpu)
 
 	for_each_cpu(sibling, topology_die_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
-	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+
+	for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+		if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1)
+			cpu_data(sibling).smt_active = false;
+	}
+
 	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
 		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
 	cpumask_clear(cpu_llc_shared_mask(cpu));
arch/x86/mm/tlb.c

@@ -8,11 +8,13 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
+#include <linux/sched/smt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/nospec-branch.h>
 #include <asm/cache.h>
+#include <asm/cacheflush.h>
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
@@ -43,10 +45,15 @@
  */
 
 /*
- * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
- * stored in cpu_tlb_state.last_user_mm_ibpb.
+ * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_spec.
  */
 #define LAST_USER_MM_IBPB	0x1UL
+#define LAST_USER_MM_L1D_FLUSH	0x2UL
+#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
+
+/* Bits to set when tlbstate and flush is (re)initialized */
+#define LAST_USER_MM_INIT	LAST_USER_MM_IBPB
 
 /*
  * The x86 feature is called PCID (Process Context IDentifier). It is similar
@@ -317,20 +324,70 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	local_irq_restore(flags);
 }
 
-static unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+/*
+ * Invoked from return to user/guest by a task that opted-in to L1D
+ * flushing but ended up running on an SMT enabled core due to wrong
+ * affinity settings or CPU hotplug. This is part of the paranoid L1D flush
+ * contract which this task requested.
+ */
+static void l1d_flush_force_sigbus(struct callback_head *ch)
 {
-	unsigned long next_tif = task_thread_info(next)->flags;
-	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
-
-	return (unsigned long)next->mm | ibpb;
+	force_sig(SIGBUS);
 }
 
-static void cond_ibpb(struct task_struct *next)
+static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
+				struct task_struct *next)
 {
-	if (!next || !next->mm)
+	/* Flush L1D if the outgoing task requests it */
+	if (prev_mm & LAST_USER_MM_L1D_FLUSH)
+		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+
+	/* Check whether the incoming task opted in for L1D flush */
+	if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
 		return;
 
 	/*
+	 * Validate that it is not running on an SMT sibling as this would
+	 * make the excercise pointless because the siblings share L1D. If
+	 * it runs on a SMT sibling, notify it with SIGBUS on return to
+	 * user/guest
+	 */
+	if (this_cpu_read(cpu_info.smt_active)) {
+		clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
+		next->l1d_flush_kill.func = l1d_flush_force_sigbus;
+		task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
+	}
+}
+
+static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
+
+	/*
+	 * Ensure that the bit shift above works as expected and the two flags
+	 * end up in bit 0 and 1.
+	 */
+	BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
+
+	return (unsigned long)next->mm | spec_bits;
+}
+
+static void cond_mitigation(struct task_struct *next)
+{
+	unsigned long prev_mm, next_mm;
+
+	if (!next || !next->mm)
+		return;
+
+	next_mm = mm_mangle_tif_spec_bits(next);
+	prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
+
+	/*
+	 * Avoid user/user BTB poisoning by flushing the branch predictor
+	 * when switching between processes. This stops one process from
+	 * doing Spectre-v2 attacks on another.
+	 *
 	 * Both, the conditional and the always IBPB mode use the mm
 	 * pointer to avoid the IBPB when switching between tasks of the
 	 * same process. Using the mm pointer instead of mm->context.ctx_id
@@ -340,8 +397,6 @@ static void cond_ibpb(struct task_struct *next)
 	 * exposed data is not really interesting.
 	 */
 	if (static_branch_likely(&switch_mm_cond_ibpb)) {
-		unsigned long prev_mm, next_mm;
-
 		/*
 		 * This is a bit more complex than the always mode because
 		 * it has to handle two cases:
@@ -371,20 +426,14 @@ static void cond_ibpb(struct task_struct *next)
 		 * Optimize this with reasonably small overhead for the
 		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
 		 * pointer of the incoming task which is stored in
-		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
-		 */
-		next_mm = mm_mangle_tif_spec_ib(next);
-		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
-
-		/*
+		 * cpu_tlbstate.last_user_mm_spec for comparison.
+		 *
 		 * Issue IBPB only if the mm's are different and one or
 		 * both have the IBPB bit set.
 		 */
 		if (next_mm != prev_mm &&
 		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
 			indirect_branch_prediction_barrier();
-
-		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
 	}
 
 	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
@@ -393,11 +442,22 @@ static void cond_ibpb(struct task_struct *next)
 		 * different context than the user space task which ran
 		 * last on this CPU.
 		 */
-		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+		if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
+					(unsigned long)next->mm)
 			indirect_branch_prediction_barrier();
-			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
-		}
 	}
 
+	if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
+		/*
+		 * Flush L1D when the outgoing task requested it and/or
+		 * check whether the incoming task requested L1D flushing
+		 * and ended up on an SMT sibling.
+		 */
+		if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH))
+			l1d_flush_evaluate(prev_mm, next_mm, next);
+	}
+
+	this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
 }
 
 #ifdef CONFIG_PERF_EVENTS
@@ -531,11 +591,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		need_flush = true;
 	} else {
 		/*
-		 * Avoid user/user BTB poisoning by flushing the branch
-		 * predictor when switching between processes. This stops
-		 * one process from doing Spectre-v2 attacks on another.
+		 * Apply process to process speculation vulnerability
+		 * mitigations if applicable.
 		 */
-		cond_ibpb(tsk);
+		cond_mitigation(tsk);
 
 		/*
 		 * Stop remote flushes for the previous mm.
@@ -643,7 +702,7 @@ void initialize_tlbstate_and_flush(void)
 	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
+	this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
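To make the mangling scheme in cond_mitigation() easier to follow, here is a small standalone sketch (illustration only, simplified from the diff above; the "mm pointer" value is fake): because mm_struct pointers are at least word aligned, bits 0 and 1 of the stored value are free, so the TIF_SPEC_IB and TIF_SPEC_L1D_FLUSH task flags can be shifted down and ORed into the remembered mm pointer, and a single compare/mask decides whether IBPB and/or an L1D flush is needed:

#include <stdio.h>
#include <stdint.h>

/* Values mirrored from the diff above */
#define TIF_SPEC_IB		9
#define TIF_SPEC_L1D_FLUSH	10

#define LAST_USER_MM_IBPB	0x1UL
#define LAST_USER_MM_L1D_FLUSH	0x2UL
#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)

/* Mangle the two TIF_SPEC_* bits into the low bits of an (aligned) pointer */
static unsigned long mangle(uintptr_t mm, unsigned long tif_flags)
{
	unsigned long spec_bits = (tif_flags >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

	return (unsigned long)mm | spec_bits;
}

int main(void)
{
	uintptr_t mm = 0x7f1000UL;	/* fake, suitably aligned "mm pointer" */
	unsigned long prev = mangle(mm, 1UL << TIF_SPEC_IB);
	unsigned long next = mangle(mm, (1UL << TIF_SPEC_IB) | (1UL << TIF_SPEC_L1D_FLUSH));

	/* Same mm, but the L1D flush request still makes the stored values differ */
	printf("prev=%#lx next=%#lx\n", prev, next);
	printf("mm unchanged: %d\n",
	       (prev & ~LAST_USER_MM_SPEC_MASK) == (next & ~LAST_USER_MM_SPEC_MASK));
	printf("L1D flush to evaluate: %d\n",
	       !!((prev | next) & LAST_USER_MM_L1D_FLUSH));
	return 0;
}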
include/linux/sched.h

@@ -1474,6 +1474,16 @@ struct task_struct {
 	struct llist_head               kretprobe_instances;
 #endif
 
+#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
+	/*
+	 * If L1D flush is supported on mm context switch
+	 * then we use this callback head to queue kill work
+	 * to kill tasks that are not running on SMT disabled
+	 * cores
+	 */
+	struct callback_head		l1d_flush_kill;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
include/uapi/linux/prctl.h

@@ -213,6 +213,7 @@ struct prctl_mm_map {
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
 # define PR_SPEC_INDIRECT_BRANCH	1
+# define PR_SPEC_L1D_FLUSH		2
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)