Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.5-2023-06-02:

amdgpu:
- SR-IOV fixes
- Warning fixes
- Misc code cleanups and spelling fixes
- DCN 3.2 updates
- Improved DC FAMS support for better power management
- Improved DC SubVP support for better power management
- DCN 3.1.x fixes
- Max IB size query
- DC GPU reset fixes
- RAS updates
- DCN 3.0.x fixes
- S/G display fixes
- CP shadow buffer support
- Implement connector force callback
- Z8 power improvements
- PSP 13.0.10 vbflash support
- Mode2 reset fixes
- Store MQDs in VRAM to improve queue switch latency
- VCN 3.x fixes
- JPEG 3.x fixes
- Enable DC_FP on LoongArch
- GFXOFF fixes
- GC 9.4.3 partition support
- SDMA 4.4.2 partition support
- VCN/JPEG 4.0.3 partition support
- VCN 4.0.3 updates
- NBIO 7.9 updates
- GC 9.4.3 updates
- Take NUMA into account when allocating memory
- Handle NUMA for partitions
- SMU 13.0.6 updates
- GC 9.4.3 RAS updates
- Stop including unused swiotlb.h
- SMU 13.0.7 fixes
- Fix clock output ordering on some APUs
- Clean up DC FPGA code
- GFX9 preemption fixes
- Misc irq fixes
- S0ix fixes
- Add new DRM_AMDGPU_WERROR config parameter to help with CI
- PCIe fix for RDNA2
- kdoc fixes
- Documentation updates

amdkfd:
- Query TTM mem limit rather than hardcoding it
- GC 9.4.3 partition support
- Handle NUMA for partitions

radeon:
- Fix possible double free
- Stop including unused swiotlb.h
- Fix possible division by zero

ttm:
- Add query for TTM mem limit
- Add NUMA awareness to pools
- Export ttm_pool_fini()

UAPI:
- Add new ctx query flag to better handle GPU resets
  Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
- Add new interface to query and set shadow buffer for RDNA3
  Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986
- Add new INFO query for max IB size
  Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3
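
A minimal userspace sketch of the max IB size query follows. It assumes the
query id is exposed as AMDGPU_INFO_MAX_IBS and that the kernel returns one
32-bit limit per AMDGPU_HW_IP_* type; both are assumptions taken from this
pull and the Mesa branch above, so check the merged
include/uapi/drm/amdgpu_drm.h for the final ABI before relying on it.

    /* Hedged sketch only: AMDGPU_INFO_MAX_IBS and its return layout are
     * assumed, not a verified ABI description. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <libdrm/amdgpu_drm.h>

    int main(void)
    {
        uint32_t max_ibs[AMDGPU_HW_IP_NUM] = { 0 };
        struct drm_amdgpu_info req;
        int fd = open("/dev/dri/renderD128", O_RDWR); /* first render node; adjust as needed */

        if (fd < 0)
            return 1;

        memset(&req, 0, sizeof(req));
        req.return_pointer = (uintptr_t)max_ibs; /* kernel writes the per-IP limits here */
        req.return_size = sizeof(max_ibs);
        req.query = AMDGPU_INFO_MAX_IBS;         /* assumed new query id from this pull */

        if (ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &req) == 0)
            printf("GFX max IBs per submission: %u\n", max_ibs[AMDGPU_HW_IP_GFX]);
        return 0;
    }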

amd-drm-next-6.5-2023-06-09:

amdgpu:
- S0ix fixes
- Initial SMU13 Overdrive support
- kdoc fixes
- Misc code cleanups
- Flexible array fixes
- Display OTG fixes
- SMU 13.0.6 updates
- Revert some broken clock counter updates
- Misc display fixes
- GFX9 preemption fixes
- Add support for newer EEPROM bad page table format
- Add missing radeon secondary id
- Add support for new colorspace KMS API
- CSA fix
- Stable pstate fixes for APUs
- Make vbl interface admin only
- Handle PCI accelerator class

amdkfd:
- Add debugger support for gdb

radeon:
- Fix possible UAF

drm:
- Add Colorspace functionality

UAPI:
- Add debugger interface for enabling gdb
  Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi
- Add KMS colorspace API
  Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html
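
For the colorspace item, a hedged libdrm sketch follows: it looks up the
connector's "Colorspace" enum property and programs one of its values (for
example "BT2020_RGB"). Which enum names a given connector exposes depends on
the driver and on the final form of the API discussed in the thread above.

    /* Sketch under the assumption that the connector exposes a "Colorspace"
     * enum property; the available enum names are driver dependent. */
    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include <xf86drmMode.h>

    static int set_colorspace(int fd, uint32_t connector_id, const char *wanted)
    {
        drmModeObjectProperties *props =
            drmModeObjectGetProperties(fd, connector_id, DRM_MODE_OBJECT_CONNECTOR);
        int ret = -1;

        if (!props)
            return -1;

        for (uint32_t i = 0; i < props->count_props; i++) {
            drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

            if (!prop)
                continue;
            if (!strcmp(prop->name, "Colorspace")) {
                /* Find the enum entry matching the requested name. */
                for (int j = 0; j < prop->count_enums; j++) {
                    if (!strcmp(prop->enums[j].name, wanted)) {
                        ret = drmModeObjectSetProperty(fd, connector_id,
                                                       DRM_MODE_OBJECT_CONNECTOR,
                                                       prop->prop_id,
                                                       prop->enums[j].value);
                        break;
                    }
                }
            }
            drmModeFreeProperty(prop);
        }
        drmModeFreeObjectProperties(props);
        return ret;
    }

An atomic client would pass the same property id to drmModeAtomicAddProperty;
the legacy setter is used here only to keep the example short.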

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230609174817.7764-1-alexander.deucher@amd.com
Committed by Dave Airlie on 2023-06-15 14:11:22 +10:00 (commit 901bdf5ea1).
498 changed files with 45772 additions and 8021 deletions.


@@ -5,6 +5,8 @@ Ryzen 4000 series, RENOIR, DCN 2.1, 9.3, VCN 2.2, 4.1.2, 11.0.3
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN2, DCN 1.0, 9.2.2, VCN 1.0.1, 4.1.1, 10.0.1
SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
-Ryzen 6000 series / Ryzen 7x35 series, YELLOW CARP / Rembrandt / Rembrandt+, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
+Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11


@@ -140,6 +140,7 @@ obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_SCHED) += scheduler/
obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
obj-$(CONFIG_DRM_MGAG200) += mgag200/


@@ -69,6 +69,16 @@ config DRM_AMDGPU_USERPTR
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.
config DRM_AMDGPU_WERROR
bool "Force the compiler to throw an error instead of a warning when compiling"
depends on DRM_AMDGPU
depends on EXPERT
depends on !COMPILE_TEST
default n
help
Add -Werror to the build flags for amdgpu.ko.
Only enable this if you are warning code for amdgpu.ko.
source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig" source "drivers/gpu/drm/amd/amdkfd/Kconfig"


@@ -39,6 +39,15 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
subdir-ccflags-y := -Wextra
subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
subdir-ccflags-y += -Wno-unused-parameter
subdir-ccflags-y += -Wno-type-limits
subdir-ccflags-y += -Wno-sign-compare
subdir-ccflags-y += -Wno-missing-field-initializers
subdir-ccflags-y += -Wno-override-init
subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
amdgpu-y := amdgpu_drv.o
# add KMS driver
@@ -60,7 +69,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-amdgpu_ring_mux.o
+amdgpu_ring_mux.o amdgpu_xcp.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -78,7 +87,7 @@ amdgpu-y += \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
-nbio_v7_9.o
+nbio_v7_9.o aqua_vanjaram_reg_init.o
# add DF block
amdgpu-y += \
@@ -183,12 +192,14 @@ amdgpu-y += \
vcn_v2_5.o \
vcn_v3_0.o \
vcn_v4_0.o \
vcn_v4_0_3.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
jpeg_v3_0.o \
-jpeg_v4_0.o
+jpeg_v4_0.o \
jpeg_v4_0_3.o
# add ATHUB block
amdgpu-y += \
@@ -203,6 +214,7 @@ amdgpu-y += \
smuio_v11_0.o \
smuio_v11_0_6.o \
smuio_v13_0.o \
smuio_v13_0_3.o \
smuio_v13_0_6.o
# add reset block
@@ -228,6 +240,7 @@ amdgpu-y += \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_aldebaran.o \
amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o


@@ -107,8 +107,9 @@
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
#include "amdgpu_ras.h"
#include "amdgpu_xcp.h"
-#define MAX_GPU_INSTANCE 16
+#define MAX_GPU_INSTANCE 64
struct amdgpu_gpu_instance
{
@@ -212,6 +213,8 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
extern int amdgpu_mtype_local;
extern bool enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
@@ -242,9 +245,10 @@ extern int amdgpu_num_kcq;
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;
extern int amdgpu_user_partt_mode;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
@@ -282,6 +286,7 @@ extern int amdgpu_sg_display;
#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
struct amdgpu_xcp_mgr;
struct amdgpu_device;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
@@ -463,6 +468,8 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
/** GPU partition selection */
uint32_t xcp_id;
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
@@ -573,6 +580,8 @@ struct amdgpu_asic_funcs {
/* query video codecs */
int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs);
/* encode "> 32bits" smn addressing */
u64 (*encode_ext_smn_addressing)(int ext_id);
};
/*
@@ -607,6 +616,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
@@ -657,7 +669,7 @@ enum amd_hw_ip_block_type {
MAX_HWIP
};
-#define HWIP_MAX_INSTANCE 28
+#define HWIP_MAX_INSTANCE 44
#define HW_ID_MAX 300
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
@@ -665,6 +677,17 @@ enum amd_hw_ip_block_type {
#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
#define IP_VERSION_REV(ver) ((ver) & 0xFF)
struct amdgpu_ip_map_info {
/* Map of logical to actual dev instances/mask */
uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
enum amd_hw_ip_block_type block,
int8_t inst);
uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
enum amd_hw_ip_block_type block,
uint32_t mask);
};
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
@@ -750,6 +773,7 @@ struct amdgpu_device {
struct amdgpu_acp acp;
#endif
struct amdgpu_hive_info *hive;
struct amdgpu_xcp_mgr *xcp_mgr;
/* ASIC */
enum amd_asic_type asic_type;
uint32_t family;
@@ -797,6 +821,8 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
amdgpu_rreg_ext_t pcie_rreg_ext;
amdgpu_wreg_ext_t pcie_wreg_ext;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
/* protects concurrent UVD register access */
@@ -830,7 +856,7 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
-unsigned num_vmhubs;
+DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
/* memory management */
struct amdgpu_mman mman;
@@ -962,6 +988,7 @@ struct amdgpu_device {
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
struct amdgpu_ip_map_info ip_map;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
@@ -1020,6 +1047,9 @@ struct amdgpu_device {
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;
/* Track auto wait count on s_barrier settings */
bool barrier_has_auto_waitcnt;
struct amdgpu_reset_control *reset_cntl;
uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
@@ -1050,6 +1080,8 @@ struct amdgpu_device {
bool job_hang;
bool dc_enabled;
/* Mask of active clusters */
uint32_t aid_mask;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1081,11 +1113,18 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
void *buf, size_t size, bool write);
uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
uint32_t inst, uint32_t reg_addr, char reg_name[],
uint32_t expected_value, uint32_t mask);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
u64 reg_addr, u32 reg_data);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
uint32_t reg, uint32_t v);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
@@ -1137,6 +1176,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
@@ -1204,7 +1245,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* ASICs macro.
*/
-#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
+#define amdgpu_asic_set_vga_state(adev, state) \
+((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
@@ -1235,6 +1277,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
#define for_each_inst(i, inst_mask) \
for (i = ffs(inst_mask) - 1; inst_mask; \
inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1)
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
/* Common functions */
@@ -1348,6 +1394,12 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
/* amdgpu_acpi.c */
struct amdgpu_numa_info {
uint64_t size;
int pxm;
int nid;
};
/* ATCS Device/Driver State */
#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3
@@ -1365,15 +1417,32 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state);
int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size);
int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
struct amdgpu_numa_info *numa_info);
void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
void amdgpu_acpi_detect(void);
void amdgpu_acpi_release(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
u64 *tmr_offset, u64 *tmr_size)
{
return -EINVAL;
}
static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
int xcc_id,
struct amdgpu_numa_info *numa_info)
{
return -EINVAL;
}
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline void amdgpu_acpi_detect(void) { }
static inline void amdgpu_acpi_release(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state) { return 0; }


@@ -26,6 +26,7 @@
#include <linux/acpi.h>
#include <linux/backlight.h>
#include <linux/slab.h>
#include <linux/xarray.h>
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
@@ -38,6 +39,45 @@
#include "amd_acpi.h"
#include "atom.h"
/* Declare GUID for AMD _DSM method for XCCs */
static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
0xb8, 0xb4, 0x45, 0x56, 0x2e,
0x8c, 0x5b, 0xec);
#define AMD_XCC_HID_START 3000
#define AMD_XCC_DSM_GET_NUM_FUNCS 0
#define AMD_XCC_DSM_GET_SUPP_MODE 1
#define AMD_XCC_DSM_GET_XCP_MODE 2
#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
#define AMD_XCC_DSM_GET_TMR_INFO 5
#define AMD_XCC_DSM_NUM_FUNCS 5
#define AMD_XCC_MAX_HID 24
struct xarray numa_info_xa;
/* Encapsulates the XCD acpi object information */
struct amdgpu_acpi_xcc_info {
struct list_head list;
struct amdgpu_numa_info *numa_info;
uint8_t xcp_node;
uint8_t phy_id;
acpi_handle handle;
};
struct amdgpu_acpi_dev_info {
struct list_head list;
struct list_head xcc_list;
uint16_t bdf;
uint16_t supp_xcp_mode;
uint16_t xcp_mode;
uint16_t mem_mode;
uint64_t tmr_base;
uint64_t tmr_size;
};
struct list_head amdgpu_acpi_dev_list;
struct amdgpu_atif_notification_cfg {
bool enabled;
int command_code;
@@ -801,6 +841,343 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
return r;
}
#ifdef CONFIG_ACPI_NUMA
static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
{
/* This is directly using si_meminfo_node implementation as the
* function is not exported.
*/
int zone_type;
uint64_t managed_pages = 0;
pg_data_t *pgdat = NODE_DATA(nid);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
managed_pages +=
zone_managed_pages(&pgdat->node_zones[zone_type]);
return managed_pages * PAGE_SIZE;
}
static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
{
struct amdgpu_numa_info *numa_info;
int nid;
numa_info = xa_load(&numa_info_xa, pxm);
if (!numa_info) {
struct sysinfo info;
numa_info = kzalloc(sizeof *numa_info, GFP_KERNEL);
if (!numa_info)
return NULL;
nid = pxm_to_node(pxm);
numa_info->pxm = pxm;
numa_info->nid = nid;
if (numa_info->nid == NUMA_NO_NODE) {
si_meminfo(&info);
numa_info->size = info.totalram * info.mem_unit;
} else {
numa_info->size = amdgpu_acpi_get_numa_size(nid);
}
xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
}
return numa_info;
}
#endif
/**
* amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
* acpi device handle
*
* @handle: acpi handle
* @numa_info: amdgpu_numa_info structure holding numa information
*
* Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
* given amdgpu acpi device.
*
* Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
*/
static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
struct amdgpu_numa_info **numa_info)
{
#ifdef CONFIG_ACPI_NUMA
u64 pxm;
acpi_status status;
if (!numa_info)
return_ACPI_STATUS(AE_ERROR);
status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
if (ACPI_FAILURE(status))
return status;
*numa_info = amdgpu_acpi_get_numa_info(pxm);
if (!*numa_info)
return_ACPI_STATUS(AE_ERROR);
return_ACPI_STATUS(AE_OK);
#else
return_ACPI_STATUS(AE_NOT_EXIST);
#endif
}
static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
{
struct amdgpu_acpi_dev_info *acpi_dev;
if (list_empty(&amdgpu_acpi_dev_list))
return NULL;
list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
if (acpi_dev->bdf == bdf)
return acpi_dev;
return NULL;
}
static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
{
struct amdgpu_acpi_dev_info *tmp;
union acpi_object *obj;
int ret = -ENOENT;
*dev_info = NULL;
tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
if (!tmp)
return -ENOMEM;
INIT_LIST_HEAD(&tmp->xcc_list);
INIT_LIST_HEAD(&tmp->list);
tmp->bdf = bdf;
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_SUPP_MODE, NULL,
ACPI_TYPE_INTEGER);
if (!obj) {
acpi_handle_debug(xcc_info->handle,
"_DSM function %d evaluation failed",
AMD_XCC_DSM_GET_SUPP_MODE);
ret = -ENOENT;
goto out;
}
tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
ACPI_FREE(obj);
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_XCP_MODE, NULL,
ACPI_TYPE_INTEGER);
if (!obj) {
acpi_handle_debug(xcc_info->handle,
"_DSM function %d evaluation failed",
AMD_XCC_DSM_GET_XCP_MODE);
ret = -ENOENT;
goto out;
}
tmp->xcp_mode = obj->integer.value & 0xFFFF;
tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
ACPI_FREE(obj);
/* Evaluate DSMs and fill XCC information */
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_TMR_INFO, NULL,
ACPI_TYPE_PACKAGE);
if (!obj || obj->package.count < 2) {
acpi_handle_debug(xcc_info->handle,
"_DSM function %d evaluation failed",
AMD_XCC_DSM_GET_TMR_INFO);
ret = -ENOENT;
goto out;
}
tmp->tmr_base = obj->package.elements[0].integer.value;
tmp->tmr_size = obj->package.elements[1].integer.value;
ACPI_FREE(obj);
DRM_DEBUG_DRIVER(
"New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
tmp->tmr_base, tmp->tmr_size);
list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
*dev_info = tmp;
return 0;
out:
if (obj)
ACPI_FREE(obj);
kfree(tmp);
return ret;
}
static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
u16 *bdf)
{
union acpi_object *obj;
acpi_status status;
int ret = -ENOENT;
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
ACPI_TYPE_INTEGER);
if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
goto out;
ACPI_FREE(obj);
/* Evaluate DSMs and fill XCC information */
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
ACPI_TYPE_INTEGER);
if (!obj) {
acpi_handle_debug(xcc_info->handle,
"_DSM function %d evaluation failed",
AMD_XCC_DSM_GET_VF_XCC_MAPPING);
ret = -EINVAL;
goto out;
}
/* PF xcc id [39:32] */
xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
/* xcp node of this xcc [47:40] */
xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
/* PF bus/dev/fn of this xcc [63:48] */
*bdf = (obj->integer.value >> 48) & 0xFFFF;
ACPI_FREE(obj);
obj = NULL;
status =
amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
/* TODO: check if this check is required */
if (ACPI_SUCCESS(status))
ret = 0;
out:
if (obj)
ACPI_FREE(obj);
return ret;
}
static int amdgpu_acpi_enumerate_xcc(void)
{
struct amdgpu_acpi_dev_info *dev_info = NULL;
struct amdgpu_acpi_xcc_info *xcc_info;
struct acpi_device *acpi_dev;
char hid[ACPI_ID_LEN];
int ret, id;
u16 bdf;
INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
xa_init(&numa_info_xa);
for (id = 0; id < AMD_XCC_MAX_HID; id++) {
sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
/* These ACPI objects are expected to be in sequential order. If
* one is not found, no need to check the rest.
*/
if (!acpi_dev) {
DRM_DEBUG_DRIVER("No matching acpi device found for %s",
hid);
break;
}
xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
GFP_KERNEL);
if (!xcc_info) {
DRM_ERROR("Failed to allocate memory for xcc info\n");
return -ENOMEM;
}
INIT_LIST_HEAD(&xcc_info->list);
xcc_info->handle = acpi_device_handle(acpi_dev);
acpi_dev_put(acpi_dev);
ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
if (ret) {
kfree(xcc_info);
continue;
}
dev_info = amdgpu_acpi_get_dev(bdf);
if (!dev_info)
ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf);
if (ret == -ENOMEM)
return ret;
if (!dev_info) {
kfree(xcc_info);
continue;
}
list_add_tail(&xcc_info->list, &dev_info->xcc_list);
}
return 0;
}
int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size)
{
struct amdgpu_acpi_dev_info *dev_info;
u16 bdf;
if (!tmr_offset || !tmr_size)
return -EINVAL;
bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
dev_info = amdgpu_acpi_get_dev(bdf);
if (!dev_info)
return -ENOENT;
*tmr_offset = dev_info->tmr_base;
*tmr_size = dev_info->tmr_size;
return 0;
}
int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
struct amdgpu_numa_info *numa_info)
{
struct amdgpu_acpi_dev_info *dev_info;
struct amdgpu_acpi_xcc_info *xcc_info;
u16 bdf;
if (!numa_info)
return -EINVAL;
bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
dev_info = amdgpu_acpi_get_dev(bdf);
if (!dev_info)
return -ENOENT;
list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
if (xcc_info->phy_id == xcc_id) {
memcpy(numa_info, xcc_info->numa_info,
sizeof(*numa_info));
return 0;
}
}
return -ENOENT;
}
/**
* amdgpu_acpi_event - handle notify events
*
@@ -1054,6 +1431,36 @@ void amdgpu_acpi_detect(void)
} else {
atif->backlight_caps.caps_valid = false;
}
amdgpu_acpi_enumerate_xcc();
}
void amdgpu_acpi_release(void)
{
struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
struct amdgpu_numa_info *numa_info;
unsigned long index;
xa_for_each(&numa_info_xa, index, numa_info) {
kfree(numa_info);
xa_erase(&numa_info_xa, index);
}
if (list_empty(&amdgpu_acpi_dev_list))
return;
list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
list) {
list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
list) {
list_del(&xcc_info->list);
kfree(xcc_info);
}
list_del(&dev_info->list);
kfree(dev_info);
}
}
#if IS_ENABLED(CONFIG_SUSPEND)
@@ -1092,16 +1499,20 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
* S0ix even though the system is suspending to idle, so return false
* in that case.
*/
-if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
+if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
-dev_warn_once(adev->dev,
+dev_err_once(adev->dev,
"Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
"To use suspend-to-idle change the sleep mode in BIOS setup.\n");
return false;
}
#if !IS_ENABLED(CONFIG_AMD_PMC)
-dev_warn_once(adev->dev,
+dev_err_once(adev->dev,
"Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
-#endif /* CONFIG_AMD_PMC */
+return false;
#else
return true;
#endif /* CONFIG_AMD_PMC */
}
#endif /* CONFIG_SUSPEND */


@@ -53,7 +53,6 @@ int amdgpu_amdkfd_init(void)
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
ret = kgd2kfd_init();
-amdgpu_amdkfd_gpuvm_init_mem_limits();
kfd_initialized = !ret;
return ret;
@@ -143,6 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int i;
int last_valid_bit;
amdgpu_amdkfd_gpuvm_init_mem_limits();
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap =
@@ -162,7 +163,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* clear
*/
bitmap_complement(gpu_resources.cp_queue_bitmap,
-adev->gfx.mec.queue_bitmap,
+adev->gfx.mec_bitmap[0].queue_bitmap,
KGD_MAX_QUEUES);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
@@ -427,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
}
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
-struct kfd_local_mem_info *mem_info)
+struct kfd_local_mem_info *mem_info,
struct amdgpu_xcp *xcp)
{
memset(mem_info, 0, sizeof(*mem_info));
if (xcp) {
if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
mem_info->local_mem_size_public =
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
else
mem_info->local_mem_size_private =
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
} else {
mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
mem_info->local_mem_size_private = adev->gmc.real_vram_size -
adev->gmc.visible_vram_size;
}
mem_info->vram_width = adev->gmc.vram_width;
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
@@ -497,7 +507,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
-uint32_t *flags)
+uint32_t *flags, int8_t *xcp_id)
{
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
@@ -541,6 +551,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
}
if (xcp_id)
*xcp_id = bo->xcp_id;
out_put:
dma_buf_put(dma_buf);
@@ -732,17 +744,19 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
if (adev->family == AMDGPU_FAMILY_AI) {
int i;
-for (i = 0; i < adev->num_vmhubs; i++)
+for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
} else {
-amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
}
return 0;
}
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
+uint16_t pasid,
enum TLB_FLUSH_TYPE flush_type,
uint32_t inst)
{
bool all_hub = false;
@@ -750,7 +764,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
adev->family == AMDGPU_FAMILY_RV)
all_hub = true;
-return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
}
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
@@ -758,11 +772,32 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
return adev->have_atomics_support;
}
void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
{
amdgpu_device_flush_hdp(adev, NULL);
}
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
{
amdgpu_umc_poison_handler(adev, reset);
}
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload)
{
int ret;
/* Device or IH ring is not ready so bail. */
ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
if (ret)
return ret;
/* Send payload to fence KFD interrupts */
amdgpu_amdkfd_interrupt(adev, payload);
return 0;
}
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
{
if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
@@ -770,3 +805,28 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
else
return false;
}
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
{
return kgd2kfd_check_and_lock_kfd();
}
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
{
kgd2kfd_unlock_kfd();
}
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
{
u64 tmp;
s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
tmp = adev->gmc.mem_partitions[mem_id].size;
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
return ALIGN_DOWN(tmp, PAGE_SIZE);
} else {
return adev->gmc.real_vram_size;
}
}


@@ -30,10 +30,12 @@
#include <linux/kthread.h>
#include <linux/workqueue.h>
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
#include "amdgpu_xcp.h"
extern uint64_t amdgpu_amdkfd_total_mem_size;
@@ -97,10 +99,13 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
-int64_t vram_used;
+int64_t vram_used[MAX_XCP];
-uint64_t vram_used_aligned;
+uint64_t vram_used_aligned[MAX_XCP];
bool init_complete;
struct work_struct reset_work;
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
struct dev_pagemap pgmap;
};
enum kgd_engine_type {
@@ -151,6 +156,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
@@ -160,7 +167,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
uint16_t vmid);
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
+uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
uint32_t inst);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -224,7 +232,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
-struct kfd_local_mem_info *mem_info);
+struct kfd_local_mem_info *mem_info,
struct amdgpu_xcp *xcp);
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
@@ -234,13 +243,15 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
-uint32_t *flags);
+uint32_t *flags, int8_t *xcp_id);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
struct amdgpu_device *src);
int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
struct amdgpu_device *src,
bool is_min);
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload);
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
@@ -279,7 +290,8 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
uint8_t xcp_id);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
@@ -310,6 +322,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
uint64_t *mmap_offset);
int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
struct dma_buf **dmabuf);
void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
@@ -319,9 +332,18 @@ void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-uint64_t size, u32 alloc_flag);
+uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
-uint64_t size, u32 alloc_flag);
+uint64_t size, u32 alloc_flag, int8_t xcp_id);
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
#define KFD_XCP_MEM_ID(adev, xcp_id) \
((adev)->xcp_mgr && (xcp_id) >= 0 ?\
(adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
@@ -352,6 +374,17 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
{
}
#endif
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
#else
static inline
int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
{
return 0;
}
#endif
/* KGD2KFD callbacks */
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
@@ -372,6 +405,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
int kgd2kfd_check_and_lock_kfd(void);
void kgd2kfd_unlock_kfd(void);
#else
static inline int kgd2kfd_init(void)
{
@@ -437,5 +472,14 @@ static inline
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
static inline int kgd2kfd_check_and_lock_kfd(void)
{
return 0;
}
static inline void kgd2kfd_unlock_kfd(void)
{
}
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */


@@ -23,6 +23,149 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gc/gc_9_4_2_offset.h"
#include "gc/gc_9_4_2_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
/*
* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
*
* restore_dbg_registers is ignored here but is a general interface requirement
* for devices that support GFXOFF and where the RLC save/restore list
* does not support hw registers for debugging i.e. the driver has to manually
* initialize the debug mode registers after it has disabled GFX off during the
* debug session.
*/
static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
return data;
}
/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
return data;
}
static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported)
{
*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
KFD_DBG_TRAP_MASK_FP_INEXACT |
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
return -EPERM;
return 0;
}
/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. */
static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_trap_cntl_prev)
{
uint32_t data = 0;
*trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
trap_mask_bits = (trap_mask_bits & trap_mask_request) |
(*trap_mask_prev & ~trap_mask_request);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
return data;
}
static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
return data;
}
#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
static uint32_t kgd_gfx_aldebaran_set_address_watch(
struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid)
{
uint32_t watch_address_high;
uint32_t watch_address_low;
uint32_t watch_address_cntl;
watch_address_cntl = 0;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 6);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
return watch_address_cntl;
}
static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id)
{
return 0;
}
const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
@@ -42,5 +185,14 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
-.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
.validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
.clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
};


@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "amdgpu_reset.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
@@ -48,6 +49,8 @@
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -276,6 +279,117 @@ int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
return 0;
}
/*
* Helper used to suspend/resume gfx pipe for image post process work to set
* barrier behaviour.
*/
static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
{
int i, r = 0;
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
if (!(ring && ring->sched.thread))
continue;
/* stop secheduler and drain ring. */
if (suspend) {
drm_sched_stop(&ring->sched, NULL);
r = amdgpu_fence_wait_empty(ring);
if (r)
goto out;
} else {
drm_sched_start(&ring->sched, false);
}
}
out:
/* return on resume or failure to drain rings. */
if (!suspend || r)
return r;
return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}
static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
{
uint32_t data;
WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
if (!down_read_trylock(&adev->reset_domain->sem))
return;
amdgpu_amdkfd_suspend(adev, false);
if (suspend_resume_compute_scheduler(adev, true))
goto out;
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
!enable_waitcnt);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
out:
suspend_resume_compute_scheduler(adev, false);
amdgpu_amdkfd_resume(adev, false);
up_read(&adev->reset_domain->sem);
}
/*
* restore_dbg_registers is ignored here but is a general interface requirement
* for devices that support GFXOFF and where the RLC save/restore list
* does not support hw registers for debugging i.e. the driver has to manually
* initialize the debug mode registers after it has disabled GFX off during the
* debug session.
*/
static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
{
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
set_barrier_auto_waitcnt(adev, true);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
/*
* keep_trap_enabled is ignored here but is a general interface requirement
* for devices that support multi-process debugging where the performance
* overhead from trap temporary setup needs to be bypassed when the debug
* session has ended.
*/
static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid)
{
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
set_barrier_auto_waitcnt(adev, false);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -294,6 +408,15 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_arcturus_enable_debug_trap,
.disable_debug_trap = kgd_arcturus_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
};

View file

@@ -0,0 +1,384 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gc/gc_9_4_3_offset.h"
#include "gc/gc_9_4_3_sh_mask.h"
#include "athub/athub_1_8_0_offset.h"
#include "athub/athub_1_8_0_sh_mask.h"
#include "oss/osssys_4_4_2_offset.h"
#include "oss/osssys_4_4_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
return (struct v9_sdma_mqd *)mqd;
}
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
uint32_t sdma_engine_reg_base =
SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
regSDMA_RLC0_RB_CNTL) -
regSDMA_RLC0_RB_CNTL;
uint32_t retval = sdma_engine_reg_base +
queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
queue_id, retval);
return retval;
}
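The offset computed above is simple stride arithmetic: the engine's RLC0 block base plus queue_id times the distance between two adjacent RLC register blocks. A minimal standalone sketch of the same computation, with made-up offsets standing in for the real regSDMA_RLC* values from the register headers (illustrative only, not driver code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical offsets; the real ones come from sdma_4_4_2_offset.h. */
#define RLC0_RB_CNTL_OFFSET 0x0240u
#define RLC1_RB_CNTL_OFFSET 0x0280u   /* start of the next queue's block */

static uint32_t rlc_reg_offset(uint32_t engine_base, uint32_t queue_id)
{
	/* Same idea as get_sdma_rlc_reg_offset(): engine's RLC0 block base
	 * plus queue_id times the per-queue stride. */
	uint32_t stride = RLC1_RB_CNTL_OFFSET - RLC0_RB_CNTL_OFFSET;
	return engine_base + queue_id * stride;
}

int main(void)
{
	for (uint32_t q = 0; q < 4; q++)
		printf("queue %u -> 0x%x\n", q, rlc_reg_offset(0x1000, q));
	return 0;
}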
static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
}
usleep_range(500, 1000);
}
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
ENABLE, 1);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
lower_32_bits(data64));
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
RB_ENABLE, 1);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);
return 0;
}
static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
(*dump)[i++][1] = RREG32(addr); \
} while (0)
*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA_RLC0_MIDCMD_DATA0;
reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
return false;
}
static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
struct v9_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);
while (true) {
temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
}
usleep_range(500, 1000);
}
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);
m->sdmax_rlcx_rb_rptr =
RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);
return 0;
}
static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
u32 pasid, unsigned int vmid, uint32_t xcc_inst)
{
unsigned long timeout;
unsigned int reg;
unsigned int phy_inst = GET_INST(GC, xcc_inst);
/* Every two XCCs share one AID */
unsigned int aid = phy_inst / 2;
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
* a mapping is in progress or because a mapping finished
* and the SW cleared it.
* So the protocol is to always wait & clear.
*/
uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID0_PASID_MAPPING__VALID_MASK;
WREG32(SOC15_REG_OFFSET(ATHUB, 0,
regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
timeout = jiffies + msecs_to_jiffies(10);
while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
(1U << vmid))) {
if (time_after(jiffies, timeout)) {
pr_err("Fail to program VMID-PASID mapping\n");
return -ETIME;
}
cpu_relax();
}
WREG32(SOC15_REG_OFFSET(ATHUB, 0,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
1U << vmid);
reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
/* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs,
* and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1"
* programs _LUT for XCC and "aid * 4" for AID where the XCC connects
* to.
*/
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
aid * 4 + (phy_inst % 2) + 1);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
pasid_mapping);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
aid * 4);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
pasid_mapping);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);
return 0;
}
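The LUT programming above follows the layout spelled out in the comment: each AID owns four consecutive IH_VMID_LUT_INDEX slots (AID, XCD0, XCD1, reserved), and two XCCs share one AID. A small standalone sketch of that index arithmetic (illustrative only, not driver code):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the index math in kgd_gfx_v9_4_3_set_pasid_vmid_mapping():
 * every AID owns 4 consecutive LUT slots: [AID, XCD0, XCD1, reserved]. */
static uint32_t lut_index_for_xcc(uint32_t phy_inst)
{
	uint32_t aid = phy_inst / 2;            /* two XCCs share one AID */
	return aid * 4 + (phy_inst % 2) + 1;    /* slot 1 or 2 inside the block */
}

static uint32_t lut_index_for_aid(uint32_t phy_inst)
{
	return (phy_inst / 2) * 4;              /* slot 0 of the block */
}

int main(void)
{
	for (uint32_t inst = 0; inst < 6; inst++)
		printf("XCC %u -> XCC slot %u, AID slot %u\n", inst,
		       lut_index_for_xcc(inst), lut_index_for_aid(inst));
	return 0;
}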
static inline struct v9_mqd *get_mqd(void *mqd)
{
return (struct v9_mqd *)mqd;
}
static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v9_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, hqd_end, data;
m = get_mqd(mqd);
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
/* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
for (reg = hqd_base; reg <= hqd_end; reg++)
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL),
data);
if (wptr) {
/* Don't read wptr with get_user because the user
* context may not be accessible (if this function
* runs in a work queue). Instead trigger a one-shot
* polling read from memory in the CP. This assumes
* that wptr is GPU-accessible in the queue's VMID via
* ATC or SVM. WPTR==RPTR before starting the poll so
* the CP starts fetching new commands from the right
* place.
*
* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
* tricky. Assume that the queue didn't overflow. The
* number of valid bits in the 32-bit RPTR depends on
* the queue size. The remaining bits are taken from
* the saved 64-bit WPTR. If the WPTR wrapped, add the
* queue size.
*/
uint32_t queue_size =
2 << REG_GET_FIELD(m->cp_hqd_pq_control,
CP_HQD_PQ_CONTROL, QUEUE_SIZE);
uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
guessed_wptr += queue_size;
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1),
(uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
queue_id));
}
/* Start the EOP fetcher */
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data);
kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
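The write-pointer guess in the comment above is plain modular arithmetic: take the queue offset from the 32-bit read pointer, borrow the upper bits from the saved 64-bit write pointer, and add one queue size if the saved low offset already sits behind the read pointer. A self-contained sketch with example values (the REG_GET_FIELD extraction is replaced here by a plain queue_size parameter; illustrative only):

#include <stdint.h>
#include <stdio.h>

/* queue_size must be a power of two, as produced by the MQD's
 * 2 << QUEUE_SIZE field. */
static uint64_t guess_wptr(uint32_t rptr, uint32_t wptr_lo, uint32_t wptr_hi,
			   uint32_t queue_size)
{
	uint64_t guessed = rptr & (queue_size - 1);        /* offset inside ring */

	if ((wptr_lo & (queue_size - 1)) < guessed)        /* rptr is one lap ahead */
		guessed += queue_size;
	guessed += wptr_lo & ~(uint64_t)(queue_size - 1);  /* lap bits of low word */
	guessed += (uint64_t)wptr_hi << 32;                /* upper 32 bits */
	return guessed;
}

int main(void)
{
	/* Example: 256-entry ring, saved wptr just wrapped past the rptr. */
	printf("0x%llx\n",
	       (unsigned long long)guess_wptr(0xf0, 0x105, 0x0, 0x100));
	return 0;
}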
const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
.init_interrupts = kgd_gfx_v9_init_interrupts,
.hqd_load = kgd_gfx_v9_4_3_hqd_load,
.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
.hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
.hqd_dump = kgd_gfx_v9_hqd_dump,
.hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
.program_trap_handler_settings =
kgd_gfx_v9_program_trap_handler_settings
};

View file

@@ -21,6 +21,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
@@ -31,6 +32,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -79,7 +81,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -91,7 +93,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -135,7 +137,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
* but still works
*/
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -205,7 +208,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
@@ -286,9 +289,9 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
@@ -303,7 +306,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -330,7 +333,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
spin_unlock(&adev->gfx.kiq[0].ring_lock);
release_queue(adev);
return r;
@@ -338,7 +341,7 @@ out_unlock:
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -469,7 +472,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -510,7 +513,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -673,7 +676,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
@@ -708,8 +711,295 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
/*
* GFX10 helper for wave launch stall requirements on debug trap setting.
*
* vmid:
* Target VMID to stall/unstall.
*
* stall:
* 0-unstall wave launch (enable), 1-stall wave launch (disable).
* After wavefront launch has been stalled, allocated waves must drain from
* SPI in order for debug trap settings to take effect on those waves.
* This is roughly a ~3500 clock cycle wait on SPI where a read on
* SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
* KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
*
* NOTE: We can afford to clear the entire STALL_VMID field on unstall
* because current GFX10 chips cannot support multi-process debugging due to
* trap configuration and masking being limited to global scope. Always
* assume single process conditions.
*
*/
#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
{
uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
int i;
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
stall ? 1 << vmid : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
if (!stall)
return;
for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
}
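The drain loop above converts a cycle budget into a read count: with roughly 3500 SPI clocks needed for allocated waves to drain and about 32 clocks per SPI_GDBG_WAVE_CNTL read, around 110 reads are enough, which is where KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY comes from. A trivial sketch of that sizing, using the numbers from the comment (illustrative only):

#include <stdio.h>

int main(void)
{
	const unsigned int drain_cycles = 3500;   /* SPI clocks for waves to drain */
	const unsigned int cycles_per_read = 32;  /* approximate cost of one read */

	/* Round up so the wait is never short. */
	unsigned int reads = (drain_cycles + cycles_per_read - 1) / cycles_per_read;
	printf("%u reads (driver uses 110)\n", reads);
	return 0;
}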
uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
{
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
/* assume gfx off is disabled for the debug session if rlc restore not supported. */
if (restore_dbg_registers) {
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
VMID_SEL, 1 << vmid);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
TRAP_EN, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid)
{
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported)
{
*trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
/* The SPI_GDBG_TRAP_MASK register is global and affects all
* processes. Only allow OR-ing the address-watch bit, since
* this only affects processes under the debugger. Other bits
* should stay 0 to avoid the debugger interfering with other
* processes.
*/
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
return -EINVAL;
return 0;
}
uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_trap_cntl_prev)
{
uint32_t data, wave_cntl_prev;
mutex_lock(&adev->grbm_idx_mutex);
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
*trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
trap_mask_bits = (trap_mask_bits & trap_mask_request) |
(*trap_mask_prev & ~trap_mask_request);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
/* We need to preserve wave launch mode stall settings. */
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
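The EXCP_EN update above is the usual masked read-modify-write: bits selected by trap_mask_request take their value from trap_mask_bits, all other bits keep their previous value. A small standalone sketch of the same merge on plain integers (illustrative only):

#include <stdint.h>
#include <stdio.h>

static uint32_t merge_mask(uint32_t prev, uint32_t new_bits, uint32_t request)
{
	/* Same expression as in kgd_gfx_v10_set_wave_launch_trap_override(). */
	return (new_bits & request) | (prev & ~request);
}

int main(void)
{
	/* Only bits 0-3 are being updated; bit 4 keeps its old value. */
	printf("0x%x\n", merge_mask(0x13, 0x05, 0x0f));   /* prints 0x15 */
	return 0;
}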
uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
bool is_mode_set = !!wave_launch_mode;
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
VMID_MASK, is_mode_set ? 1 << vmid : 0);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
MODE, is_mode_set ? wave_launch_mode : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid)
{
uint32_t watch_address_high;
uint32_t watch_address_low;
uint32_t watch_address_cntl;
watch_address_cntl = 0;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VMID,
debug_vmid);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 7);
/* Turning off this watch point until we set all the registers */
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
0);
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
/* Enable the watch point */
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
return 0;
}
uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id)
{
uint32_t watch_address_cntl;
watch_address_cntl = 0;
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
return 0;
}
/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
* The values read are:
* ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
* atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
* wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
* gws_wait_time -- Wait Count for Global Wave Syncs.
* que_sleep_wait_time -- Wait Count for Dequeue Retry.
* sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
* sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
*/
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times)
{
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
/*
* The CP cannot handle a 0 grace period input; it would result in
* an infinite grace period being set, so set it to 1 to prevent this.
*/
if (grace_period == 0)
grace_period = 1;
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
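The zero check above matters because a SCH_WAVE value of 0 is interpreted by the CP as an infinite grace period, so the smallest programmable value is 1. A standalone sketch of the same clamp-and-pack step, with an assumed field position standing in for the real CP_IQ_WAIT_TIME2 SCH_WAVE shift/mask (illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical field layout for this sketch only; the real shift/mask come
 * from the CP_IQ_WAIT_TIME2 definition in the register headers. */
#define SCH_WAVE_SHIFT 8
#define SCH_WAVE_MASK  (0xffu << SCH_WAVE_SHIFT)

static uint32_t pack_grace_period(uint32_t wait_times, uint32_t grace_period)
{
	if (grace_period == 0)       /* 0 would mean "infinite" to the CP */
		grace_period = 1;
	return (wait_times & ~SCH_WAVE_MASK) |
	       ((grace_period << SCH_WAVE_SHIFT) & SCH_WAVE_MASK);
}

int main(void)
{
	printf("0x%08x\n", pack_grace_period(0xdeadbe00u, 0));
	return 0;
}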
static void program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -750,5 +1040,14 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
};

View file

@@ -0,0 +1,55 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid);
uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid);
int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported);
uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_trap_cntl_prev);
uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid);
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid);
uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id);
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);

View file

@@ -22,6 +22,7 @@
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -80,7 +81,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -93,7 +94,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
/* ATC is defeatured on Sienna_Cichlid */
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
@@ -105,7 +106,8 @@ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int
return 0;
}
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -177,7 +179,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
@@ -273,9 +275,9 @@ static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
@@ -290,7 +292,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -317,7 +319,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
spin_unlock(&adev->gfx.kiq[0].ring_lock);
release_queue(adev);
return r;
@@ -325,7 +327,7 @@ out_unlock:
static int hqd_dump_v10_3(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -456,7 +458,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -498,7 +500,7 @@ static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -586,7 +588,7 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
static int wave_control_execute_v10_3(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
@@ -628,7 +630,8 @@ static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
}
static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -652,142 +655,6 @@ static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
unlock_srbm(adev);
}
#if 0
uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev,
uint32_t trap_debug_wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
uint32_t orig_wave_cntl_value;
uint32_t orig_stall_vmid;
mutex_lock(&adev->grbm_idx_mutex);
orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
0,
mmSPI_GDBG_WAVE_CNTL));
orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
SPI_GDBG_WAVE_CNTL,
STALL_VMID);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
data = 0;
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev)
{
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t trap_mask)
{
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
EXCP_EN, trap_mask);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
REPLACE, trap_override);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
bool is_stall_mode;
bool is_mode_set;
is_stall_mode = (wave_launch_mode == 4);
is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
mutex_lock(&adev->grbm_idx_mutex);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
VMID_MASK, is_mode_set ? 1 << vmid : 0);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
MODE, is_mode_set ? wave_launch_mode : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
STALL_VMID, is_stall_mode ? 1 << vmid : 0);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
STALL_RA, is_stall_mode ? 1 : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
* The values read are:
* ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
* atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
* wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
* gws_wait_time -- Wait Count for Global Wave Syncs.
* que_sleep_wait_time -- Wait Count for Dequeue Retry.
* sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
* sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
*/
void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
uint32_t *wait_times)
{
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
*reg_offset = mmCP_IQ_WAIT_TIME2;
}
#endif
const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v10_3,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
@@ -805,12 +672,13 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch
};

View file

@@ -30,6 +30,7 @@
#include "soc15d.h"
#include "v11_structs.h"
#include "soc21.h"
#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -78,7 +79,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -89,7 +90,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
}
static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
@@ -101,7 +102,8 @@ static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int p
return 0;
}
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -162,7 +164,7 @@ static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm, uint32_t inst)
{
struct v11_compute_mqd *m;
uint32_t *mqd_hqd;
@@ -258,9 +260,9 @@ static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v11_compute_mqd *m;
uint32_t mec, pipe;
int r;
@@ -275,7 +277,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -302,7 +304,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
spin_unlock(&adev->gfx.kiq[0].ring_lock);
release_queue(adev);
return r;
@@ -310,7 +312,7 @@ out_unlock:
static int hqd_dump_v11(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -445,7 +447,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
}
static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -486,7 +488,7 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -571,7 +573,7 @@ static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
static int wave_control_execute_v11(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
@@ -606,6 +608,183 @@ static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
/*
* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
*
* restore_dbg_registers is ignored here but is a general interface requirement
* for devices that support GFXOFF and where the RLC save/restore list
* does not support hw registers for debugging i.e. the driver has to manually
* initialize the debug mode registers after it has disabled GFX off during the
* debug session.
*/
static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
return data;
}
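Unlike the GFX9 and GFX10 paths earlier in this series, the GFX11 callbacks do not write registers themselves; they only compose a per-VMID SPI_GDBG_PER_VMID_CNTL value and return it to the caller. A standalone sketch of that composition with assumed bit positions in place of REG_SET_FIELD (illustrative only, not the real register layout):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit positions; the real ones live in the gc_11_* headers. */
#define TRAP_EN_BIT      (1u << 0)
#define EXCP_EN_SHIFT    1
#define EXCP_REPLACE_BIT (1u << 10)

static uint32_t compose_vmid_cntl(int trap_en, uint32_t excp_en, int replace)
{
	uint32_t v = 0;

	if (trap_en)
		v |= TRAP_EN_BIT;
	v |= excp_en << EXCP_EN_SHIFT;
	if (replace)
		v |= EXCP_REPLACE_BIT;
	return v;    /* caller writes this into the per-VMID control register */
}

int main(void)
{
	printf("enable:  0x%x\n", compose_vmid_cntl(1, 0, 0));
	printf("disable: 0x%x\n", compose_vmid_cntl(0, 0, 0));
	return 0;
}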
/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
return data;
}
static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported)
{
*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
KFD_DBG_TRAP_MASK_FP_INEXACT |
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4))
*trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
return -EPERM;
return 0;
}
static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
{
uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
KFD_DBG_TRAP_MASK_FP_INEXACT |
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
uint32_t ret;
ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
return ret;
}
static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
{
uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
return ret;
}
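The two helpers above are inverse mappings between the KFD_DBG_TRAP_MASK_* software flags and the hardware bit layout of SPI_GDBG_PER_VMID_CNTL, so converting one way and back should preserve the supported bits. A simplified standalone sketch of such a round trip with assumed bit positions (illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Assumed layouts for this sketch: SW keeps wave-start/end in bits 8/9,
 * HW keeps them in bits 12/13; the low 8 exception-enable bits match 1:1. */
#define SW_WAVE_START (1u << 8)
#define SW_WAVE_END   (1u << 9)
#define HW_WAVE_START (1u << 12)
#define HW_WAVE_END   (1u << 13)

static uint32_t sw_to_hw(uint32_t sw)
{
	uint32_t hw = sw & 0xff;                  /* exception-enable bits */

	if (sw & SW_WAVE_START)
		hw |= HW_WAVE_START;
	if (sw & SW_WAVE_END)
		hw |= HW_WAVE_END;
	return hw;
}

static uint32_t hw_to_sw(uint32_t hw)
{
	uint32_t sw = hw & 0xff;

	if (hw & HW_WAVE_START)
		sw |= SW_WAVE_START;
	if (hw & HW_WAVE_END)
		sw |= SW_WAVE_END;
	return sw;
}

int main(void)
{
	uint32_t mask = 0x3 | SW_WAVE_END;
	printf("round trip ok: %d\n", hw_to_sw(sw_to_hw(mask)) == mask);
	return 0;
}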
/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_trap_cntl_prev)
{
uint32_t data = 0;
*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
data = trap_mask_map_sw_to_hw(data);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
return data;
}
static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
return data;
}
#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid)
{
uint32_t watch_address_high;
uint32_t watch_address_low;
uint32_t watch_address_cntl;
watch_address_cntl = 0;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 7);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
return watch_address_cntl;
}
static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id)
{
return 0;
}
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v11,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
@@ -622,4 +801,11 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.wave_control_execute = wave_control_execute_v11,
.get_atc_vmid_pasid_mapping_info = NULL,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
.enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v11_set_address_watch,
.clear_address_watch = kgd_gfx_v11_clear_address_watch
};

View file

@@ -78,7 +78,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -114,7 +114,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
return 0;
}
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -158,7 +159,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct cik_mqd *m;
uint32_t *mqd_hqd;
@@ -202,7 +203,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
@@ -318,7 +319,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -358,7 +359,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst)
{
uint32_t temp;
enum hqd_dequeue_request_type type;
@@ -494,7 +495,7 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data;

@@ -72,7 +72,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
-uint32_t sh_mem_bases)
+uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -85,7 +85,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
-unsigned int vmid)
+unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -109,7 +109,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -153,7 +154,7 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
-uint32_t wptr_mask, struct mm_struct *mm)
+uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct vi_mqd *m;
uint32_t *mqd_hqd;
@@ -226,7 +227,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
-uint32_t (**dump)[2], uint32_t *n_regs)
+uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
@@ -350,7 +351,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
-uint32_t queue_id)
+uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -390,7 +391,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
-uint32_t queue_id)
+uint32_t queue_id, uint32_t inst)
{
uint32_t temp;
enum hqd_dequeue_request_type type;
@@ -540,7 +541,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
-uint32_t sq_cmd)
+uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;

@@ -38,6 +38,7 @@
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -46,29 +47,29 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
-uint32_t queue, uint32_t vmid)
+uint32_t queue, uint32_t vmid, uint32_t inst)
{
mutex_lock(&adev->srbm_mutex);
-soc15_grbm_select(adev, mec, pipe, queue, vmid);
+soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
}
-static void unlock_srbm(struct amdgpu_device *adev)
+static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
{
-soc15_grbm_select(adev, 0, 0, 0, 0);
+soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
-uint32_t queue_id)
+uint32_t queue_id, uint32_t inst)
{
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-lock_srbm(adev, mec, pipe, queue_id, 0);
+kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
}
-static uint64_t get_queue_mask(struct amdgpu_device *adev,
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
@@ -77,28 +78,28 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct amdgpu_device *adev)
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
{
-unlock_srbm(adev);
+kgd_gfx_v9_unlock_srbm(adev, inst);
}
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
-uint32_t sh_mem_bases)
+uint32_t sh_mem_bases, uint32_t inst)
{
-lock_srbm(adev, 0, 0, 0, vmid);
+kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config);
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
-unlock_srbm(adev);
+kgd_gfx_v9_unlock_srbm(adev, inst);
}
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
-unsigned int vmid)
+unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -156,7 +157,8 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
* but still works
*/
-int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -164,13 +166,13 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-lock_srbm(adev, mec, pipe, 0, 0);
+kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
-WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
+WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
-unlock_srbm(adev);
+kgd_gfx_v9_unlock_srbm(adev, inst);
return 0;
}
@@ -220,7 +222,8 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
-uint32_t wptr_mask, struct mm_struct *mm)
+uint32_t wptr_mask, struct mm_struct *mm,
+uint32_t inst)
{
struct v9_mqd *m;
uint32_t *mqd_hqd;
@@ -228,21 +231,22 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
m = get_mqd(mqd);
-acquire_queue(adev, pipe_id, queue_id);
+kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
-hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
for (reg = hqd_base;
-reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL),
+data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -271,43 +275,43 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
-WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
+WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
-(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+(uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data);
-release_queue(adev);
+kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
-uint32_t doorbell_off)
+uint32_t doorbell_off, uint32_t inst)
{
-struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
struct v9_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
-acquire_queue(adev, pipe_id, queue_id);
+kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -315,7 +319,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
-spin_lock(&adev->gfx.kiq.ring_lock);
+spin_lock(&adev->gfx.kiq[inst].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -342,15 +346,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
-spin_unlock(&adev->gfx.kiq.ring_lock);
+spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-release_queue(adev);
+kgd_gfx_v9_release_queue(adev, inst);
return r;
}
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
-uint32_t (**dump)[2], uint32_t *n_regs)
+uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -365,13 +369,13 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
if (*dump == NULL)
return -ENOMEM;
-acquire_queue(adev, pipe_id, queue_id);
+kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
-for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
-reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
-release_queue(adev);
+kgd_gfx_v9_release_queue(adev, inst);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -481,23 +485,23 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
-uint32_t queue_id)
+uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
uint32_t low, high;
-acquire_queue(adev, pipe_id, queue_id);
+kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
-act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
-if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
-high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
+high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
retval = true;
}
-release_queue(adev);
+kgd_gfx_v9_release_queue(adev, inst);
return retval;
}
@@ -522,7 +526,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
-uint32_t queue_id)
+uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -532,10 +536,10 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
-acquire_queue(adev, pipe_id, queue_id);
+kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
if (m->cp_hqd_vmid == 0)
-WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -552,22 +556,22 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
break;
}
-WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
-temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
-release_queue(adev);
+kgd_gfx_v9_release_queue(adev, inst);
return -ETIME;
}
usleep_range(500, 1000);
}
-release_queue(adev);
+kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
@@ -624,14 +628,14 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
-uint32_t sq_cmd)
+uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
-WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
+WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
-WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
+WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
@@ -640,12 +644,271 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
-WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
+WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
/*
* GFX9 helper for wave launch stall requirements on debug trap setting.
*
* vmid:
* Target VMID to stall/unstall.
*
* stall:
* 0-unstall wave launch (enable), 1-stall wave launch (disable).
* After wavefront launch has been stalled, allocated waves must drain from
* SPI in order for debug trap settings to take effect on those waves.
* This is roughly a ~96 clock cycle wait on SPI where a read on
* SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
* KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
*
* NOTE: We can afford to clear the entire STALL_VMID field on unstall
* because GFX9.4.1 cannot support multi-process debugging due to trap
* configuration and masking being limited to global scope. Always assume
* single process conditions.
*/
#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3
void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
uint32_t vmid,
bool stall)
{
int i;
uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
stall ? 1 << vmid : 0);
else
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
stall ? 1 : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
if (!stall)
return;
for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
}
/*
* restore_dbg_registers is ignored here but is a general interface requirement
* for devices that support GFXOFF and where the RLC save/restore list
* does not support hw registers for debugging i.e. the driver has to manually
* initialize the debug mode registers after it has disabled GFX off during the
* debug session.
*/
uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
{
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
/*
* keep_trap_enabled is ignored here but is a general interface requirement
* for devices that support multi-process debugging where the performance
* overhead from trap temporary setup needs to be bypassed when the debug
* session has ended.
*/
uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid)
{
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported)
{
*trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
/* The SPI_GDBG_TRAP_MASK register is global and affects all
* processes. Only allow OR-ing the address-watch bit, since
* this only affects processes under the debugger. Other bits
* should stay 0 to avoid the debugger interfering with other
* processes.
*/
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
return -EINVAL;
return 0;
}
uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_cntl_prev)
{
uint32_t data, wave_cntl_prev;
mutex_lock(&adev->grbm_idx_mutex);
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
*trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
trap_mask_bits = (trap_mask_bits & trap_mask_request) |
(*trap_mask_prev & ~trap_mask_request);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
/* We need to preserve wave launch mode stall settings. */
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
bool is_mode_set = !!wave_launch_mode;
mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
VMID_MASK, is_mode_set ? 1 << vmid : 0);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
MODE, is_mode_set ? wave_launch_mode : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid)
{
uint32_t watch_address_high;
uint32_t watch_address_low;
uint32_t watch_address_cntl;
watch_address_cntl = 0;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VMID,
debug_vmid);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 6);
/* Turning off this watch point until we set all the registers */
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
0);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
/* Enable the watch point */
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
return 0;
}
uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id)
{
uint32_t watch_address_cntl;
watch_address_cntl = 0;
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
return 0;
}
/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
* The values read are:
* ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
* atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
* wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
* gws_wait_time -- Wait Count for Global Wave Syncs.
* que_sleep_wait_time -- Wait Count for Dequeue Retry.
* sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
* sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
*/
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times)
{
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base)
{
@@ -683,9 +946,10 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
* @wave_cnt: Output parameter updated with number of waves in flight
* @vmid: Output parameter updated with VMID of queue whose wave count
* is being collected
* @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
-int *wave_cnt, int *vmid)
+int *wave_cnt, int *vmid, uint32_t inst)
{
int pipe_idx;
int queue_slot;
@@ -700,12 +964,12 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
*wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
-soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
+soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
-reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
-*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
+*vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
}
@@ -721,6 +985,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* belong to process with given pasid
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
* @inst: xcc's instance number on a multi-XCC setup
*
* Note: It's possible that the device has too many queues (oversubscription)
* in which case a VMID could be remapped to a different PASID. This could lead
@@ -756,7 +1021,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* Reading registers referenced above involves programming GRBM appropriately
*/
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
-int *pasid_wave_cnt, int *max_waves_per_cu)
+int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
{
int qidx;
int vmid;
@@ -772,13 +1037,13 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
lock_spi_csq_mutexes(adev);
-soc15_grbm_select(adev, 1, 0, 0, 0);
+soc15_grbm_select(adev, 1, 0, 0, 0, inst);
/*
* Iterate through the shader engines and arrays of the device
* to get number of waves in flight
*/
-bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
+bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
KGD_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
@@ -787,8 +1052,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
-amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
+amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
-queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);
+queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
/*
* Assumption: queue map encodes following schema: four
@@ -808,10 +1073,11 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
continue;
/* Get number of waves in flight and aggregate them */
-get_wave_count(adev, qidx, &wave_cnt, &vmid);
+get_wave_count(adev, qidx, &wave_cnt, &vmid,
+inst);
if (wave_cnt != 0) {
pasid_tmp =
-RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
+RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
mmIH_VMID_0_LUT) + vmid);
if (pasid_tmp == pasid)
vmid_wave_cnt += wave_cnt;
@@ -820,8 +1086,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
}
}
-amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
-soc15_grbm_select(adev, 0, 0, 0, 0);
+soc15_grbm_select(adev, 0, 0, 0, 0, inst);
unlock_spi_csq_mutexes(adev);
@@ -830,28 +1096,51 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
/* Update the output parameters and return */
adev->gfx.cu_info.max_waves_per_simd;
}
-void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
-uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data)
{
-lock_srbm(adev, 0, 0, 0, vmid);
+*reg_data = wait_times;
/*
* The CP cannot handle a 0 grace period input and will result in
* an infinite grace period being set, so set it to 1 to prevent this.
*/
if (grace_period == 0)
grace_period = 1;
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
{
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
/*
* Program TBA registers
*/
-WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
+WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
lower_32_bits(tba_addr >> 8));
-WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
+WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
upper_32_bits(tba_addr >> 8));
/*
* Program TMA registers
*/
-WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
+WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8));
-WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI,
+WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
upper_32_bits(tma_addr >> 8));
-unlock_srbm(adev);
+kgd_gfx_v9_unlock_srbm(adev, inst);
}
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
@@ -871,6 +1160,15 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
};

@@ -20,41 +20,81 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
-uint32_t sh_mem_bases);
+uint32_t sh_mem_bases, uint32_t inst);
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
-unsigned int vmid);
+unsigned int vmid, uint32_t inst);
-int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id);
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+uint32_t inst);
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
-struct mm_struct *mm);
+struct mm_struct *mm, uint32_t inst);
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
-uint32_t doorbell_off);
+uint32_t doorbell_off, uint32_t inst);
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
-uint32_t (**dump)[2], uint32_t *n_regs);
+uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
-uint32_t queue_id);
+uint32_t queue_id, uint32_t inst);
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
-uint32_t queue_id);
+uint32_t queue_id, uint32_t inst);
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
-uint32_t sq_cmd);
+uint32_t sq_cmd, uint32_t inst);
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
-int *pasid_wave_cnt, int *max_waves_per_cu);
+int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
-uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
+uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+uint32_t inst);
void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id, uint32_t inst);
uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id);
void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
uint32_t vmid,
bool stall);
uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid);
uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid);
int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported);
uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid);
uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_trap_cntl_prev);
uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid);
uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id);
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);

@@ -35,7 +35,9 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
#include <drm/ttm/ttm_tt.h>
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
@@ -110,13 +112,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
struct sysinfo si;
uint64_t mem;
if (kfd_mem_limit.max_system_mem_limit)
return;
si_meminfo(&si);
mem = si.freeram - si.freehigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
-kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
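
A rough worked example of the limits set above, assuming a host where si_meminfo() reports 256 GiB of usable system memory and TTM is left at its default page limit (about half of system memory, overridable via TTM's pages_limit module parameter):

	max_system_mem_limit = mem - (mem >> 4)                   ≈ 240 GiB
	max_ttm_mem_limit    = ttm_tt_pages_limit() << PAGE_SHIFT ≈ 128 GiB

The two limits are now decoupled: the TTM cap follows TTM's own accounting instead of being derived again from system memory here.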
@@ -148,16 +153,20 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* @size: Size of buffer, in bytes, encapsulated by B0. This should be
* equivalent to amdgpu_bo_size(BO)
* @alloc_flag: Flag used in allocating a BO as noted above
* @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
* managed as one compute node in driver for app
*
-* Return: returns -ENOMEM in case of error, ZERO otherwise
+* Return:
+* returns -ENOMEM in case of error, ZERO otherwise
*/
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-uint64_t size, u32 alloc_flag)
+uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
uint64_t vram_size = 0;
system_mem_needed = 0;
ttm_mem_needed = 0;
@@ -172,6 +181,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
* 2M BO chunk.
*/
vram_needed = size;
/*
* For GFX 9.4.3, get the VRAM size from XCP structs
*/
if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
return -EINVAL;
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
if (adev->gmc.is_app_apu) {
system_mem_needed = size;
ttm_mem_needed = size;
}
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = size;
} else if (!(alloc_flag &
@@ -191,8 +211,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
-(adev && adev->kfd.vram_used + vram_needed >
+(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
-adev->gmc.real_vram_size - reserved_for_pt)) {
+vram_size - reserved_for_pt)) {
ret = -ENOMEM;
goto release;
}
@@ -202,9 +222,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
*/
WARN_ONCE(vram_needed && !adev,
"adev reference can't be null when vram is used");
-if (adev) {
+if (adev && xcp_id >= 0) {
-adev->kfd.vram_used += vram_needed;
+adev->kfd.vram_used[xcp_id] += vram_needed;
-adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
vram_needed :
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
} }
kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@ -215,7 +237,7 @@ release:
} }
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag) uint64_t size, u32 alloc_flag, int8_t xcp_id)
{ {
spin_lock(&kfd_mem_limit.mem_limit_lock); spin_lock(&kfd_mem_limit.mem_limit_lock);
@ -225,9 +247,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
WARN_ONCE(!adev, WARN_ONCE(!adev,
"adev reference can't be null when alloc mem flags vram is set"); "adev reference can't be null when alloc mem flags vram is set");
if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
goto release;
if (adev) { if (adev) {
adev->kfd.vram_used -= size; adev->kfd.vram_used[xcp_id] -= size;
adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); if (adev->gmc.is_app_apu) {
adev->kfd.vram_used_aligned[xcp_id] -= size;
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
} else {
adev->kfd.vram_used_aligned[xcp_id] -=
ALIGN(size, VRAM_AVAILABLITY_ALIGN);
}
} }
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.system_mem_used -= size;
@ -237,8 +269,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release; goto release;
} }
WARN_ONCE(adev && adev->kfd.vram_used < 0, WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
"KFD VRAM memory accounting unbalanced"); "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
"KFD TTM memory accounting unbalanced"); "KFD TTM memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.system_mem_used < 0, WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
@ -254,14 +286,16 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
u32 alloc_flags = bo->kfd_bo->alloc_flags; u32 alloc_flags = bo->kfd_bo->alloc_flags;
u64 size = amdgpu_bo_size(bo); u64 size = amdgpu_bo_size(bo);
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
bo->xcp_id);
kfree(bo->kfd_bo); kfree(bo->kfd_bo);
} }
/** /**
* @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information
* about USERPTR or DOOREBELL or MMIO BO. * about USERPTR or DOOREBELL or MMIO BO.
*
* @adev: Device for which dmamap BO is being created * @adev: Device for which dmamap BO is being created
* @mem: BO of peer device that is being DMA mapped. Provides parameters * @mem: BO of peer device that is being DMA mapped. Provides parameters
* in building the dmamap BO * in building the dmamap BO
@ -285,7 +319,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1, ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags, AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
amdgpu_bo_unreserve(mem->bo); amdgpu_bo_unreserve(mem->bo);
@ -527,6 +561,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
{ {
struct ttm_operation_ctx ctx = {.interruptible = true}; struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo; struct amdgpu_bo *bo = attachment->bo_va->base.bo;
int ret;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
return ret;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@ -659,11 +699,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
static void static void
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
{ {
struct ttm_operation_ctx ctx = {.interruptible = true}; /* This is a no-op. We don't want to trigger eviction fences when
struct amdgpu_bo *bo = attachment->bo_va->base.bo; * unmapping DMABufs. Therefore the invalidation (moving to system
* domain) is done in kfd_mem_dmamap_dmabuf.
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); */
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
} }
/** /**
@ -804,7 +843,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
* if peer device has large BAR. In contrast, access over xGMI is * if peer device has large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of peer device * allowed for both small and large BAR configurations of peer device
*/ */
if ((adev != bo_adev) && if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@ -1599,23 +1638,42 @@ out_unlock:
return ret; return ret;
} }
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
uint8_t xcp_id)
{ {
uint64_t reserved_for_pt = uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
ssize_t available; ssize_t available;
uint64_t vram_available, system_mem_available, ttm_mem_available;
spin_lock(&kfd_mem_limit.mem_limit_lock); spin_lock(&kfd_mem_limit.mem_limit_lock);
available = adev->gmc.real_vram_size vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
- adev->kfd.vram_used_aligned - adev->kfd.vram_used_aligned[xcp_id]
- atomic64_read(&adev->vram_pin_size) - atomic64_read(&adev->vram_pin_size)
- reserved_for_pt; - reserved_for_pt;
if (adev->gmc.is_app_apu) {
system_mem_available = no_system_mem_limit ?
kfd_mem_limit.max_system_mem_limit :
kfd_mem_limit.max_system_mem_limit -
kfd_mem_limit.system_mem_used;
ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
kfd_mem_limit.ttm_mem_used;
available = min3(system_mem_available, ttm_mem_available,
vram_available);
available = ALIGN_DOWN(available, PAGE_SIZE);
} else {
available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
}
spin_unlock(&kfd_mem_limit.mem_limit_lock); spin_unlock(&kfd_mem_limit.mem_limit_lock);
if (available < 0) if (available < 0)
available = 0; available = 0;
return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); return available;
} }
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@ -1624,6 +1682,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
uint64_t *offset, uint32_t flags, bool criu_resume) uint64_t *offset, uint32_t flags, bool criu_resume)
{ {
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
enum ttm_bo_type bo_type = ttm_bo_type_device; enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL; struct sg_table *sg = NULL;
uint64_t user_addr = 0; uint64_t user_addr = 0;
@ -1631,6 +1690,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct drm_gem_object *gobj = NULL; struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain; u32 domain, alloc_domain;
uint64_t aligned_size; uint64_t aligned_size;
int8_t xcp_id = -1;
u64 alloc_flags; u64 alloc_flags;
int ret; int ret;
@ -1639,9 +1699,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
*/ */
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
if (adev->gmc.is_app_apu) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
} else {
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
}
xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0; alloc_flags = 0;
@ -1693,17 +1761,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
amdgpu_sync_create(&(*mem)->sync); amdgpu_sync_create(&(*mem)->sync);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags); ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
xcp_id);
if (ret) { if (ret) {
pr_debug("Insufficient memory\n"); pr_debug("Insufficient memory\n");
goto err_reserve_limit; goto err_reserve_limit;
} }
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain)); va, (*mem)->aql_queue ? size << 1 : size,
domain_string(alloc_domain), xcp_id);
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
bo_type, NULL, &gobj); bo_type, NULL, &gobj, xcp_id + 1);
if (ret) { if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n", pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret); domain_string(alloc_domain), ret);
@ -1728,6 +1798,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
(*mem)->domain = domain; (*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0; (*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info; (*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) { if (user_addr) {
@ -1759,7 +1830,7 @@ err_node_allow:
/* Don't unreserve system mem limit twice */ /* Don't unreserve system mem limit twice */
goto err_reserve_limit; goto err_reserve_limit;
err_bo_create: err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags); amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit: err_reserve_limit:
mutex_destroy(&(*mem)->lock); mutex_destroy(&(*mem)->lock);
if (gobj) if (gobj)
@ -1855,11 +1926,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
} }
/* Update the size of the BO being freed if it was allocated from /* Update the size of the BO being freed if it was allocated from
* VRAM and is not imported. * VRAM and is not imported. For APP APU VRAM allocations are done
* in GTT domain
*/ */
if (size) { if (size) {
if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && if (!is_imported &&
(!is_imported)) (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
(adev->gmc.is_app_apu &&
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size; *size = bo_size;
else else
*size = 0; *size = 0;
@ -2282,8 +2356,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
(*mem)->dmabuf = dma_buf; (*mem)->dmabuf = dma_buf;
(*mem)->bo = bo; (*mem)->bo = bo;
(*mem)->va = va; (*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0; (*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info; (*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
@ -2445,6 +2520,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
ret = -EAGAIN; ret = -EAGAIN;
goto unlock_out; goto unlock_out;
} }
/* set mem valid if mem has hmm range associated */
if (mem->range)
mem->invalid = 0; mem->invalid = 0;
} }
@ -2577,7 +2654,14 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
list_for_each_entry_safe(mem, tmp_mem, list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list, &process_info->userptr_inval_list,
validate_list.head) { validate_list.head) {
bool valid = amdgpu_ttm_tt_get_user_pages_done( bool valid;
/* keep mem without hmm range at userptr_inval_list */
if (!mem->range)
continue;
/* Only check mem with hmm range associated */
valid = amdgpu_ttm_tt_get_user_pages_done(
mem->bo->tbo.ttm, mem->range); mem->bo->tbo.ttm, mem->range);
mem->range = NULL; mem->range = NULL;
@ -2586,7 +2670,12 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
ret = -EAGAIN; ret = -EAGAIN;
continue; continue;
} }
WARN(mem->invalid, "Valid BO is marked invalid");
if (mem->invalid) {
WARN(1, "Valid BO is marked invalid");
ret = -EAGAIN;
continue;
}
list_move_tail(&mem->validate_list.head, list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list); &process_info->userptr_valid_list);

View file

@ -272,6 +272,7 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
break; break;
case ATOM_DGPU_VRAM_TYPE_HBM2: case ATOM_DGPU_VRAM_TYPE_HBM2:
case ATOM_DGPU_VRAM_TYPE_HBM2E: case ATOM_DGPU_VRAM_TYPE_HBM2E:
case ATOM_DGPU_VRAM_TYPE_HBM3:
vram_type = AMDGPU_VRAM_TYPE_HBM; vram_type = AMDGPU_VRAM_TYPE_HBM;
break; break;
case ATOM_DGPU_VRAM_TYPE_GDDR6: case ATOM_DGPU_VRAM_TYPE_GDDR6:

View file

@ -104,9 +104,8 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
adev->bios = NULL; adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0); vram_base = pci_resource_start(adev->pdev, 0);
bios = ioremap_wc(vram_base, size); bios = ioremap_wc(vram_base, size);
if (!bios) { if (!bios)
return false; return false;
}
adev->bios = kmalloc(size, GFP_KERNEL); adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) { if (!adev->bios) {
@ -133,9 +132,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
adev->bios = NULL; adev->bios = NULL;
/* XXX: some cards may return 0 for rom size? ddx has a workaround */ /* XXX: some cards may return 0 for rom size? ddx has a workaround */
bios = pci_map_rom(adev->pdev, &size); bios = pci_map_rom(adev->pdev, &size);
if (!bios) { if (!bios)
return false; return false;
}
adev->bios = kzalloc(size, GFP_KERNEL); adev->bios = kzalloc(size, GFP_KERNEL);
if (adev->bios == NULL) { if (adev->bios == NULL) {
@ -168,9 +166,9 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
header[AMD_VBIOS_SIGNATURE_END] = 0; header[AMD_VBIOS_SIGNATURE_END] = 0;
if ((!AMD_IS_VALID_VBIOS(header)) || if ((!AMD_IS_VALID_VBIOS(header)) ||
0 != memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET], memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
AMD_VBIOS_SIGNATURE, AMD_VBIOS_SIGNATURE,
strlen(AMD_VBIOS_SIGNATURE))) strlen(AMD_VBIOS_SIGNATURE)) != 0)
return false; return false;
/* valid vbios, go on */ /* valid vbios, go on */
@ -264,7 +262,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
if (ACPI_FAILURE(status)) { if (ACPI_FAILURE(status)) {
printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
return -ENODEV; return -ENODEV;
} }
@ -363,7 +361,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
struct acpi_table_header *hdr; struct acpi_table_header *hdr;
acpi_size tbl_size; acpi_size tbl_size;
UEFI_ACPI_VFCT *vfct; UEFI_ACPI_VFCT *vfct;
unsigned offset; unsigned int offset;
if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr))) if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
return false; return false;

View file

@ -593,11 +593,20 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
switch (val) { switch (val) {
default: default:
case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break; case DRM_MODE_SCALE_NONE:
case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break; rmx_type = RMX_OFF;
case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break; break;
case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break; case DRM_MODE_SCALE_CENTER:
rmx_type = RMX_CENTER;
break;
case DRM_MODE_SCALE_ASPECT:
rmx_type = RMX_ASPECT;
break;
case DRM_MODE_SCALE_FULLSCREEN:
rmx_type = RMX_FULL;
break;
} }
if (amdgpu_encoder->rmx_type == rmx_type) if (amdgpu_encoder->rmx_type == rmx_type)
return 0; return 0;
@ -799,12 +808,21 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
} }
switch (value) { switch (value) {
case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break; case DRM_MODE_SCALE_NONE:
case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break; rmx_type = RMX_OFF;
case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break; break;
case DRM_MODE_SCALE_CENTER:
rmx_type = RMX_CENTER;
break;
case DRM_MODE_SCALE_ASPECT:
rmx_type = RMX_ASPECT;
break;
default: default:
case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break; case DRM_MODE_SCALE_FULLSCREEN:
rmx_type = RMX_FULL;
break;
} }
if (amdgpu_encoder->rmx_type == rmx_type) if (amdgpu_encoder->rmx_type == rmx_type)
return 0; return 0;
@ -1127,7 +1145,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* assume digital unless load detected otherwise */ /* assume digital unless load detected otherwise */
amdgpu_connector->use_digital = true; amdgpu_connector->use_digital = true;
lret = encoder_funcs->detect(encoder, connector); lret = encoder_funcs->detect(encoder, connector);
DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret); DRM_DEBUG_KMS("load_detect %x returned: %x\n",
encoder->encoder_type, lret);
if (lret == connector_status_connected) if (lret == connector_status_connected)
amdgpu_connector->use_digital = false; amdgpu_connector->use_digital = false;
} }

View file

@ -112,6 +112,9 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
if (r < 0) if (r < 0)
return r; return r;
if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
return -EINVAL;
++(num_ibs[r]); ++(num_ibs[r]);
p->gang_leader_idx = r; p->gang_leader_idx = r;
return 0; return 0;
@ -192,7 +195,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
uint64_t *chunk_array_user; uint64_t *chunk_array_user;
uint64_t *chunk_array; uint64_t *chunk_array;
uint32_t uf_offset = 0; uint32_t uf_offset = 0;
unsigned int size; size_t size;
int ret; int ret;
int i; int i;
@ -285,6 +288,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break; break;
default: default:
@ -393,7 +397,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
{ {
struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned num_deps; unsigned int num_deps;
int i, r; int i, r;
num_deps = chunk->length_dw * 4 / num_deps = chunk->length_dw * 4 /
@ -464,7 +468,7 @@ static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk) struct amdgpu_cs_chunk *chunk)
{ {
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
unsigned num_deps; unsigned int num_deps;
int i, r; int i, r;
num_deps = chunk->length_dw * 4 / num_deps = chunk->length_dw * 4 /
@ -482,7 +486,7 @@ static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk) struct amdgpu_cs_chunk *chunk)
{ {
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
unsigned num_deps; unsigned int num_deps;
int i, r; int i, r;
num_deps = chunk->length_dw * 4 / num_deps = chunk->length_dw * 4 /
@ -502,7 +506,7 @@ static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk) struct amdgpu_cs_chunk *chunk)
{ {
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
unsigned num_deps; unsigned int num_deps;
int i; int i;
num_deps = chunk->length_dw * 4 / num_deps = chunk->length_dw * 4 /
@ -536,7 +540,7 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk) struct amdgpu_cs_chunk *chunk)
{ {
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
unsigned num_deps; unsigned int num_deps;
int i; int i;
num_deps = chunk->length_dw * 4 / num_deps = chunk->length_dw * 4 /
@ -575,6 +579,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
return 0; return 0;
} }
static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
int i;
if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
return -EINVAL;
for (i = 0; i < p->gang_size; ++i) {
p->jobs[i]->shadow_va = shadow->shadow_va;
p->jobs[i]->csa_va = shadow->csa_va;
p->jobs[i]->gds_va = shadow->gds_va;
p->jobs[i]->init_shadow =
shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
}
return 0;
}
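A hedged userspace-side sketch of how a submitter might fill the chunk this handler parses. The struct and flag names come from the hunk above and the standard drm_amdgpu_cs_chunk plumbing; the include path and the virtual addresses are placeholders supplied by the caller.

/* Userspace sketch only: fill one AMDGPU_CHUNK_ID_CP_GFX_SHADOW chunk. */
#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>     /* assumed include path for the amdgpu UAPI */

static void fill_gfx_shadow_chunk(struct drm_amdgpu_cs_chunk *chunk,
                                  struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow,
                                  uint64_t shadow_va, uint64_t csa_va,
                                  uint64_t gds_va, int first_use)
{
        memset(shadow, 0, sizeof(*shadow));
        shadow->shadow_va = shadow_va;
        shadow->csa_va = csa_va;
        shadow->gds_va = gds_va;
        /* Any bit other than INIT_SHADOW is rejected with -EINVAL above. */
        if (first_use)
                shadow->flags |= AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;

        chunk->chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW;
        chunk->length_dw = sizeof(*shadow) / 4;
        chunk->chunk_data = (uintptr_t)shadow;
}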
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{ {
unsigned int ce_preempt = 0, de_preempt = 0; unsigned int ce_preempt = 0, de_preempt = 0;
@ -617,6 +641,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r) if (r)
return r; return r;
break; break;
case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
r = amdgpu_cs_p2_shadow(p, chunk);
if (r)
return r;
break;
} }
} }
@ -729,6 +758,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (used_vis_vram < total_vis_vram) { if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram; u64 free_vis_vram = total_vis_vram - used_vis_vram;
adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
increment_us, us_upper_bound); increment_us, us_upper_bound);
@ -1047,9 +1077,8 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
/* the IB should be reserved at this point */ /* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr); r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) { if (r)
return r; return r;
}
kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
@ -1356,7 +1385,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* Cleanup the parser structure */ /* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{ {
unsigned i; unsigned int i;
amdgpu_sync_free(&parser->sync); amdgpu_sync_free(&parser->sync);
for (i = 0; i < parser->num_post_deps; i++) { for (i = 0; i < parser->num_post_deps; i++) {

View file

@ -106,3 +106,41 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
ttm_eu_backoff_reservation(&ticket, &list); ttm_eu_backoff_reservation(&ticket, &list);
return 0; return 0;
} }
int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
uint64_t csa_addr)
{
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer csa_tv;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&csa_tv.head);
csa_tv.bo = &bo->tbo;
csa_tv.num_shared = 1;
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
}
r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
if (r) {
DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
amdgpu_vm_bo_del(adev, bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}

View file

@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size); uint64_t csa_addr, uint32_t size);
int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
uint64_t csa_addr);
void amdgpu_free_static_csa(struct amdgpu_bo **bo); void amdgpu_free_static_csa(struct amdgpu_bo **bo);
#endif #endif

View file

@ -222,8 +222,19 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio); drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM); hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
if (!(adev)->xcp_mgr) {
scheds = adev->gpu_sched[hw_ip][hw_prio].sched; scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
} else {
struct amdgpu_fpriv *fpriv;
fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
&num_scheds, &scheds);
if (r)
goto cleanup_entity;
}
/* disable load balance if the hw engine retains context among dependent jobs */ /* disable load balance if the hw engine retains context among dependent jobs */
if (hw_ip == AMDGPU_HW_IP_VCN_ENC || if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
@ -255,7 +266,8 @@ error_free_entity:
return r; return r;
} }
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
struct amdgpu_ctx_entity *entity)
{ {
ktime_t res = ns_to_ktime(0); ktime_t res = ns_to_ktime(0);
int i; int i;
@ -268,6 +280,8 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
dma_fence_put(entity->fences[i]); dma_fence_put(entity->fences[i]);
} }
amdgpu_xcp_release_sched(adev, entity);
kfree(entity); kfree(entity);
return res; return res;
} }
@ -303,6 +317,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
struct drm_file *filp, struct amdgpu_ctx *ctx) struct drm_file *filp, struct amdgpu_ctx *ctx)
{ {
struct amdgpu_fpriv *fpriv = filp->driver_priv;
u32 current_stable_pstate; u32 current_stable_pstate;
int r; int r;
@ -331,6 +346,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
else else
ctx->stable_pstate = current_stable_pstate; ctx->stable_pstate = current_stable_pstate;
ctx->ctx_mgr = &(fpriv->ctx_mgr);
return 0; return 0;
} }
@ -399,7 +415,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
ktime_t spend; ktime_t spend;
spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]); spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]); atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
} }
} }
@ -576,6 +592,9 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (atomic_read(&ctx->guilty)) if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
if (amdgpu_in_reset(adev))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
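A hedged userspace sketch of consuming the new flag through the existing query2 ioctl (libdrm's amdgpu_cs_query_reset_state2() wraps the same path); the include path is an assumption.

/* Userspace sketch only: ask whether a GPU reset is currently in progress. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>     /* assumed include path for the amdgpu UAPI */

static int gpu_reset_in_progress(int drm_fd, uint32_t ctx_id)
{
        union drm_amdgpu_ctx args;

        memset(&args, 0, sizeof(args));
        args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
        args.in.ctx_id = ctx_id;

        if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_CTX, &args))
                return -1;

        return !!(args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS);
}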
if (adev->ras_enabled && con) { if (adev->ras_enabled && con) {
/* Return the cached values in O(1), /* Return the cached values in O(1),
* and schedule delayed work to cache * and schedule delayed work to cache

View file

@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long ras_counter_ce; unsigned long ras_counter_ce;
unsigned long ras_counter_ue; unsigned long ras_counter_ue;
uint32_t stable_pstate; uint32_t stable_pstate;
struct amdgpu_ctx_mgr *ctx_mgr;
}; };
struct amdgpu_ctx_mgr { struct amdgpu_ctx_mgr {

View file

@ -76,7 +76,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
ssize_t result = 0; ssize_t result = 0;
int r; int r;
bool pm_pg_lock, use_bank, use_ring; bool pm_pg_lock, use_bank, use_ring;
unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid; unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
pm_pg_lock = use_bank = use_ring = false; pm_pg_lock = use_bank = use_ring = false;
instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0; instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
@ -136,10 +136,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
} }
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se_bank, amdgpu_gfx_select_se_sh(adev, se_bank,
sh_bank, instance_bank); sh_bank, instance_bank, 0);
} else if (use_ring) { } else if (use_ring) {
mutex_lock(&adev->srbm_mutex); mutex_lock(&adev->srbm_mutex);
amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
} }
if (pm_pg_lock) if (pm_pg_lock)
@ -169,10 +169,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
end: end:
if (use_bank) { if (use_bank) {
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
} else if (use_ring) { } else if (use_ring) {
amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
} }
@ -208,7 +208,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
{ {
struct amdgpu_debugfs_regs2_data *rd; struct amdgpu_debugfs_regs2_data *rd;
rd = kzalloc(sizeof *rd, GFP_KERNEL); rd = kzalloc(sizeof(*rd), GFP_KERNEL);
if (!rd) if (!rd)
return -ENOMEM; return -ENOMEM;
rd->adev = file_inode(file)->i_private; rd->adev = file_inode(file)->i_private;
@ -221,6 +221,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
{ {
struct amdgpu_debugfs_regs2_data *rd = file->private_data; struct amdgpu_debugfs_regs2_data *rd = file->private_data;
mutex_destroy(&rd->lock); mutex_destroy(&rd->lock);
kfree(file->private_data); kfree(file->private_data);
return 0; return 0;
@ -263,13 +264,13 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
rd->id.grbm.sh, rd->id.grbm.sh,
rd->id.grbm.instance); rd->id.grbm.instance, rd->id.xcc_id);
} }
if (rd->id.use_srbm) { if (rd->id.use_srbm) {
mutex_lock(&adev->srbm_mutex); mutex_lock(&adev->srbm_mutex);
amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe, amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
rd->id.srbm.queue, rd->id.srbm.vmid); rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
} }
if (rd->id.pg_lock) if (rd->id.pg_lock)
@ -295,12 +296,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
} }
end: end:
if (rd->id.use_grbm) { if (rd->id.use_grbm) {
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
} }
if (rd->id.use_srbm) { if (rd->id.use_srbm) {
amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
} }
@ -319,18 +320,45 @@ end:
static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
{ {
struct amdgpu_debugfs_regs2_data *rd = f->private_data; struct amdgpu_debugfs_regs2_data *rd = f->private_data;
struct amdgpu_debugfs_regs2_iocdata v1_data;
int r; int r;
switch (cmd) {
case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
mutex_lock(&rd->lock); mutex_lock(&rd->lock);
r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->id);
mutex_unlock(&rd->lock); switch (cmd) {
return r ? -EINVAL : 0; case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
default: r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
return -EINVAL; sizeof(rd->id));
if (r)
r = -EINVAL;
goto done;
case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
sizeof(v1_data));
if (r) {
r = -EINVAL;
goto done;
} }
return 0; goto v1_copy;
default:
r = -EINVAL;
goto done;
}
v1_copy:
rd->id.use_srbm = v1_data.use_srbm;
rd->id.use_grbm = v1_data.use_grbm;
rd->id.pg_lock = v1_data.pg_lock;
rd->id.grbm.se = v1_data.grbm.se;
rd->id.grbm.sh = v1_data.grbm.sh;
rd->id.grbm.instance = v1_data.grbm.instance;
rd->id.srbm.me = v1_data.srbm.me;
rd->id.srbm.pipe = v1_data.srbm.pipe;
rd->id.srbm.queue = v1_data.srbm.queue;
rd->id.xcc_id = 0;
done:
mutex_unlock(&rd->lock);
return r;
} }
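A hedged userspace sketch of the v2 state ioctl accepted above; the field names mirror the v1-to-v2 copy, while the header name and debugfs path are assumptions supplied by the caller.

/* Userspace sketch only: select an SE/SH/instance on a given XCC, then read. */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "amdgpu_umr.h"         /* assumed header exposing the regs2 iocdata structs */

static int regs2_read_one(const char *regs2_path, uint32_t se, uint32_t sh,
                          uint32_t instance, uint32_t xcc_id,
                          uint64_t byte_offset, uint32_t *value)
{
        struct amdgpu_debugfs_regs2_iocdata_v2 id;
        int fd = open(regs2_path, O_RDONLY);

        if (fd < 0)
                return -1;

        memset(&id, 0, sizeof(id));
        id.use_grbm = 1;
        id.grbm.se = se;
        id.grbm.sh = sh;
        id.grbm.instance = instance;
        id.xcc_id = xcc_id;     /* new in v2: selects the target XCC */

        if (ioctl(fd, AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2, &id) ||
            pread(fd, value, sizeof(*value), byte_offset) != sizeof(*value)) {
                close(fd);
                return -1;
        }
        close(fd);
        return 0;
}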
static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos) static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
@ -343,6 +371,136 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf
return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1); return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
} }
static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
{
struct amdgpu_debugfs_gprwave_data *rd;
rd = kzalloc(sizeof *rd, GFP_KERNEL);
if (!rd)
return -ENOMEM;
rd->adev = file_inode(file)->i_private;
file->private_data = rd;
mutex_init(&rd->lock);
return 0;
}
static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
{
struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
mutex_destroy(&rd->lock);
kfree(file->private_data);
return 0;
}
static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
{
struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
struct amdgpu_device *adev = rd->adev;
ssize_t result = 0;
int r;
uint32_t *data, x;
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
if (!data) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return -ENOMEM;
}
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
if (!rd->id.gpr_or_wave) {
x = 0;
if (adev->gfx.funcs->read_wave_data)
adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
} else {
x = size >> 2;
if (rd->id.gpr.vpgr_or_sgpr) {
if (adev->gfx.funcs->read_wave_vgprs)
adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
} else {
if (adev->gfx.funcs->read_wave_sgprs)
adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
}
}
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
result = -EINVAL;
goto done;
}
while (size && (*pos < x * 4)) {
uint32_t value;
value = data[*pos >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
result = r;
goto done;
}
result += 4;
buf += 4;
*pos += 4;
size -= 4;
}
done:
amdgpu_virt_disable_access_debugfs(adev);
kfree(data);
return result;
}
static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
{
struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
int r = 0;
mutex_lock(&rd->lock);
switch (cmd) {
case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
if (copy_from_user(&rd->id,
(struct amdgpu_debugfs_gprwave_iocdata *)data,
sizeof(rd->id)))
r = -EFAULT;
goto done;
default:
r = -EINVAL;
goto done;
}
done:
mutex_unlock(&rd->lock);
return r;
}
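A hedged userspace sketch for the new amdgpu_gprwave file: select a wave with the SET_STATE ioctl, then read the wave status words. Field names follow the handler above; the header name is an assumption.

/* Userspace sketch only: select a wave and read its status words. */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "amdgpu_umr.h"         /* assumed header exposing the gprwave iocdata struct */

static ssize_t gprwave_read_status(const char *gprwave_path, uint32_t se,
                                   uint32_t sh, uint32_t cu, uint32_t xcc_id,
                                   uint32_t simd, uint32_t wave,
                                   uint32_t *out, size_t words)
{
        struct amdgpu_debugfs_gprwave_iocdata id;
        ssize_t n;
        int fd = open(gprwave_path, O_RDONLY);

        if (fd < 0)
                return -1;

        memset(&id, 0, sizeof(id));
        id.se = se;
        id.sh = sh;
        id.cu = cu;
        id.xcc_id = xcc_id;
        id.simd = simd;
        id.wave = wave;
        id.gpr_or_wave = 0;     /* 0 = wave status, 1 = GPRs (see gpr.* fields) */

        if (ioctl(fd, AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE, &id)) {
                close(fd);
                return -1;
        }
        n = pread(fd, out, words * sizeof(*out), 0);
        close(fd);
        return n;
}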
/** /**
* amdgpu_debugfs_regs_pcie_read - Read from a PCIE register * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
@ -907,13 +1065,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */ /* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu); amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
x = 0; x = 0;
if (adev->gfx.funcs->read_wave_data) if (adev->gfx.funcs->read_wave_data)
adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@ -1001,17 +1159,17 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */ /* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu); amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
if (bank == 0) { if (bank == 0) {
if (adev->gfx.funcs->read_wave_vgprs) if (adev->gfx.funcs->read_wave_vgprs)
adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
} else { } else {
if (adev->gfx.funcs->read_wave_sgprs) if (adev->gfx.funcs->read_wave_sgprs)
adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
} }
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@ -1339,6 +1497,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = {
.llseek = default_llseek .llseek = default_llseek
}; };
static const struct file_operations amdgpu_debugfs_gprwave_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
.read = amdgpu_debugfs_gprwave_read,
.open = amdgpu_debugfs_gprwave_open,
.release = amdgpu_debugfs_gprwave_release,
.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_fops = { static const struct file_operations amdgpu_debugfs_regs_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.read = amdgpu_debugfs_regs_read, .read = amdgpu_debugfs_regs_read,
@ -1416,6 +1583,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
static const struct file_operations *debugfs_regs[] = { static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops, &amdgpu_debugfs_regs2_fops,
&amdgpu_debugfs_gprwave_fops,
&amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_didt_fops,
&amdgpu_debugfs_regs_pcie_fops, &amdgpu_debugfs_regs_pcie_fops,
&amdgpu_debugfs_regs_smc_fops, &amdgpu_debugfs_regs_smc_fops,
@ -1429,9 +1597,10 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_gfxoff_residency_fops, &amdgpu_debugfs_gfxoff_residency_fops,
}; };
static const char *debugfs_regs_names[] = { static const char * const debugfs_regs_names[] = {
"amdgpu_regs", "amdgpu_regs",
"amdgpu_regs2", "amdgpu_regs2",
"amdgpu_gprwave",
"amdgpu_regs_didt", "amdgpu_regs_didt",
"amdgpu_regs_pcie", "amdgpu_regs_pcie",
"amdgpu_regs_smc", "amdgpu_regs_smc",
@ -1459,7 +1628,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i], ent = debugfs_create_file(debugfs_regs_names[i],
S_IFREG | S_IRUGO, root, S_IFREG | 0444, root,
adev, debugfs_regs[i]); adev, debugfs_regs[i]);
if (!i && !IS_ERR_OR_NULL(ent)) if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size); i_size_write(ent->d_inode, adev->rmmio_size);
@ -1470,7 +1639,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev); struct drm_device *dev = adev_to_drm(adev);
int r = 0, i; int r = 0, i;
@ -1494,12 +1663,12 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
kthread_park(ring->sched.thread); kthread_park(ring->sched.thread);
} }
seq_printf(m, "run ib test:\n"); seq_puts(m, "run ib test:\n");
r = amdgpu_ib_ring_tests(adev); r = amdgpu_ib_ring_tests(adev);
if (r) if (r)
seq_printf(m, "ib ring tests failed (%d).\n", r); seq_printf(m, "ib ring tests failed (%d).\n", r);
else else
seq_printf(m, "ib ring tests passed.\n"); seq_puts(m, "ib ring tests passed.\n");
/* go on the scheduler */ /* go on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) { for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@ -1581,7 +1750,7 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val)
static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev); struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file; struct drm_file *file;
int r; int r;
@ -1978,7 +2147,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_ring_init(adev, ring); amdgpu_debugfs_ring_init(adev, ring);
} }
for ( i = 0; i < adev->vcn.num_vcn_inst; i++) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (!amdgpu_vcnfw_log) if (!amdgpu_vcnfw_log)
break; break;

View file

@ -707,6 +707,48 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
return r; return r;
} }
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr)
{
unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
u32 r;
void __iomem *pcie_index_offset;
void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
if (adev->nbio.funcs->get_pcie_index_hi_offset)
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
else
pcie_index_hi = 0;
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
if (pcie_index_hi != 0)
pcie_index_hi_offset = (void __iomem *)adev->rmmio +
pcie_index_hi * 4;
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
if (pcie_index_hi != 0) {
writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
readl(pcie_index_hi_offset);
}
r = readl(pcie_data_offset);
/* clear the high bits */
if (pcie_index_hi != 0) {
writel(0, pcie_index_hi_offset);
readl(pcie_index_hi_offset);
}
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
return r;
}
/** /**
* amdgpu_device_indirect_rreg64 - read a 64bits indirect register * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
* *
@ -747,8 +789,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
* amdgpu_device_indirect_wreg - write an indirect register address * amdgpu_device_indirect_wreg - write an indirect register address
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @pcie_index: mmio register offset
* @pcie_data: mmio register offset
* @reg_addr: indirect register offset * @reg_addr: indirect register offset
* @reg_data: indirect register data * @reg_data: indirect register data
* *
@ -774,12 +814,50 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
} }
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
u64 reg_addr, u32 reg_data)
{
unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
void __iomem *pcie_index_offset;
void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
if (adev->nbio.funcs->get_pcie_index_hi_offset)
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
else
pcie_index_hi = 0;
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
if (pcie_index_hi != 0)
pcie_index_hi_offset = (void __iomem *)adev->rmmio +
pcie_index_hi * 4;
writel(reg_addr, pcie_index_offset);
readl(pcie_index_offset);
if (pcie_index_hi != 0) {
writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
readl(pcie_index_hi_offset);
}
writel(reg_data, pcie_data_offset);
readl(pcie_data_offset);
/* clear the high bits */
if (pcie_index_hi != 0) {
writel(0, pcie_index_hi_offset);
readl(pcie_index_hi_offset);
}
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
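For clarity, a standalone sketch (not driver code) of the address split both helpers perform: the low 32 bits go to the PCIE index register, and bits [39:32] go to the new high-index register when the NBIO exposes one.

/* Standalone sketch of the extended (>32-bit) indirect address split. */
#include <stdint.h>
#include <stdio.h>

struct ext_reg_addr {
        uint32_t index;         /* low 32 bits, written to PCIE index */
        uint32_t index_hi;      /* bits [39:32], written to PCIE index-hi */
};

static struct ext_reg_addr split_ext_reg_addr(uint64_t reg_addr)
{
        struct ext_reg_addr a = {
                .index    = (uint32_t)reg_addr,
                .index_hi = (uint32_t)((reg_addr >> 32) & 0xff),
        };

        return a;
}

int main(void)
{
        struct ext_reg_addr a = split_ext_reg_addr(0x12345678abULL);

        printf("index 0x%08x, index_hi 0x%02x\n", a.index, a.index_hi);
        return 0;
}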
/** /**
* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @pcie_index: mmio register offset
* @pcie_data: mmio register offset
* @reg_addr: indirect register offset * @reg_addr: indirect register offset
* @reg_data: indirect register data * @reg_data: indirect register data
* *
@ -840,6 +918,13 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
return 0; return 0;
} }
static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
BUG();
return 0;
}
/** /**
* amdgpu_invalid_wreg - dummy reg write function * amdgpu_invalid_wreg - dummy reg write function
* *
@ -857,6 +942,13 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
BUG(); BUG();
} }
static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
reg, v);
BUG();
}
/** /**
* amdgpu_invalid_rreg64 - dummy 64 bit reg read function * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
* *
@ -942,7 +1034,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{ {
amdgpu_asic_pre_asic_init(adev); amdgpu_asic_pre_asic_init(adev);
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
return amdgpu_atomfirmware_asic_init(adev, true); return amdgpu_atomfirmware_asic_init(adev, true);
else else
return amdgpu_atom_asic_init(adev->mode_info.atom_context); return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@ -998,7 +1091,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
if (array_size % 3) if (array_size % 3)
return; return;
for (i = 0; i < array_size; i +=3) { for (i = 0; i < array_size; i += 3) {
reg = registers[i + 0]; reg = registers[i + 0];
and_mask = registers[i + 1]; and_mask = registers[i + 1];
or_mask = registers[i + 2]; or_mask = registers[i + 2];
@ -1090,7 +1183,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
* doorbells are in the first page. So with paging queue enabled, * doorbells are in the first page. So with paging queue enabled,
* the max num_kernel_doorbells should + 1 page (0x400 in dword) * the max num_kernel_doorbells should + 1 page (0x400 in dword)
*/ */
if (adev->asic_type >= CHIP_VEGA10) if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
adev->doorbell.num_kernel_doorbells += 0x400; adev->doorbell.num_kernel_doorbells += 0x400;
} }
@ -1291,6 +1385,15 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
return 0; return 0;
} }
static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
{
if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) {
return false;
}
return true;
}
/* /*
* GPU helpers function. * GPU helpers function.
*/ */
@ -1310,6 +1413,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
return false; return false;
if (!amdgpu_device_read_bios(adev))
return false;
if (amdgpu_passthrough(adev)) { if (amdgpu_passthrough(adev)) {
/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
* some old smc fw still need driver do vPost otherwise gpu hang, while * some old smc fw still need driver do vPost otherwise gpu hang, while
@ -1547,7 +1653,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs); amdgpu_sched_jobs);
amdgpu_sched_jobs = 4; amdgpu_sched_jobs = 4;
} else if (!is_power_of_2(amdgpu_sched_jobs)){ } else if (!is_power_of_2(amdgpu_sched_jobs)) {
dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
amdgpu_sched_jobs); amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
@ -2194,7 +2300,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
total = true; total = true;
for (i = 0; i < adev->num_ip_blocks; i++) { for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n", DRM_WARN("disabled ip block: %d <%s>\n",
i, adev->ip_blocks[i].version->funcs->name); i, adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false; adev->ip_blocks[i].status.valid = false;
} else { } else {
@ -2220,6 +2326,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return r; return r;
/* Read BIOS */ /* Read BIOS */
if (amdgpu_device_read_bios(adev)) {
if (!amdgpu_get_bios(adev)) if (!amdgpu_get_bios(adev))
return -EINVAL; return -EINVAL;
@ -2229,6 +2336,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
return r; return r;
} }
}
/*get pf2vf msg info at it's earliest time*/ /*get pf2vf msg info at it's earliest time*/
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
@ -2376,6 +2484,8 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
} }
} }
amdgpu_xcp_update_partition_sched_list(adev);
return 0; return 0;
} }
@ -2533,8 +2643,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
goto init_failed; goto init_failed;
/* Don't init kfd if whole hive need to be reset during init */ /* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset) if (!adev->gmc.xgmi.pending_reset) {
kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev); amdgpu_amdkfd_device_init(adev);
}
amdgpu_fru_get_product_info(adev); amdgpu_fru_get_product_info(adev);
@ -2759,8 +2871,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)|| if (amdgpu_passthrough(adev) &&
adev->asic_type == CHIP_ALDEBARAN )) ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
adev->asic_type == CHIP_ALDEBARAN))
amdgpu_dpm_handle_passthrough_sbr(adev, true); amdgpu_dpm_handle_passthrough_sbr(adev, true);
if (adev->gmc.xgmi.num_physical_nodes > 1) { if (adev->gmc.xgmi.num_physical_nodes > 1) {
@ -3089,7 +3202,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
} }
adev->ip_blocks[i].status.hw = false; adev->ip_blocks[i].status.hw = false;
/* handle putting the SMC in the appropriate state */ /* handle putting the SMC in the appropriate state */
if(!amdgpu_sriov_vf(adev)){ if (!amdgpu_sriov_vf(adev)) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
if (r) { if (r) {
@ -3608,6 +3721,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->smc_wreg = &amdgpu_invalid_wreg; adev->smc_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg = &amdgpu_invalid_rreg; adev->pcie_rreg = &amdgpu_invalid_rreg;
adev->pcie_wreg = &amdgpu_invalid_wreg; adev->pcie_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
adev->pciep_rreg = &amdgpu_invalid_rreg; adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg; adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg64 = &amdgpu_invalid_rreg64; adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
@ -3633,6 +3748,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->srbm_mutex); mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->gfx.gfx_off_mutex); mutex_init(&adev->gfx.gfx_off_mutex);
mutex_init(&adev->gfx.partition_mutex);
mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock); mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock); mutex_init(&adev->virt.vf_errors.lock);
@ -3708,8 +3824,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
amdgpu_device_get_pcie_info(adev);
if (amdgpu_mcbp) if (amdgpu_mcbp)
DRM_INFO("MCBP is enabled\n"); DRM_INFO("MCBP is enabled\n");
@ -3725,6 +3839,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* detect hw virtualization here */ /* detect hw virtualization here */
amdgpu_detect_virtualization(adev); amdgpu_detect_virtualization(adev);
amdgpu_device_get_pcie_info(adev);
r = amdgpu_device_get_job_timeout_settings(adev); r = amdgpu_device_get_job_timeout_settings(adev);
if (r) { if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
@ -3753,21 +3869,24 @@ int amdgpu_device_init(struct amdgpu_device *adev,
} }
/* enable PCIE atomic ops */ /* enable PCIE atomic ops */
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev)) {
if (adev->virt.fw_reserve.p_pf2vf)
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
/* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
* internal path natively supports atomics, so set have_atomics_support to true. * internal path natively supports atomics, so set have_atomics_support to true.
*/ */
else if ((adev->flags & AMD_IS_APU) && } else if ((adev->flags & AMD_IS_APU) &&
(adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
adev->have_atomics_support = true; adev->have_atomics_support = true;
else } else {
adev->have_atomics_support = adev->have_atomics_support =
!pci_enable_atomic_ops_to_root(adev->pdev, !pci_enable_atomic_ops_to_root(adev->pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64); PCI_EXP_DEVCAP2_ATOMIC_COMP64);
}
if (!adev->have_atomics_support) if (!adev->have_atomics_support)
dev_info(adev->dev, "PCIE atomic ops is not supported\n"); dev_info(adev->dev, "PCIE atomic ops is not supported\n");
@ -3783,6 +3902,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_reset_init(adev); amdgpu_reset_init(adev);
/* detect if we are with an SRIOV vbios */ /* detect if we are with an SRIOV vbios */
if (adev->bios)
amdgpu_device_detect_sriov_bios(adev); amdgpu_device_detect_sriov_bios(adev);
/* check if we need to reset the asic /* check if we need to reset the asic
@ -3835,6 +3955,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
} }
} }
if (adev->bios) {
if (adev->is_atom_fw) { if (adev->is_atom_fw) {
/* Initialize clocks */ /* Initialize clocks */
r = amdgpu_atomfirmware_get_clock_info(adev); r = amdgpu_atomfirmware_get_clock_info(adev);
@ -3855,6 +3976,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (!amdgpu_device_has_dc_support(adev)) if (!amdgpu_device_has_dc_support(adev))
amdgpu_atombios_i2c_init(adev); amdgpu_atombios_i2c_init(adev);
} }
}
fence_driver_init: fence_driver_init:
/* Fence driver */ /* Fence driver */
@ -4019,7 +4141,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = NULL; adev->mman.aper_base_kaddr = NULL;
/* Memory manager related */ /* Memory manager related */
if (!adev->gmc.xgmi.connected_to_cpu) { if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
arch_phys_wc_del(adev->gmc.vram_mtrr); arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
} }
@ -4049,7 +4171,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable all interrupts */ /* disable all interrupts */
amdgpu_irq_disable_all(adev); amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){ if (adev->mode_info.mode_config_initialized) {
if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
drm_helper_force_disable_all(adev_to_drm(adev)); drm_helper_force_disable_all(adev_to_drm(adev));
else else
@ -5478,7 +5600,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
/* covers APUs as well */ /* covers APUs as well */
if (pci_is_root_bus(adev->pdev->bus)) { if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
if (adev->pm.pcie_gen_mask == 0) if (adev->pm.pcie_gen_mask == 0)
adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
if (adev->pm.pcie_mlw_mask == 0) if (adev->pm.pcie_mlw_mask == 0)
@ -5959,6 +6081,7 @@ void amdgpu_device_halt(struct amdgpu_device *adev)
struct pci_dev *pdev = adev->pdev; struct pci_dev *pdev = adev->pdev;
struct drm_device *ddev = adev_to_drm(adev); struct drm_device *ddev = adev_to_drm(adev);
amdgpu_xcp_dev_unplug(adev);
drm_dev_unplug(ddev); drm_dev_unplug(ddev);
amdgpu_irq_disable_all(adev); amdgpu_irq_disable_all(adev);
@ -6079,3 +6202,31 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
return true; return true;
} }
} }
uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
uint32_t inst, uint32_t reg_addr, char reg_name[],
uint32_t expected_value, uint32_t mask)
{
uint32_t ret = 0;
uint32_t old_ = 0;
uint32_t tmp_ = RREG32(reg_addr);
uint32_t loop = adev->usec_timeout;
while ((tmp_ & (mask)) != (expected_value)) {
if (old_ != tmp_) {
loop = adev->usec_timeout;
old_ = tmp_;
} else
udelay(1);
tmp_ = RREG32(reg_addr);
loop--;
if (!loop) {
DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
inst, reg_name, (uint32_t)expected_value,
(uint32_t)(tmp_ & (mask)));
ret = -ETIMEDOUT;
break;
}
}
return ret;
}
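A standalone sketch (not driver code) of the polling pattern used above: the timeout only counts down while the register value is stuck and is re-armed whenever the value changes; the fake register is purely for demonstration.

/* Standalone sketch of the "re-arm while the value is still moving" poll. */
#include <stdint.h>
#include <stdio.h>

static int poll_until(uint32_t (*read_reg)(void *ctx), void *ctx,
                      uint32_t mask, uint32_t expected, unsigned int timeout)
{
        uint32_t old = 0, val = read_reg(ctx);
        unsigned int loop = timeout;

        while ((val & mask) != expected) {
                if (old != val) {
                        loop = timeout;         /* value changed: re-arm timeout */
                        old = val;
                }
                val = read_reg(ctx);
                if (!--loop)
                        return -1;              /* stuck at the same wrong value */
        }
        return 0;
}

static uint32_t fake_counting_reg(void *ctx)
{
        uint32_t *counter = ctx;

        return (*counter)++;                    /* pretend the register counts up */
}

int main(void)
{
        uint32_t counter = 0;

        printf("poll %s\n",
               poll_until(fake_counting_reg, &counter, 0xff, 0x10, 1000) ?
               "timed out" : "succeeded");
        return 0;
}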

View file

@ -30,6 +30,7 @@
#include "soc15.h" #include "soc15.h"
#include "gfx_v9_0.h" #include "gfx_v9_0.h"
#include "gfx_v9_4_3.h"
#include "gmc_v9_0.h" #include "gmc_v9_0.h"
#include "df_v1_7.h" #include "df_v1_7.h"
#include "df_v3_6.h" #include "df_v3_6.h"
@ -76,12 +77,15 @@
#include "jpeg_v3_0.h" #include "jpeg_v3_0.h"
#include "vcn_v4_0.h" #include "vcn_v4_0.h"
#include "jpeg_v4_0.h" #include "jpeg_v4_0.h"
#include "vcn_v4_0_3.h"
#include "jpeg_v4_0_3.h"
#include "amdgpu_vkms.h" #include "amdgpu_vkms.h"
#include "mes_v10_1.h" #include "mes_v10_1.h"
#include "mes_v11_0.h" #include "mes_v11_0.h"
#include "smuio_v11_0.h" #include "smuio_v11_0.h"
#include "smuio_v11_0_6.h" #include "smuio_v11_0_6.h"
#include "smuio_v13_0.h" #include "smuio_v13_0.h"
#include "smuio_v13_0_3.h"
#include "smuio_v13_0_6.h" #include "smuio_v13_0_6.h"
#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
@ -200,14 +204,44 @@ static int hw_id_map[MAX_HWIP] = {
[PCIE_HWIP] = PCIE_HWID, [PCIE_HWIP] = PCIE_HWID,
}; };
static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary) static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
{
u64 tmr_offset, tmr_size, pos;
void *discv_regn;
int ret;
ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
if (ret)
return ret;
pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
/* This region is read-only and reserved from system use */
discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
if (discv_regn) {
memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
memunmap(discv_regn);
return 0;
}
return -ENOENT;
}
static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
uint8_t *binary)
{ {
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; int ret = 0;
if (vram_size) {
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
adev->mman.discovery_tmr_size, false); adev->mman.discovery_tmr_size, false);
return 0; } else {
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
return ret;
} }
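For reference, a standalone sketch (not driver code) of the source-selection order that results from this change together with the amdgpu_discovery_init() rework below: a file override first, then the VRAM tail when a carve-out size is reported, otherwise the ACPI-described system-memory TMR.

/* Standalone sketch of the IP-discovery source selection. */
#include <stdint.h>
#include <stdio.h>

enum discovery_source { FROM_FILE, FROM_VRAM_TAIL, FROM_SYSMEM_TMR };

static enum discovery_source pick_discovery_source(int discovery_param,
                                                   uint64_t reported_vram_bytes)
{
        if (discovery_param == 2)
                return FROM_FILE;               /* amdgpu.discovery=2 forces the file */
        if (reported_vram_bytes)
                return FROM_VRAM_TAIL;          /* DISCOVERY_TMR_OFFSET from the end */
        return FROM_SYSMEM_TMR;                 /* no carve-out: memremap the ACPI TMR */
}

int main(void)
{
        printf("%d %d %d\n",
               pick_discovery_source(2, 0),
               pick_discovery_source(0, 8ULL << 30),
               pick_discovery_source(0, 0));
        return 0;
}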
static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
@ -280,6 +314,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
case 0xCF: case 0xCF:
case 0xDF: case 0xDF:
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
break; break;
default: default:
break; break;
@@ -301,33 +336,30 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
 	if (!adev->mman.discovery_bin)
 		return -ENOMEM;

-	r = amdgpu_discovery_read_binary_from_vram(adev, adev->mman.discovery_bin);
-	if (r) {
-		dev_err(adev->dev, "failed to read ip discovery binary from vram\n");
-		r = -EINVAL;
-		goto out;
-	}
-
-	if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) {
-		/* ignore the discovery binary from vram if discovery=2 in kernel module parameter */
-		if (amdgpu_discovery == 2)
-			dev_info(adev->dev,"force read ip discovery binary from file");
-		else
-			dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n");
+	/* Read from file if it is the preferred option */
+	if (amdgpu_discovery == 2) {
+		dev_info(adev->dev, "use ip discovery information from file");
+		r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);

-		/* retry read ip discovery binary from file */
-		r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
 		if (r) {
 			dev_err(adev->dev, "failed to read ip discovery binary from file\n");
 			r = -EINVAL;
 			goto out;
 		}
-		/* check the ip discovery binary signature */
-		if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
-			dev_warn(adev->dev, "get invalid ip discovery binary signature from file\n");
-			r = -EINVAL;
+
+	} else {
+		r = amdgpu_discovery_read_binary_from_mem(
+			adev, adev->mman.discovery_bin);
+		if (r)
 			goto out;
-		}
+	}
+
+	/* check the ip discovery binary signature */
+	if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
+		dev_err(adev->dev,
+			"get invalid ip discovery binary signature\n");
+		r = -EINVAL;
+		goto out;
 	}

 	bhdr = (struct binary_header *)adev->mman.discovery_bin;
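/* Usage sketch (illustration only): with the flow above, loading the driver
 * with the existing "discovery" module parameter set to 2 makes the IP
 * discovery table come from the amdgpu/ip_discovery.bin firmware file rather
 * than the copy kept in VRAM or system memory, e.g. on the kernel command
 * line:
 *
 *	amdgpu.discovery=2
 */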
@@ -471,11 +503,11 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev)
 	adev->mman.discovery_bin = NULL;
 }

-static int amdgpu_discovery_validate_ip(const struct ip *ip)
+static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip)
 {
-	if (ip->number_instance >= HWIP_MAX_INSTANCE) {
-		DRM_ERROR("Unexpected number_instance (%d) from ip discovery blob\n",
-			  ip->number_instance);
+	if (ip->instance_number >= HWIP_MAX_INSTANCE) {
+		DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n",
+			  ip->instance_number);
 		return -EINVAL;
 	}

 	if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
@ -493,7 +525,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
struct binary_header *bhdr; struct binary_header *bhdr;
struct ip_discovery_header *ihdr; struct ip_discovery_header *ihdr;
struct die_header *dhdr; struct die_header *dhdr;
struct ip *ip; struct ip_v4 *ip;
uint16_t die_offset, ip_offset, num_dies, num_ips; uint16_t die_offset, ip_offset, num_dies, num_ips;
int i, j; int i, j;
@ -510,19 +542,28 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
ip_offset = die_offset + sizeof(*dhdr); ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) { for (j = 0; j < num_ips; j++) {
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
if (amdgpu_discovery_validate_ip(ip)) if (amdgpu_discovery_validate_ip(ip))
goto next_ip; goto next_ip;
if (le16_to_cpu(ip->harvest) == 1) { if (le16_to_cpu(ip->variant) == 1) {
switch (le16_to_cpu(ip->hw_id)) { switch (le16_to_cpu(ip->hw_id)) {
case VCN_HWID: case VCN_HWID:
(*vcn_harvest_count)++; (*vcn_harvest_count)++;
if (ip->number_instance == 0) if (ip->instance_number == 0) {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
else adev->vcn.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN0;
adev->jpeg.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN0;
} else {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
adev->vcn.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN1;
adev->jpeg.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN1;
}
break; break;
case DMU_HWID: case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
@ -532,6 +573,9 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
} }
} }
next_ip: next_ip:
if (ihdr->base_addr_64_bit)
ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
else
ip_offset += struct_size(ip, base_address, ip->num_base_address); ip_offset += struct_size(ip, base_address, ip->num_base_address);
} }
} }
@ -564,10 +608,15 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
switch (le16_to_cpu(harvest_info->list[i].hw_id)) { switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
case VCN_HWID: case VCN_HWID:
(*vcn_harvest_count)++; (*vcn_harvest_count)++;
if (harvest_info->list[i].number_instance == 0) adev->vcn.harvest_config |=
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; (1 << harvest_info->list[i].number_instance);
else adev->jpeg.harvest_config |=
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; (1 << harvest_info->list[i].number_instance);
adev->vcn.inst_mask &=
~(1U << harvest_info->list[i].number_instance);
adev->jpeg.inst_mask &=
~(1U << harvest_info->list[i].number_instance);
break; break;
case DMU_HWID: case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
@ -577,6 +626,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
1 << (le16_to_cpu(harvest_info->list[i].number_instance)); 1 << (le16_to_cpu(harvest_info->list[i].number_instance));
(*umc_harvest_count)++; (*umc_harvest_count)++;
break; break;
case GC_HWID:
adev->gfx.xcc_mask &=
~(1U << harvest_info->list[i].number_instance);
break;
case SDMA0_HWID:
adev->sdma.sdma_mask &=
~(1U << harvest_info->list[i].number_instance);
break;
default: default:
break; break;
} }
@ -836,9 +893,40 @@ static void ip_disc_release(struct kobject *kobj)
kfree(ip_top); kfree(ip_top);
} }
static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
uint16_t hw_id, uint8_t inst)
{
uint8_t harvest = 0;
/* Until a uniform way is figured, get mask based on hwid */
switch (hw_id) {
case VCN_HWID:
harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
break;
case DMU_HWID:
if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
harvest = 0x1;
break;
case UMC_HWID:
/* TODO: It needs another parsing; for now, ignore.*/
break;
case GC_HWID:
harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
break;
case SDMA0_HWID:
harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
break;
default:
break;
}
return harvest;
}
static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_die_entry *ip_die_entry, struct ip_die_entry *ip_die_entry,
const size_t _ip_offset, const int num_ips) const size_t _ip_offset, const int num_ips,
bool reg_base_64)
{ {
int ii, jj, kk, res; int ii, jj, kk, res;
@ -852,10 +940,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
size_t ip_offset = _ip_offset; size_t ip_offset = _ip_offset;
for (jj = 0; jj < num_ips; jj++) { for (jj = 0; jj < num_ips; jj++) {
struct ip *ip; struct ip_v4 *ip;
struct ip_hw_instance *ip_hw_instance; struct ip_hw_instance *ip_hw_instance;
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
if (amdgpu_discovery_validate_ip(ip) || if (amdgpu_discovery_validate_ip(ip) ||
le16_to_cpu(ip->hw_id) != ii) le16_to_cpu(ip->hw_id) != ii)
goto next_ip; goto next_ip;
@ -903,22 +991,35 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
return -ENOMEM; return -ENOMEM;
} }
ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */ ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
ip_hw_instance->num_instance = ip->number_instance; ip_hw_instance->num_instance = ip->instance_number;
ip_hw_instance->major = ip->major; ip_hw_instance->major = ip->major;
ip_hw_instance->minor = ip->minor; ip_hw_instance->minor = ip->minor;
ip_hw_instance->revision = ip->revision; ip_hw_instance->revision = ip->revision;
ip_hw_instance->harvest = ip->harvest; ip_hw_instance->harvest =
amdgpu_discovery_get_harvest_info(
adev, ip_hw_instance->hw_id,
ip_hw_instance->num_instance);
ip_hw_instance->num_base_addresses = ip->num_base_address; ip_hw_instance->num_base_addresses = ip->num_base_address;
for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
if (reg_base_64)
ip_hw_instance->base_addr[kk] =
lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
else
ip_hw_instance->base_addr[kk] = ip->base_address[kk]; ip_hw_instance->base_addr[kk] = ip->base_address[kk];
}
kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype); kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset; ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
res = kobject_add(&ip_hw_instance->kobj, NULL, res = kobject_add(&ip_hw_instance->kobj, NULL,
"%d", ip_hw_instance->num_instance); "%d", ip_hw_instance->num_instance);
next_ip: next_ip:
ip_offset += struct_size(ip, base_address, ip->num_base_address); if (reg_base_64)
ip_offset += struct_size(ip, base_address_64,
ip->num_base_address);
else
ip_offset += struct_size(ip, base_address,
ip->num_base_address);
} }
} }
@ -972,7 +1073,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
return res; return res;
} }
amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips); amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
} }
return 0; return 0;
@ -983,6 +1084,9 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
struct kset *die_kset; struct kset *die_kset;
int res, ii; int res, ii;
if (!adev->mman.discovery_bin)
return -EINVAL;
adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL); adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
if (!adev->ip_top) if (!adev->ip_top)
return -ENOMEM; return -ENOMEM;
@ -1082,7 +1186,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
struct binary_header *bhdr; struct binary_header *bhdr;
struct ip_discovery_header *ihdr; struct ip_discovery_header *ihdr;
struct die_header *dhdr; struct die_header *dhdr;
struct ip *ip; struct ip_v4 *ip;
uint16_t die_offset; uint16_t die_offset;
uint16_t ip_offset; uint16_t ip_offset;
uint16_t num_dies; uint16_t num_dies;
@ -1098,6 +1202,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
return r; return r;
} }
adev->gfx.xcc_mask = 0;
adev->sdma.sdma_mask = 0;
adev->vcn.inst_mask = 0;
adev->jpeg.inst_mask = 0;
bhdr = (struct binary_header *)adev->mman.discovery_bin; bhdr = (struct binary_header *)adev->mman.discovery_bin;
ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
@ -1121,7 +1229,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips); le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) { for (j = 0; j < num_ips; j++) {
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
if (amdgpu_discovery_validate_ip(ip)) if (amdgpu_discovery_validate_ip(ip))
goto next_ip; goto next_ip;
@ -1131,7 +1239,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
hw_id_names[le16_to_cpu(ip->hw_id)], hw_id_names[le16_to_cpu(ip->hw_id)],
le16_to_cpu(ip->hw_id), le16_to_cpu(ip->hw_id),
ip->number_instance, ip->instance_number,
ip->major, ip->minor, ip->major, ip->minor,
ip->revision); ip->revision);
@ -1145,44 +1253,72 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
ip->revision & 0xc0; ip->revision & 0xc0;
ip->revision &= ~0xc0; ip->revision &= ~0xc0;
if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) if (adev->vcn.num_vcn_inst <
AMDGPU_MAX_VCN_INSTANCES) {
adev->vcn.num_vcn_inst++; adev->vcn.num_vcn_inst++;
else adev->vcn.inst_mask |=
(1U << ip->instance_number);
adev->jpeg.inst_mask |=
(1U << ip->instance_number);
} else {
dev_err(adev->dev, "Too many VCN instances: %d vs %d\n", dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
adev->vcn.num_vcn_inst + 1, adev->vcn.num_vcn_inst + 1,
AMDGPU_MAX_VCN_INSTANCES); AMDGPU_MAX_VCN_INSTANCES);
} }
}
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
le16_to_cpu(ip->hw_id) == SDMA2_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
le16_to_cpu(ip->hw_id) == SDMA3_HWID) { le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES) if (adev->sdma.num_instances <
AMDGPU_MAX_SDMA_INSTANCES) {
adev->sdma.num_instances++; adev->sdma.num_instances++;
else adev->sdma.sdma_mask |=
(1U << ip->instance_number);
} else {
dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n", dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
adev->sdma.num_instances + 1, adev->sdma.num_instances + 1,
AMDGPU_MAX_SDMA_INSTANCES); AMDGPU_MAX_SDMA_INSTANCES);
} }
}
if (le16_to_cpu(ip->hw_id) == UMC_HWID) { if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
adev->gmc.num_umc++; adev->gmc.num_umc++;
adev->umc.node_inst_num++; adev->umc.node_inst_num++;
} }
if (le16_to_cpu(ip->hw_id) == GC_HWID)
adev->gfx.xcc_mask |=
(1U << ip->instance_number);
for (k = 0; k < num_base_address; k++) { for (k = 0; k < num_base_address; k++) {
/* /*
* convert the endianness of base addresses in place, * convert the endianness of base addresses in place,
* so that we don't need to convert them when accessing adev->reg_offset. * so that we don't need to convert them when accessing adev->reg_offset.
*/ */
if (ihdr->base_addr_64_bit)
/* Truncate the 64bit base address from ip discovery
* and only store lower 32bit ip base in reg_offset[].
* Bits > 32 follows ASIC specific format, thus just
* discard them and handle it within specific ASIC.
* By this way reg_offset[] and related helpers can
* stay unchanged.
* The base address is in dwords, thus clear the
* highest 2 bits to store.
*/
ip->base_address[k] =
lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
else
ip->base_address[k] = le32_to_cpu(ip->base_address[k]); ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
DRM_DEBUG("\t0x%08x\n", ip->base_address[k]); DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
} }
for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) { for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) { if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
hw_id_map[hw_ip] != 0) {
DRM_DEBUG("set register base offset for %s\n", DRM_DEBUG("set register base offset for %s\n",
hw_id_names[le16_to_cpu(ip->hw_id)]); hw_id_names[le16_to_cpu(ip->hw_id)]);
adev->reg_offset[hw_ip][ip->number_instance] = adev->reg_offset[hw_ip][ip->instance_number] =
ip->base_address; ip->base_address;
/* Instance support is somewhat inconsistent. /* Instance support is somewhat inconsistent.
* SDMA is a good example. Sienna cichlid has 4 total * SDMA is a good example. Sienna cichlid has 4 total
@ -1193,69 +1329,22 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
* example. On most chips there are multiple instances * example. On most chips there are multiple instances
* with the same HWID. * with the same HWID.
*/ */
adev->ip_versions[hw_ip][ip->number_instance] = adev->ip_versions[hw_ip][ip->instance_number] =
IP_VERSION(ip->major, ip->minor, ip->revision); IP_VERSION(ip->major, ip->minor, ip->revision);
} }
} }
 next_ip:
+			if (ihdr->base_addr_64_bit)
+				ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+			else
 				ip_offset += struct_size(ip, base_address, ip->num_base_address);
 		}
 	}

-	amdgpu_discovery_sysfs_init(adev);
-
 	return 0;
 }
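/* Worked example (illustration only) of the truncation described in the
 * comment above: for a 64-bit base address taken from an ip_v4 entry, only
 * the low 32 bits are kept and the top two bits of that dword-granular value
 * are cleared before it is stored in reg_offset[]. Helper name is
 * hypothetical, not part of this patch.
 */
static uint32_t example_truncate_base_address_64(uint64_t base_address_64)
{
	/* e.g. 0x0000123487654321 -> 0x07654321 */
	return lower_32_bits(base_address_64) & 0x3FFFFFFF;
}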
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
int *major, int *minor, int *revision)
{
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
struct ip *ip;
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
uint16_t num_ips;
int i, j;
if (!adev->mman.discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
bhdr = (struct binary_header *)adev->mman.discovery_bin;
ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) {
if (major)
*major = ip->major;
if (minor)
*minor = ip->minor;
if (revision)
*revision = ip->revision;
return 0;
}
ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
return -EINVAL;
}
 static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
 {
 	int vcn_harvest_count = 0;
@@ -1266,7 +1355,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
 	 * so read harvest bit per IP data structure to set
 	 * harvest configuration.
 	 */
-	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0)) {
+	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) &&
+	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) {
 		if ((adev->pdev->device == 0x731E &&
 		     (adev->pdev->revision == 0xC6 ||
 		      adev->pdev->revision == 0xC7)) ||
@@ -1706,6 +1796,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(13, 0, 3):
 	case IP_VERSION(13, 0, 4):
 	case IP_VERSION(13, 0, 5):
+	case IP_VERSION(13, 0, 6):
 	case IP_VERSION(13, 0, 7):
 	case IP_VERSION(13, 0, 8):
 	case IP_VERSION(13, 0, 10):
@@ -1804,6 +1895,9 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(9, 4, 2):
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		break;
+	case IP_VERSION(9, 4, 3):
+		amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
+		break;
 	case IP_VERSION(10, 1, 10):
 	case IP_VERSION(10, 1, 2):
 	case IP_VERSION(10, 1, 1):
@@ -1939,7 +2033,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(3, 1, 1):
 	case IP_VERSION(3, 1, 2):
 	case IP_VERSION(3, 0, 2):
-	case IP_VERSION(3, 0, 192):
 		amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
@@ -1952,7 +2045,11 @@
 	case IP_VERSION(4, 0, 4):
 		amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
-		return 0;
+		break;
+	case IP_VERSION(4, 0, 3):
+		amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
+		amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
+		break;
 	default:
 		dev_err(adev->dev,
 			"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@@ -2000,6 +2097,17 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
 	return 0;
 }

static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
{
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 4, 3):
aqua_vanjaram_init_soc_config(adev);
break;
default:
break;
}
}
 int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 {
 	int r;

@@ -2177,6 +2285,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		break;
 	}

+	amdgpu_discovery_init_soc_config(adev);
+	amdgpu_discovery_sysfs_init(adev);
+
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(9, 0, 1):
 	case IP_VERSION(9, 2, 1):
@@ -2387,6 +2498,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(13, 0, 2):
 		adev->smuio.funcs = &smuio_v13_0_funcs;
 		break;
+	case IP_VERSION(13, 0, 3):
+		adev->smuio.funcs = &smuio_v13_0_3_funcs;
+		if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
+			adev->flags |= AMD_IS_APU;
+		}
+		break;
 	case IP_VERSION(13, 0, 6):
 	case IP_VERSION(13, 0, 8):
 		adev->smuio.funcs = &smuio_v13_0_6_funcs;


@@ -24,12 +24,10 @@
 #ifndef __AMDGPU_DISCOVERY__
 #define __AMDGPU_DISCOVERY__

-#define DISCOVERY_TMR_SIZE	(4 << 10)
+#define DISCOVERY_TMR_SIZE	(8 << 10)
 #define DISCOVERY_TMR_OFFSET	(64 << 10)

 void amdgpu_discovery_fini(struct amdgpu_device *adev);
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
-				    int *major, int *minor, int *revision);
-
 int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);

 #endif /* __AMDGPU_DISCOVERY__ */


@@ -98,7 +98,7 @@ static void amdgpu_display_flip_callback(struct dma_fence *f,
 static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
 					     struct dma_fence **f)
 {
-	struct dma_fence *fence= *f;
+	struct dma_fence *fence = *f;

 	if (fence == NULL)
 		return false;
@@ -1252,21 +1252,21 @@ const struct drm_mode_config_funcs amdgpu_mode_funcs = {
 	.fb_create = amdgpu_display_user_framebuffer_create,
 };

-static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] =
-{	{ UNDERSCAN_OFF, "off" },
+static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
+	{ UNDERSCAN_OFF, "off" },
 	{ UNDERSCAN_ON, "on" },
 	{ UNDERSCAN_AUTO, "auto" },
 };

-static const struct drm_prop_enum_list amdgpu_audio_enum_list[] =
-{	{ AMDGPU_AUDIO_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
+	{ AMDGPU_AUDIO_DISABLE, "off" },
 	{ AMDGPU_AUDIO_ENABLE, "on" },
 	{ AMDGPU_AUDIO_AUTO, "auto" },
 };

 /* XXX support different dither options? spatial, temporal, both, etc. */
-static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
-{	{ AMDGPU_FMT_DITHER_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
+	{ AMDGPU_FMT_DITHER_DISABLE, "off" },
 	{ AMDGPU_FMT_DITHER_ENABLE, "on" },
 };

@@ -1496,8 +1496,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
 		ret |= DRM_SCANOUTPOS_ACCURATE;
 		vbl_start = vbl & 0x1fff;
 		vbl_end = (vbl >> 16) & 0x1fff;
-	}
-	else {
+	} else {
 		/* No: Fake something reasonable which gives at least ok results. */
 		vbl_start = mode->crtc_vdisplay;
 		vbl_end = 0;


@@ -149,7 +149,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
 	if (!bo->tbo.pin_count) {
 		/* move buffer into GTT or VRAM */
 		struct ttm_operation_ctx ctx = { false, false };
-		unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
+		unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;

 		if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
 		    attach->peer2peer) {
@@ -336,7 +336,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 	ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
 				       AMDGPU_GEM_DOMAIN_CPU, flags,
-				       ttm_bo_type_sg, resv, &gobj);
+				       ttm_bo_type_sg, resv, &gobj, 0);
 	if (ret)
 		goto error;


@@ -59,7 +59,7 @@ struct amdgpu_doorbell_index {
 	uint32_t gfx_ring1;
 	uint32_t gfx_userqueue_start;
 	uint32_t gfx_userqueue_end;
-	uint32_t sdma_engine[8];
+	uint32_t sdma_engine[16];
 	uint32_t mes_ring0;
 	uint32_t mes_ring1;
 	uint32_t ih;
@@ -86,6 +86,8 @@ struct amdgpu_doorbell_index {
 	uint32_t max_assignment;
 	/* Per engine SDMA doorbell size in dword */
 	uint32_t sdma_doorbell_range;
+	/* Per xcc doorbell size for KIQ/KCQ */
+	uint32_t xcc_doorbell_range;
 };

 typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
@@ -164,7 +166,15 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
 	AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP             = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
 	AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP              = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,

-	AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT             = 0x18F,
+	/* kiq/kcq from second XCD. Max 8 XCDs */
+	AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START             = 0x190,
+	/* 8 compute rings per GC. Max to 0x1CE */
+	AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START       = 0x197,
+
+	/* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+	AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START            = 0x1D0,
+
+	AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT             = 0x1F7,
 	AMDGPU_VEGA20_DOORBELL_INVALID                    = 0xFFFF
 } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
@@ -301,6 +311,36 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
 	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
 } AMDGPU_DOORBELL64_ASSIGNMENT;

typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
/* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */
/* KIQ/HIQ/DIQ */
AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
/* Compute: 0x08 ~ 0x20 */
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
/* SDMA: 0x100 ~ 0x19F */
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
/* VCN: 0x1B0 ~ 0x1D4 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
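/* Minimal sketch (illustration only) of how the LAYOUT1 ranges above compose:
 * per-XCC CP doorbells are packed in blocks of
 * AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE, so a compute-ring doorbell for a given
 * XCC/ring pair can be derived as below. The helper name is hypothetical and
 * not part of this patch.
 */
static u32 example_layout1_mec_doorbell(u32 xcc_id, u32 ring)
{
	return AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START +
	       xcc_id * AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE + ring;
}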
 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);


@@ -50,6 +50,7 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_reset.h"
+#include "../amdxcp/amdgpu_xcp_drv.h"

 /*
  * KMS wrapper.
@@ -110,9 +111,11 @@
 * 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
 *             tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
 *             gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
+ * 3.53.0 - Support for GFX11 CP GFX shadowing
+ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
 */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	52
+#define KMS_DRIVER_MINOR	54
 #define KMS_DRIVER_PATCHLEVEL	0

 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -150,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu;
 char *amdgpu_virtual_display;
-
+bool enforce_isolation;
 /*
  * OverDrive(bit 14) disabled by default
  * GFX DCS(bit 19) disabled by default
@@ -191,6 +194,7 @@ int amdgpu_smartshift_bias;
 int amdgpu_use_xgmi_p2p = 1;
 int amdgpu_vcnfw_log;
 int amdgpu_sg_display = -1; /* auto */
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);

@@ -819,6 +823,13 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (
 module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
 #endif

+/**
+ * DOC: mtype_local (int)
+ */
+int amdgpu_mtype_local;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
+
 /**
  * DOC: pcie_p2p (bool)
  * Enable PCIe P2P (requires large-BAR). Default value: true (on)
@@ -948,6 +959,28 @@ MODULE_PARM_DESC(smu_pptable_id,
 	"specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
 module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
/**
* DOC: partition_mode (int)
* Used to override the default SPX mode.
*/
MODULE_PARM_DESC(
user_partt_mode,
"specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
0 = AMDGPU_SPX_PARTITION_MODE, \
1 = AMDGPU_DPX_PARTITION_MODE, \
2 = AMDGPU_TPX_PARTITION_MODE, \
3 = AMDGPU_QPX_PARTITION_MODE, \
4 = AMDGPU_CPX_PARTITION_MODE)");
module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
/**
* DOC: enforce_isolation (bool)
* enforce process isolation between graphics and compute via using the same reserved vmid.
*/
module_param(enforce_isolation, bool, 0444);
MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
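/* Usage sketch (illustration only): the two parameters documented above can be
 * set at module load time, e.g.
 *
 *	modprobe amdgpu user_partt_mode=4 enforce_isolation=1
 *
 * which requests CPX compute partitioning and turns on the shared reserved-VMID
 * isolation between graphics and compute described in the parameter help text.
 */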
 /* These devices are not supported by amdgpu.
 * They are supported by the mach64, r128, radeon drivers
 */
@@ -1615,6 +1648,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
 	0x5874,
 	0x5940,
 	0x5941,
+	0x5b70,
 	0x5b72,
 	0x5b73,
 	0x5b74,
@@ -2017,6 +2051,11 @@ static const struct pci_device_id pciidlist[] = {
 	  .class_mask = 0xffffff,
 	  .driver_data = CHIP_IP_DISCOVERY },

+	{ PCI_DEVICE(0x1002, PCI_ANY_ID),
+	  .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
+	  .class_mask = 0xffffff,
+	  .driver_data = CHIP_IP_DISCOVERY },
+
 	{0, 0, 0}
 };
@@ -2161,6 +2200,10 @@ retry_init:
 		goto err_pci;
 	}

+	ret = amdgpu_xcp_dev_register(adev, ent);
+	if (ret)
+		goto err_pci;
+
 	/*
 	 * 1. don't init fbdev on hw without DCE
 	 * 2. don't init fbdev if there are no connectors
@@ -2233,6 +2276,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct amdgpu_device *adev = drm_to_adev(dev);

+	amdgpu_xcp_dev_unplug(adev);
 	drm_dev_unplug(dev);

 	if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
@@ -2819,6 +2863,33 @@ static const struct drm_driver amdgpu_kms_driver = {
 	.patchlevel = KMS_DRIVER_PATCHLEVEL,
 };

const struct drm_driver amdgpu_partition_driver = {
.driver_features =
DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
DRIVER_SYNCOBJ_TIMELINE,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
.lastclose = amdgpu_driver_lastclose_kms,
.ioctls = amdgpu_ioctls_kms,
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = amdgpu_gem_prime_import,
.gem_prime_mmap = drm_gem_prime_mmap,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
};
 static struct pci_error_handlers amdgpu_pci_err_handler = {
 	.error_detected	= amdgpu_pci_error_detected,
 	.mmio_enabled	= amdgpu_pci_mmio_enabled,
@@ -2886,9 +2957,11 @@ static void __exit amdgpu_exit(void)
 	amdgpu_amdkfd_fini();
 	pci_unregister_driver(&amdgpu_kms_pci_driver);
 	amdgpu_unregister_atpx_handler();
+	amdgpu_acpi_release();
 	amdgpu_sync_fini();
 	amdgpu_fence_slab_fini();
 	mmu_notifier_synchronize();
+	amdgpu_xcp_drv_release();
 }
module_init(amdgpu_init); module_init(amdgpu_init);


@@ -42,6 +42,8 @@
 #define DRIVER_DESC		"AMD GPU"
 #define DRIVER_DATE		"20150101"

+extern const struct drm_driver amdgpu_partition_driver;
+
 long amdgpu_drm_ioctl(struct file *filp,
 		      unsigned int cmd, unsigned long arg);


@@ -70,6 +70,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
 	drm_for_each_connector_iter(connector, &iter) {
 		if (connector->encoder == encoder) {
 			struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
 			amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
 			DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
 				      amdgpu_encoder->active_device, amdgpu_encoder->devices,
@@ -165,12 +166,12 @@ void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
 {
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
-	unsigned hblank = native_mode->htotal - native_mode->hdisplay;
-	unsigned vblank = native_mode->vtotal - native_mode->vdisplay;
-	unsigned hover = native_mode->hsync_start - native_mode->hdisplay;
-	unsigned vover = native_mode->vsync_start - native_mode->vdisplay;
-	unsigned hsync_width = native_mode->hsync_end - native_mode->hsync_start;
-	unsigned vsync_width = native_mode->vsync_end - native_mode->vsync_start;
+	unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
+	unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
+	unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
+	unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
+	unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
+	unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;

 	adjusted_mode->clock = native_mode->clock;
 	adjusted_mode->flags = native_mode->flags;


@@ -42,7 +42,6 @@
 #include "amdgpu_reset.h"

 /*
- * Fences
  * Fences mark an event in the GPUs pipeline and are used
  * for GPU/CPU synchronization. When the fence is written,
  * it is expected that all buffers associated with that fence
@@ -140,7 +139,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 * Returns 0 on success, -ENOMEM on failure.
 */
 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
-		      unsigned flags)
+		      unsigned int flags)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *fence;
@@ -174,12 +173,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 				       adev->fence_context + ring->idx, seq);
 			/* Against remove in amdgpu_job_{free, free_cb} */
 			dma_fence_get(fence);
-		}
-		else
+		} else {
 			dma_fence_init(fence, &amdgpu_fence_ops,
 				       &ring->fence_drv.lock,
 				       adev->fence_context + ring->idx, seq);
+		}
 	}

 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
@@ -377,14 +376,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
 				      uint32_t wait_seq,
 				      signed long timeout)
 {
-	uint32_t seq;
-
-	do {
-		seq = amdgpu_fence_read(ring);
-		udelay(5);
-		timeout -= 5;
-	} while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
-
+	while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+		udelay(2);
+		timeout -= 2;
+	}
 	return timeout > 0 ? timeout : 0;
 }

 /**
@@ -396,7 +392,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
 * Returns the number of emitted fences on the ring. Used by the
 * dynpm code to ring track activity.
 */
-unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 {
 	uint64_t emitted;

@@ -475,7 +471,7 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
 */
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq_src,
-				   unsigned irq_type)
+				   unsigned int irq_type)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t index;
@@ -582,7 +578,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
 		if (r)
 			amdgpu_fence_driver_force_completion(ring);

-		if (ring->fence_drv.irq_src)
+		if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+		    ring->fence_drv.irq_src)
 			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 				       ring->fence_drv.irq_type);
@@ -653,6 +650,7 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring || !ring->fence_drv.initialized)
 			continue;
@ -835,11 +833,12 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused) static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
int i; int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i]; struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized) if (!ring || !ring->fence_drv.initialized)
continue; continue;
@ -913,6 +912,7 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
reset_work); reset_work);
struct amdgpu_reset_context reset_context; struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context)); memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;


@@ -35,6 +35,7 @@
 #endif
 #include "amdgpu.h"
 #include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>

 /*
  * GART
@@ -102,6 +103,142 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 	adev->dummy_page_addr = 0;
 }

/**
* amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
*
* @adev: amdgpu_device pointer
*
* Allocate system memory for GART page table for ASICs that don't have
* dedicated VRAM.
* Returns 0 for success, error for failure.
*/
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
{
unsigned int order = get_order(adev->gart.table_size);
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
struct amdgpu_bo *bo = NULL;
struct sg_table *sg = NULL;
struct amdgpu_bo_param bp;
dma_addr_t dma_addr;
struct page *p;
int ret;
if (adev->gart.bo != NULL)
return 0;
p = alloc_pages(gfp_flags, order);
if (!p)
return -ENOMEM;
/* If the hardware does not support UTCL2 snooping of the CPU caches
* then set_memory_wc() could be used as a workaround to mark the pages
* as write combine memory.
*/
dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
__free_pages(p, order);
p = NULL;
return -EFAULT;
}
dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
/* Create SG table */
sg = kmalloc(sizeof(*sg), GFP_KERNEL);
if (!sg) {
ret = -ENOMEM;
goto error;
}
ret = sg_alloc_table(sg, 1, GFP_KERNEL);
if (ret)
goto error;
sg_dma_address(sg->sgl) = dma_addr;
sg->sgl->length = adev->gart.table_size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->sgl->dma_length = adev->gart.table_size;
#endif
/* Create SG BO */
memset(&bp, 0, sizeof(bp));
bp.size = adev->gart.table_size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.type = ttm_bo_type_sg;
bp.resv = NULL;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
bp.flags = 0;
ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret)
goto error;
bo->tbo.sg = sg;
bo->tbo.ttm->sg = sg;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
goto error;
}
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
WARN(ret, "Pinning the GART table failed");
if (ret)
goto error_resv;
adev->gart.bo = bo;
adev->gart.ptr = page_to_virt(p);
/* Make GART table accessible in VMID0 */
ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
if (ret)
amdgpu_gart_table_ram_free(adev);
amdgpu_bo_unreserve(bo);
return 0;
error_resv:
amdgpu_bo_unreserve(bo);
error:
amdgpu_bo_unref(&bo);
if (sg) {
sg_free_table(sg);
kfree(sg);
}
__free_pages(p, order);
return ret;
}
/**
* amdgpu_gart_table_ram_free - free gart page table system ram
*
* @adev: amdgpu_device pointer
*
* Free the system memory used for the GART page table on ASICs that don't
* have dedicated VRAM.
*/
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
{
unsigned int order = get_order(adev->gart.table_size);
struct sg_table *sg = adev->gart.bo->tbo.sg;
struct page *p;
int ret;
ret = amdgpu_bo_reserve(adev->gart.bo, false);
if (!ret) {
amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
}
amdgpu_bo_unref(&adev->gart.bo);
sg_free_table(sg);
kfree(sg);
p = virt_to_page(adev->gart.ptr);
__free_pages(p, order);
adev->gart.ptr = NULL;
}
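/* Usage sketch (illustration only): an ASIC whose GART table lives in system
 * memory would pair these helpers in its gmc init/fini paths, along the lines
 * of:
 *
 *	r = amdgpu_gart_table_ram_alloc(adev);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_gart_table_ram_free(adev);
 */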
 /**
 * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
 *
@@ -182,7 +319,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 	}
 	mb();
 	amdgpu_device_flush_hdp(adev, NULL);
-	for (i = 0; i < adev->num_vmhubs; i++)
+	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
 		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);

 	drm_dev_exit(idx);
@@ -264,7 +401,7 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
 	mb();
 	amdgpu_device_flush_hdp(adev, NULL);
-	for (i = 0; i < adev->num_vmhubs; i++)
+	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
 		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 }


@@ -51,6 +51,8 @@ struct amdgpu_gart {
 	uint64_t			gart_pte_flags;
 };

+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
 int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
 int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);


@@ -98,7 +98,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
 			     u64 flags, enum ttm_bo_type type,
 			     struct dma_resv *resv,
-			     struct drm_gem_object **obj)
+			     struct drm_gem_object **obj, int8_t xcp_id_plus1)
 {
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_user *ubo;
@@ -116,6 +116,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 	bp.flags = flags;
 	bp.domain = initial_domain;
 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+	bp.xcp_id_plus1 = xcp_id_plus1;

 	r = amdgpu_bo_create_user(adev, &bp, &ubo);
 	if (r)
@@ -336,7 +337,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 retry:
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     initial_domain,
-				     flags, ttm_bo_type_device, resv, &gobj);
+				     flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
 	if (r && r != -ERESTARTSYS) {
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
 			flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
@ -379,6 +380,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct ttm_operation_ctx ctx = { true, false }; struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_userptr *args = data; struct drm_amdgpu_gem_userptr *args = data;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object *gobj; struct drm_gem_object *gobj;
struct hmm_range *range; struct hmm_range *range;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
@ -405,7 +407,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */ /* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
0, ttm_bo_type_device, NULL, &gobj); 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r) if (r)
return r; return r;
@ -908,6 +910,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args) struct drm_mode_create_dumb *args)
{ {
struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct drm_gem_object *gobj; struct drm_gem_object *gobj;
uint32_t handle; uint32_t handle;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
@ -931,7 +934,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
domain = amdgpu_bo_get_preferred_domain(adev, domain = amdgpu_bo_get_preferred_domain(adev,
amdgpu_display_supported_domains(adev, flags)); amdgpu_display_supported_domains(adev, flags));
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
ttm_bo_type_device, NULL, &gobj); ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r) if (r)
return -ENOMEM; return -ENOMEM;
@ -948,7 +951,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev); struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file; struct drm_file *file;
int r; int r;


@@ -43,8 +43,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
 			     u64 flags, enum ttm_bo_type type,
 			     struct dma_resv *resv,
-			     struct drm_gem_object **obj);
-
+			     struct drm_gem_object **obj, int8_t xcp_id_plus1);
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
 			    struct drm_mode_create_dumb *args);


@@ -28,6 +28,7 @@
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"

 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -63,10 +64,10 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
 }

 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
-				     int mec, int pipe, int queue)
+				     int xcc_id, int mec, int pipe, int queue)
 {
 	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
-			adev->gfx.mec.queue_bitmap);
+			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
 }

 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
@@ -204,29 +205,38 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,

 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-	int i, queue, pipe;
+	int i, j, queue, pipe;
 	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
 	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 				     adev->gfx.mec.num_queue_per_pipe,
 				     adev->gfx.num_compute_rings);
+	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;

 	if (multipipe_policy) {
-		/* policy: make queues evenly cross all pipes on MEC1 only */
-		for (i = 0; i < max_queues_per_mec; i++) {
-			pipe = i % adev->gfx.mec.num_pipe_per_mec;
-			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
-				adev->gfx.mec.num_queue_per_pipe;
-
-			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
-					adev->gfx.mec.queue_bitmap);
+		/* policy: make queues evenly cross all pipes on MEC1 only
+		 * for multiple xcc, just use the original policy for simplicity */
+		for (j = 0; j < num_xcc; j++) {
+			for (i = 0; i < max_queues_per_mec; i++) {
+				pipe = i % adev->gfx.mec.num_pipe_per_mec;
+				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+					 adev->gfx.mec.num_queue_per_pipe;
+
+				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+					adev->gfx.mec_bitmap[j].queue_bitmap);
+			}
 		}
 	} else {
 		/* policy: amdgpu owns all queues in the given pipe */
-		for (i = 0; i < max_queues_per_mec; ++i)
-			set_bit(i, adev->gfx.mec.queue_bitmap);
+		for (j = 0; j < num_xcc; j++) {
+			for (i = 0; i < max_queues_per_mec; ++i)
+				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+		}
 	}

-	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+	for (j = 0; j < num_xcc; j++) {
+		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+	}
 }
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
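As a rough illustration of the queue-spreading arithmetic above: the multipipe policy stripes the first max_queues_per_mec compute queues across the pipes of MEC1, and with this change the resulting bitmap is built once per XCC. A standalone sketch of that index math (the pipe/queue counts are assumed example values, not anything queried from hardware):

#include <stdio.h>

/* Standalone sketch of the multipipe spreading in
 * amdgpu_gfx_compute_queue_acquire(): queue i is striped across the
 * pipes of MEC1 and the resulting bit is set per XCC.  The counts
 * below are assumed example values. */
int main(void)
{
        const int num_pipe_per_mec = 4;         /* assumption */
        const int num_queue_per_pipe = 8;       /* assumption */
        const int num_compute_rings = 8;        /* assumption */
        int max_queues_per_mec = num_pipe_per_mec * num_queue_per_pipe;
        int i, pipe, queue, bit;

        if (max_queues_per_mec > num_compute_rings)
                max_queues_per_mec = num_compute_rings;

        for (i = 0; i < max_queues_per_mec; i++) {
                pipe = i % num_pipe_per_mec;
                queue = (i / num_pipe_per_mec) % num_queue_per_pipe;
                bit = pipe * num_queue_per_pipe + queue;
                printf("ring %d -> mec1 pipe %d queue %d (bitmap bit %d)\n",
                       i, pipe, queue, bit);
        }
        return 0;
}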
@ -258,7 +268,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
} }
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
struct amdgpu_ring *ring) struct amdgpu_ring *ring, int xcc_id)
{ {
int queue_bit; int queue_bit;
int mec, pipe, queue; int mec, pipe, queue;
@ -268,7 +278,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
* adev->gfx.mec.num_queue_per_pipe; * adev->gfx.mec.num_queue_per_pipe;
while (--queue_bit >= 0) { while (--queue_bit >= 0) {
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue; continue;
amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
@ -294,9 +304,9 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring, struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq) struct amdgpu_irq_src *irq, int xcc_id)
{ {
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
int r = 0; int r = 0;
spin_lock_init(&kiq->ring_lock); spin_lock_init(&kiq->ring_lock);
@ -304,16 +314,20 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->adev = NULL; ring->adev = NULL;
ring->ring_obj = NULL; ring->ring_obj = NULL;
ring->use_doorbell = true; ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.kiq; ring->xcc_id = xcc_id;
ring->vm_hub = AMDGPU_GFXHUB_0; ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
ring->doorbell_index =
(adev->doorbell_index.kiq +
xcc_id * adev->doorbell_index.xcc_doorbell_range)
<< 1;
r = amdgpu_gfx_kiq_acquire(adev, ring); r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r) if (r)
return r; return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr; ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true; ring->no_scheduler = true;
sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
AMDGPU_RING_PRIO_DEFAULT, NULL); AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r) if (r)
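The per-XCC doorbell index derived above is the base KIQ index offset by one doorbell range per XCC and then doubled (the allocator hands out 64-bit slots while the ring stores a 32-bit dword index). A small sketch with assumed base/range values:

#include <stdio.h>
#include <stdint.h>

/* Sketch of the KIQ doorbell index computation used in
 * amdgpu_gfx_kiq_init_ring(); kiq_base and xcc_doorbell_range are
 * assumed example values. */
static uint32_t kiq_doorbell_index(uint32_t kiq_base,
                                   uint32_t xcc_doorbell_range,
                                   int xcc_id)
{
        /* 64-bit slot index, doubled into a 32-bit dword index */
        return (kiq_base + xcc_id * xcc_doorbell_range) << 1;
}

int main(void)
{
        const uint32_t kiq_base = 0x190;                /* assumption */
        const uint32_t xcc_doorbell_range = 0x20;       /* assumption */
        int xcc_id;

        for (xcc_id = 0; xcc_id < 4; xcc_id++)
                printf("xcc %d: doorbell index 0x%x\n", xcc_id,
                       kiq_doorbell_index(kiq_base, xcc_doorbell_range, xcc_id));
        return 0;
}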
@ -327,19 +341,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
amdgpu_ring_fini(ring); amdgpu_ring_fini(ring);
} }
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{ {
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
} }
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
unsigned hpd_size) unsigned hpd_size, int xcc_id)
{ {
int r; int r;
u32 *hpd; u32 *hpd;
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
@ -362,13 +376,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
/* create MQD for each compute/gfx queue */ /* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size) unsigned mqd_size, int xcc_id)
{ {
struct amdgpu_ring *ring = NULL; int r, i, j;
int r, i; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
u32 domain = AMDGPU_GEM_DOMAIN_GTT;
/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
domain |= AMDGPU_GEM_DOMAIN_VRAM;
/* create MQD for KIQ */ /* create MQD for KIQ */
ring = &adev->gfx.kiq.ring;
if (!adev->enable_mes_kiq && !ring->mqd_obj) { if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
@ -387,8 +406,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
} }
/* prepare MQD backup */ /* prepare MQD backup */
adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) if (!kiq->mqd_backup)
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
} }
@ -398,13 +417,14 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring = &adev->gfx.gfx_ring[i]; ring = &adev->gfx.gfx_ring[i];
if (!ring->mqd_obj) { if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr); &ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) { if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r; return r;
} }
ring->mqd_size = mqd_size;
/* prepare MQD backup */ /* prepare MQD backup */
adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.me.mqd_backup[i]) if (!adev->gfx.me.mqd_backup[i])
@ -415,19 +435,21 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
/* create MQD for each KCQ */ /* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; j = i + xcc_id * adev->gfx.num_compute_rings;
ring = &adev->gfx.compute_ring[j];
if (!ring->mqd_obj) { if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr); &ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) { if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r; return r;
} }
ring->mqd_size = mqd_size;
/* prepare MQD backup */ /* prepare MQD backup */
adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[i]) if (!adev->gfx.mec.mqd_backup[j])
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
} }
} }
@ -435,10 +457,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
return 0; return 0;
} }
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
{ {
struct amdgpu_ring *ring = NULL; struct amdgpu_ring *ring = NULL;
int i; int i, j;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
@ -451,43 +474,81 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
} }
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; j = i + xcc_id * adev->gfx.num_compute_rings;
kfree(adev->gfx.mec.mqd_backup[i]); ring = &adev->gfx.compute_ring[j];
kfree(adev->gfx.mec.mqd_backup[j]);
amdgpu_bo_free_kernel(&ring->mqd_obj, amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_gpu_addr,
&ring->mqd_ptr); &ring->mqd_ptr);
} }
ring = &adev->gfx.kiq.ring; ring = &kiq->ring;
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); kfree(kiq->mqd_backup);
amdgpu_bo_free_kernel(&ring->mqd_obj, amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_gpu_addr,
&ring->mqd_ptr); &ring->mqd_ptr);
} }
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{ {
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring; struct amdgpu_ring *kiq_ring = &kiq->ring;
int i, r = 0; int i, r = 0;
int j;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL; return -EINVAL;
spin_lock(&adev->gfx.kiq.ring_lock); spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings)) { adev->gfx.num_compute_rings)) {
spin_unlock(&adev->gfx.kiq.ring_lock); spin_unlock(&kiq->ring_lock);
return -ENOMEM; return -ENOMEM;
} }
for (i = 0; i < adev->gfx.num_compute_rings; i++) for (i = 0; i < adev->gfx.num_compute_rings; i++) {
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], j = i + xcc_id * adev->gfx.num_compute_rings;
kiq->pmf->kiq_unmap_queues(kiq_ring,
&adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0); RESET_QUEUES, 0, 0);
}
if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) if (kiq_ring->sched.ready && !adev->job_hang)
r = amdgpu_ring_test_helper(kiq_ring); r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&adev->gfx.kiq.ring_lock); spin_unlock(&kiq->ring_lock);
return r;
}
int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
int i, r = 0;
int j;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
spin_lock(&kiq->ring_lock);
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_gfx_rings)) {
spin_unlock(&kiq->ring_lock);
return -ENOMEM;
}
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
j = i + xcc_id * adev->gfx.num_gfx_rings;
kiq->pmf->kiq_unmap_queues(kiq_ring,
&adev->gfx.gfx_ring[j],
PREEMPT_QUEUES, 0, 0);
}
}
if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
return r; return r;
} }
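Throughout these KCQ/KGQ helpers the per-XCC loops address the shared ring arrays with j = i + xcc_id * num_rings, i.e. each XCC owns a contiguous slice of adev->gfx.compute_ring[] / gfx_ring[]. A standalone sketch of that mapping and its inverse (the ring count is an assumption):

#include <stdio.h>

/* Sketch of the per-XCC slicing of the global ring arrays: local ring
 * i on XCC xcc_id maps to global slot i + xcc_id * rings_per_xcc. */
static int to_global(int xcc_id, int i, int rings_per_xcc)
{
        return i + xcc_id * rings_per_xcc;
}

static void from_global(int j, int rings_per_xcc, int *xcc_id, int *i)
{
        *xcc_id = j / rings_per_xcc;
        *i = j % rings_per_xcc;
}

int main(void)
{
        const int rings_per_xcc = 8;    /* assumed num_compute_rings */
        int xcc_id, i, j, x, l;

        for (xcc_id = 0; xcc_id < 2; xcc_id++) {
                for (i = 0; i < rings_per_xcc; i++) {
                        j = to_global(xcc_id, i, rings_per_xcc);
                        from_global(j, rings_per_xcc, &x, &l);
                        printf("xcc %d ring %d -> compute_ring[%d] (back: xcc %d ring %d)\n",
                               xcc_id, i, j, x, l);
                }
        }
        return 0;
}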
@ -505,18 +566,18 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
return set_resource_bit; return set_resource_bit;
} }
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{ {
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct amdgpu_ring *kiq_ring = &kiq->ring;
uint64_t queue_mask = 0; uint64_t queue_mask = 0;
int r, i; int r, i, j;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL; return -EINVAL;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
if (!test_bit(i, adev->gfx.mec.queue_bitmap)) if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue; continue;
/* This situation may be hit in the future if a new HW /* This situation may be hit in the future if a new HW
@ -532,13 +593,15 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue); kiq_ring->queue);
spin_lock(&adev->gfx.kiq.ring_lock); amdgpu_device_flush_hdp(adev, NULL);
spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings + adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size); kiq->pmf->set_resources_size);
if (r) { if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r); DRM_ERROR("Failed to lock KIQ (%d).\n", r);
spin_unlock(&adev->gfx.kiq.ring_lock); spin_unlock(&kiq->ring_lock);
return r; return r;
} }
@ -546,11 +609,51 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
queue_mask = ~0ULL; queue_mask = ~0ULL;
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
for (i = 0; i < adev->gfx.num_compute_rings; i++) for (i = 0; i < adev->gfx.num_compute_rings; i++) {
kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); j = i + xcc_id * adev->gfx.num_compute_rings;
kiq->pmf->kiq_map_queues(kiq_ring,
&adev->gfx.compute_ring[j]);
}
r = amdgpu_ring_test_helper(kiq_ring); r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&adev->gfx.kiq.ring_lock); spin_unlock(&kiq->ring_lock);
if (r)
DRM_ERROR("KCQ enable failed\n");
return r;
}
int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
int r, i, j;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
return -EINVAL;
amdgpu_device_flush_hdp(adev, NULL);
spin_lock(&kiq->ring_lock);
/* No need to map kcq on the slave */
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_gfx_rings);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
spin_unlock(&kiq->ring_lock);
return r;
}
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
j = i + xcc_id * adev->gfx.num_gfx_rings;
kiq->pmf->kiq_map_queues(kiq_ring,
&adev->gfx.gfx_ring[j]);
}
}
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r) if (r)
DRM_ERROR("KCQ enable failed\n"); DRM_ERROR("KCQ enable failed\n");
@ -785,12 +888,31 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
void *ras_error_status,
void (*func)(struct amdgpu_device *adev, void *ras_error_status,
int xcc_id))
{
int i;
int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
if (err_data) {
err_data->ue_count = 0;
err_data->ce_count = 0;
}
for_each_inst(i, xcc_mask)
func(adev, ras_error_status, i);
}
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{ {
signed long r, cnt = 0; signed long r, cnt = 0;
unsigned long flags; unsigned long flags;
uint32_t seq, reg_val_offs = 0, value = 0; uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring; struct amdgpu_ring *ring = &kiq->ring;
if (amdgpu_device_skip_hw_access(adev)) if (amdgpu_device_skip_hw_access(adev))
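adev->gfx.xcc_mask is a 16-bit bitmask of present XCC instances; NUM_XCC() is its population count, and the new RAS helper walks GENMASK(num_xcc - 1, 0) with for_each_inst(). A portable sketch of the same iteration without the kernel macros (the mask value is an assumed example):

#include <stdint.h>
#include <stdio.h>

/* Sketch of iterating a 16-bit XCC instance mask the way the RAS
 * helper above does. */
int main(void)
{
        uint16_t xcc_mask = 0x003f;     /* assumption: 6 XCCs present */
        uint16_t iter_mask;
        int num_xcc = 0, i;

        for (i = 0; i < 16; i++)        /* hweight16(xcc_mask) */
                if (xcc_mask & (1u << i))
                        num_xcc++;

        printf("NUM_XCC = %d\n", num_xcc);

        /* GENMASK(num_xcc - 1, 0): the low num_xcc bits set */
        iter_mask = (uint16_t)((1u << num_xcc) - 1);

        for (i = 0; i < 16; i++)        /* for_each_inst(i, iter_mask) */
                if (iter_mask & (1u << i))
                        printf("query RAS errors on xcc %d\n", i);
        return 0;
}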
@ -858,7 +980,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
signed long r, cnt = 0; signed long r, cnt = 0;
unsigned long flags; unsigned long flags;
uint32_t seq; uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring; struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg); BUG_ON(!ring->funcs->emit_wreg);
@ -1062,3 +1184,125 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
} }
} }
bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
{
return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
adev->gfx.num_xcc_per_xcp : 1));
}
static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
struct device_attribute *addr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
int mode;
mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
AMDGPU_XCP_FL_NONE);
return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
}
static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
struct device_attribute *addr,
const char *buf, size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
enum amdgpu_gfx_partition mode;
int ret = 0, num_xcc;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
if (num_xcc % 2 != 0)
return -EINVAL;
if (!strncasecmp("SPX", buf, strlen("SPX"))) {
mode = AMDGPU_SPX_PARTITION_MODE;
} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
/*
* DPX mode needs AIDs to be in multiple of 2.
* Each AID connects 2 XCCs.
*/
if (num_xcc%4)
return -EINVAL;
mode = AMDGPU_DPX_PARTITION_MODE;
} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
if (num_xcc != 6)
return -EINVAL;
mode = AMDGPU_TPX_PARTITION_MODE;
} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
if (num_xcc != 8)
return -EINVAL;
mode = AMDGPU_QPX_PARTITION_MODE;
} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
mode = AMDGPU_CPX_PARTITION_MODE;
} else {
return -EINVAL;
}
ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
if (ret)
return ret;
return count;
}
static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
struct device_attribute *addr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
char *supported_partition;
/* TBD */
switch (NUM_XCC(adev->gfx.xcc_mask)) {
case 8:
supported_partition = "SPX, DPX, QPX, CPX";
break;
case 6:
supported_partition = "SPX, TPX, CPX";
break;
case 4:
supported_partition = "SPX, DPX, CPX";
break;
/* this seems to exist only in the emulation phase */
case 2:
supported_partition = "SPX, CPX";
break;
default:
supported_partition = "Not supported";
break;
}
return sysfs_emit(buf, "%s\n", supported_partition);
}
static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
amdgpu_gfx_get_current_compute_partition,
amdgpu_gfx_set_compute_partition);
static DEVICE_ATTR(available_compute_partition, S_IRUGO,
amdgpu_gfx_get_available_compute_partition, NULL);
int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
{
int r;
r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
if (r)
return r;
r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
return r;
}
void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
{
device_remove_file(adev->dev, &dev_attr_current_compute_partition);
device_remove_file(adev->dev, &dev_attr_available_compute_partition);
}
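The two attributes registered above expose the compute partition mode in the device's sysfs directory. A hedged user-space sketch of how they might be exercised; the /sys path is an assumed example for a typical card0 (it varies per system), and the write needs root:

#include <stdio.h>

/* Illustrative user-space use of the new sysfs attributes; the base
 * path below is an assumption. */
int main(void)
{
        const char *base = "/sys/class/drm/card0/device";       /* assumption */
        char path[256], buf[64];
        FILE *f;

        snprintf(path, sizeof(path), "%s/available_compute_partition", base);
        f = fopen(path, "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("available: %s", buf);
        if (f)
                fclose(f);

        snprintf(path, sizeof(path), "%s/current_compute_partition", base);
        f = fopen(path, "w");           /* S_IWUSR: requires root */
        if (f) {
                fputs("CPX\n", f);      /* one of SPX/DPX/TPX/QPX/CPX */
                fclose(f);
        }

        f = fopen(path, "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("current: %s", buf);
        if (f)
                fclose(f);
        return 0;
}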

View file

@ -61,7 +61,42 @@ enum amdgpu_gfx_partition {
AMDGPU_TPX_PARTITION_MODE = 2, AMDGPU_TPX_PARTITION_MODE = 2,
AMDGPU_QPX_PARTITION_MODE = 3, AMDGPU_QPX_PARTITION_MODE = 3,
AMDGPU_CPX_PARTITION_MODE = 4, AMDGPU_CPX_PARTITION_MODE = 4,
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
/* Automatically choose the right mode */
AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
};
#define NUM_XCC(x) hweight16(x)
enum amdgpu_pkg_type {
AMDGPU_PKG_TYPE_APU = 2,
AMDGPU_PKG_TYPE_UNKNOWN,
};
enum amdgpu_gfx_ras_mem_id_type {
AMDGPU_GFX_CP_MEM = 0,
AMDGPU_GFX_GCEA_MEM,
AMDGPU_GFX_GC_CANE_MEM,
AMDGPU_GFX_GCUTCL2_MEM,
AMDGPU_GFX_GDS_MEM,
AMDGPU_GFX_LDS_MEM,
AMDGPU_GFX_RLC_MEM,
AMDGPU_GFX_SP_MEM,
AMDGPU_GFX_SPI_MEM,
AMDGPU_GFX_SQC_MEM,
AMDGPU_GFX_SQ_MEM,
AMDGPU_GFX_TA_MEM,
AMDGPU_GFX_TCC_MEM,
AMDGPU_GFX_TCA_MEM,
AMDGPU_GFX_TCI_MEM,
AMDGPU_GFX_TCP_MEM,
AMDGPU_GFX_TD_MEM,
AMDGPU_GFX_TCX_MEM,
AMDGPU_GFX_ATC_L2_MEM,
AMDGPU_GFX_UTCL2_MEM,
AMDGPU_GFX_VML2_MEM,
AMDGPU_GFX_VML2_WALKER_MEM,
AMDGPU_GFX_MEM_TYPE_NUM
}; };
struct amdgpu_mec { struct amdgpu_mec {
@ -75,8 +110,10 @@ struct amdgpu_mec {
u32 num_mec; u32 num_mec;
u32 num_pipe_per_mec; u32 num_pipe_per_mec;
u32 num_queue_per_pipe; u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
};
struct amdgpu_mec_bitmap {
/* These are the resources for which amdgpu takes ownership */ /* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
}; };
@ -120,6 +157,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring; struct amdgpu_ring ring;
struct amdgpu_irq_src irq; struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf; const struct kiq_pm4_funcs *pmf;
void *mqd_backup;
}; };
/* /*
@ -230,23 +268,37 @@ struct amdgpu_gfx_ras {
struct amdgpu_iv_entry *entry); struct amdgpu_iv_entry *entry);
}; };
struct amdgpu_gfx_shadow_info {
u32 shadow_size;
u32 shadow_alignment;
u32 csa_size;
u32 csa_alignment;
};
struct amdgpu_gfx_funcs { struct amdgpu_gfx_funcs {
/* get the gpu clock counter */ /* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance); u32 sh_num, u32 instance, int xcc_id);
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t *dst, int *no_fields); uint32_t wave, uint32_t *dst, int *no_fields);
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread, uint32_t start, uint32_t wave, uint32_t thread, uint32_t start,
uint32_t size, uint32_t *dst); uint32_t size, uint32_t *dst);
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start, uint32_t size, uint32_t wave, uint32_t start, uint32_t size,
uint32_t *dst); uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
u32 queue, u32 vmid); u32 queue, u32 vmid, u32 xcc_id);
void (*init_spm_golden)(struct amdgpu_device *adev); void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info);
enum amdgpu_gfx_partition
(*query_partition_mode)(struct amdgpu_device *adev);
int (*switch_partition_mode)(struct amdgpu_device *adev,
int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
}; };
struct sq_work { struct sq_work {
@ -296,7 +348,8 @@ struct amdgpu_gfx {
struct amdgpu_ce ce; struct amdgpu_ce ce;
struct amdgpu_me me; struct amdgpu_me me;
struct amdgpu_mec mec; struct amdgpu_mec mec;
struct amdgpu_kiq kiq; struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
struct amdgpu_imu imu; struct amdgpu_imu imu;
bool rs64_enable; /* firmware format */ bool rs64_enable; /* firmware format */
const struct firmware *me_fw; /* ME firmware */ const struct firmware *me_fw; /* ME firmware */
@ -376,15 +429,31 @@ struct amdgpu_gfx {
struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux muxer; struct amdgpu_ring_mux muxer;
enum amdgpu_gfx_partition partition_mode; bool cp_gfx_shadow; /* for gfx11 */
uint32_t num_xcd;
uint16_t xcc_mask;
uint32_t num_xcc_per_xcp; uint32_t num_xcc_per_xcp;
struct mutex partition_mutex;
}; };
struct amdgpu_gfx_ras_reg_entry {
struct amdgpu_ras_err_status_reg_entry reg_entry;
enum amdgpu_gfx_ras_mem_id_type mem_id_type;
uint32_t se_num;
};
struct amdgpu_gfx_ras_mem_id_entry {
const struct amdgpu_ras_memory_id_entry *mem_id_ent;
uint32_t size;
};
#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
/** /**
* amdgpu_gfx_create_bitmask - create a bitmask * amdgpu_gfx_create_bitmask - create a bitmask
@ -404,19 +473,21 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring, struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq); struct amdgpu_irq_src *irq, int xcc_id);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
unsigned hpd_size); unsigned hpd_size, int xcc_id);
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size); unsigned mqd_size, int xcc_id);
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev); int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev); int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev); void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
@ -425,8 +496,8 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue); int pipe, int queue);
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue); int *mec, int *pipe, int *queue);
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
int pipe, int queue); int mec, int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring); struct amdgpu_ring *ring);
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
@ -458,4 +529,33 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id)
int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry); struct amdgpu_iv_entry *entry);
bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
void *ras_error_status,
void (*func)(struct amdgpu_device *adev, void *ras_error_status,
int xcc_id));
static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
{
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
return "SPX";
case AMDGPU_DPX_PARTITION_MODE:
return "DPX";
case AMDGPU_TPX_PARTITION_MODE:
return "TPX";
case AMDGPU_QPX_PARTITION_MODE:
return "QPX";
case AMDGPU_CPX_PARTITION_MODE:
return "CPX";
default:
return "UNKNOWN";
}
return "UNKNOWN";
}
#endif #endif

View file

@ -534,22 +534,21 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
* subject to change when ring number changes * subject to change when ring number changes
* Engine 17: Gart flushes * Engine 17: Gart flushes
*/ */
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 #define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
GFXHUB_FREE_VM_INV_ENGS_BITMAP};
unsigned i; unsigned i;
unsigned vmhub, inv_eng; unsigned vmhub, inv_eng;
if (adev->enable_mes) { /* init the vm inv eng for all vmhubs */
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
/* reserve engine 5 for firmware */ /* reserve engine 5 for firmware */
for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) if (adev->enable_mes)
vm_inv_engs[vmhub] &= ~(1 << 5); vm_inv_engs[i] &= ~(1 << 5);
} }
for (i = 0; i < adev->num_rings; ++i) { for (i = 0; i < adev->num_rings; ++i) {
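The rework above seeds every populated VM hub with the same free-engine bitmap: 0x1FFF3 marks engines 0, 1 and 4-16 as available, and with MES enabled engine 5 is additionally pulled out for firmware. A small sketch of handing engines out of such a bitmap (the ring count and MES flag are assumptions):

#include <stdio.h>
#include <stdbool.h>

/* Sketch of allocating VM invalidation engines from the shared
 * bitmap above. */
int main(void)
{
        unsigned int free_engines = 0x1FFF3;    /* AMDGPU_VMHUB_INV_ENG_BITMAP */
        const bool enable_mes = true;           /* assumption */
        int ring;

        if (enable_mes)
                free_engines &= ~(1u << 5);     /* reserve engine 5 for firmware */

        /* pretend five rings share this hub and each grabs one engine */
        for (ring = 0; ring < 5; ring++) {
                int eng = __builtin_ffs(free_engines) - 1;

                if (eng < 0) {
                        printf("ring %d: no engine left\n", ring);
                        break;
                }
                free_engines &= ~(1u << eng);
                printf("ring %d: inv_eng %d\n", ring, eng);
        }
        return 0;
}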
@ -593,6 +592,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(9, 3, 0): case IP_VERSION(9, 3, 0):
/* GC 10.3.7 */ /* GC 10.3.7 */
case IP_VERSION(10, 3, 7): case IP_VERSION(10, 3, 7):
/* GC 11.0.1 */
case IP_VERSION(11, 0, 1):
if (amdgpu_tmz == 0) { if (amdgpu_tmz == 0) {
adev->gmc.tmz_enabled = false; adev->gmc.tmz_enabled = false;
dev_info(adev->dev, dev_info(adev->dev,
@ -616,7 +617,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 1):
/* YELLOW_CARP*/ /* YELLOW_CARP*/
case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 3):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4): case IP_VERSION(11, 0, 4):
/* Don't enable it by default yet. /* Don't enable it by default yet.
*/ */
@ -670,7 +670,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + hub->ctx_distance * i; reg = hub->vm_context0_cntl + hub->ctx_distance * i;
tmp = (hub_type == AMDGPU_GFXHUB_0) ? tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
RREG32_SOC15_IP(GC, reg) : RREG32_SOC15_IP(GC, reg) :
RREG32_SOC15_IP(MMHUB, reg); RREG32_SOC15_IP(MMHUB, reg);
@ -679,7 +679,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
else else
tmp &= ~hub->vm_cntx_cntl_vm_fault; tmp &= ~hub->vm_cntx_cntl_vm_fault;
(hub_type == AMDGPU_GFXHUB_0) ? (hub_type == AMDGPU_GFXHUB(0)) ?
WREG32_SOC15_IP(GC, reg, tmp) : WREG32_SOC15_IP(GC, reg, tmp) :
WREG32_SOC15_IP(MMHUB, reg, tmp); WREG32_SOC15_IP(MMHUB, reg, tmp);
} }
@ -892,3 +892,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
return 0; return 0;
} }
static ssize_t current_memory_partition_show(
struct device *dev, struct device_attribute *addr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
enum amdgpu_memory_partition mode;
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
switch (mode) {
case AMDGPU_NPS1_PARTITION_MODE:
return sysfs_emit(buf, "NPS1\n");
case AMDGPU_NPS2_PARTITION_MODE:
return sysfs_emit(buf, "NPS2\n");
case AMDGPU_NPS3_PARTITION_MODE:
return sysfs_emit(buf, "NPS3\n");
case AMDGPU_NPS4_PARTITION_MODE:
return sysfs_emit(buf, "NPS4\n");
case AMDGPU_NPS6_PARTITION_MODE:
return sysfs_emit(buf, "NPS6\n");
case AMDGPU_NPS8_PARTITION_MODE:
return sysfs_emit(buf, "NPS8\n");
default:
return sysfs_emit(buf, "UNKNOWN\n");
}
return sysfs_emit(buf, "UNKNOWN\n");
}
static DEVICE_ATTR_RO(current_memory_partition);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
{
if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
return 0;
return device_create_file(adev->dev,
&dev_attr_current_memory_partition);
}
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
{
device_remove_file(adev->dev, &dev_attr_current_memory_partition);
}

View file

@ -63,6 +63,16 @@
struct firmware; struct firmware;
enum amdgpu_memory_partition {
UNKNOWN_MEMORY_PARTITION_MODE = 0,
AMDGPU_NPS1_PARTITION_MODE = 1,
AMDGPU_NPS2_PARTITION_MODE = 2,
AMDGPU_NPS3_PARTITION_MODE = 3,
AMDGPU_NPS4_PARTITION_MODE = 4,
AMDGPU_NPS6_PARTITION_MODE = 6,
AMDGPU_NPS8_PARTITION_MODE = 8,
};
/* /*
* GMC page fault information * GMC page fault information
*/ */
@ -119,7 +129,8 @@ struct amdgpu_gmc_funcs {
uint32_t vmhub, uint32_t flush_type); uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via pasid */ /* flush the vm tlb via pasid */
int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
uint32_t flush_type, bool all_hub); uint32_t flush_type, bool all_hub,
uint32_t inst);
/* flush the vm tlb via ring */ /* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr); uint64_t pd_addr);
@ -137,8 +148,15 @@ struct amdgpu_gmc_funcs {
void (*get_vm_pte)(struct amdgpu_device *adev, void (*get_vm_pte)(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping, struct amdgpu_bo_va_mapping *mapping,
uint64_t *flags); uint64_t *flags);
/* override per-page pte flags */
void (*override_vm_pte_flags)(struct amdgpu_device *dev,
struct amdgpu_vm *vm,
uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */ /* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
enum amdgpu_memory_partition (*query_mem_partition_mode)(
struct amdgpu_device *adev);
}; };
struct amdgpu_xgmi_ras { struct amdgpu_xgmi_ras {
@ -164,6 +182,21 @@ struct amdgpu_xgmi {
struct amdgpu_xgmi_ras *ras; struct amdgpu_xgmi_ras *ras;
}; };
struct amdgpu_mem_partition_info {
union {
struct {
uint32_t fpfn;
uint32_t lpfn;
} range;
struct {
int node;
} numa;
};
uint64_t size;
};
#define INVALID_PFN -1
struct amdgpu_gmc { struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to /* FB's physical address in MMIO space (for CPU to
* map FB). This is different compared to the agp/ * map FB). This is different compared to the agp/
@ -250,7 +283,10 @@ struct amdgpu_gmc {
uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
bool tmz_enabled; bool tmz_enabled;
bool is_app_apu;
struct amdgpu_mem_partition_info *mem_partitions;
uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs *gmc_funcs; const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi; struct amdgpu_xgmi xgmi;
@ -296,14 +332,17 @@ struct amdgpu_gmc {
}; };
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
((adev), (pasid), (type), (allhub))) ((adev), (pasid), (type), (allhub), (inst)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
(adev)->gmc.gmc_funcs->override_vm_pte_flags \
((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
/** /**
@ -373,4 +412,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
#endif #endif
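The new amdgpu_mem_partition_info added above describes each VRAM partition either as a page-frame range or as a NUMA node, plus its size, while amdgpu_memory_partition carries the NPS mode. A small stand-alone sketch filling both flavours with made-up numbers, just to show how the union is meant to be used:

#include <stdint.h>
#include <stdio.h>

/* Stand-alone copy of the shape added above, filled with assumed
 * example values. */
struct mem_partition_info {
        union {
                struct { uint32_t fpfn, lpfn; } range;  /* page frame span */
                struct { int node; } numa;              /* NUMA node id   */
        };
        uint64_t size;  /* bytes */
};

int main(void)
{
        struct mem_partition_info parts[2];

        /* range-based partition covering a hypothetical 8 GiB slice */
        parts[0].range.fpfn = 0;
        parts[0].range.lpfn = (uint32_t)((8ull << 30) / 4096 - 1);
        parts[0].size = 8ull << 30;

        /* APU-style partition that simply names a NUMA node */
        parts[1].numa.node = 1;
        parts[1].size = 8ull << 30;

        printf("part0: pfn %u..%u, %llu bytes\n", parts[0].range.fpfn,
               parts[0].range.lpfn, (unsigned long long)parts[0].size);
        printf("part1: numa node %d, %llu bytes\n", parts[1].numa.node,
               (unsigned long long)parts[1].size);
        return 0;
}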

View file

@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
uint64_t fence_ctx; uint64_t fence_ctx;
uint32_t status = 0, alloc_size; uint32_t status = 0, alloc_size;
unsigned fence_flags = 0; unsigned fence_flags = 0;
bool secure; bool secure, init_shadow;
u64 shadow_va, csa_va, gds_va;
int vmid = AMDGPU_JOB_GET_VMID(job);
unsigned i; unsigned i;
int r = 0; int r = 0;
@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
vm = job->vm; vm = job->vm;
fence_ctx = job->base.s_fence ? fence_ctx = job->base.s_fence ?
job->base.s_fence->scheduled.context : 0; job->base.s_fence->scheduled.context : 0;
shadow_va = job->shadow_va;
csa_va = job->csa_va;
gds_va = job->gds_va;
init_shadow = job->init_shadow;
} else { } else {
vm = NULL; vm = NULL;
fence_ctx = 0; fence_ctx = 0;
shadow_va = 0;
csa_va = 0;
gds_va = 0;
init_shadow = false;
} }
if (!ring->sched.ready && !ring->is_mes_queue) { if (!ring->sched.ready && !ring->is_mes_queue) {
@ -212,7 +222,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
} }
amdgpu_ring_ib_begin(ring); amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
if (ring->funcs->emit_gfx_shadow)
amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
init_shadow, vmid);
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring); patch_offset = amdgpu_ring_init_cond_exec(ring);
amdgpu_device_flush_hdp(adev, ring); amdgpu_device_flush_hdp(adev, ring);
@ -263,6 +278,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_flags | AMDGPU_FENCE_FLAG_64BIT); fence_flags | AMDGPU_FENCE_FLAG_64BIT);
} }
if (ring->funcs->emit_gfx_shadow) {
amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
if (ring->funcs->init_cond_exec) {
unsigned ce_offset = ~0;
ce_offset = amdgpu_ring_init_cond_exec(ring);
if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, ce_offset);
}
}
r = amdgpu_fence_emit(ring, f, job, fence_flags); r = amdgpu_fence_emit(ring, f, job, fence_flags);
if (r) { if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r); dev_err(adev->dev, "failed to emit fence (%d)\n", r);
@ -436,7 +463,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused) static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
seq_printf(m, "--------------------- DELAYED --------------------- \n"); seq_printf(m, "--------------------- DELAYED --------------------- \n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],

View file

@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle) if (r || !idle)
goto error; goto error;
if (vm->reserved_vmid[vmhub]) { if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id) if (r || !id)
goto error; goto error;
@ -460,14 +460,11 @@ error:
} }
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub) unsigned vmhub)
{ {
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock); mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
++id_mgr->reserved_use_count; ++id_mgr->reserved_use_count;
if (!id_mgr->reserved) { if (!id_mgr->reserved) {
@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
list_del_init(&id->list); list_del_init(&id->list);
id_mgr->reserved = id; id_mgr->reserved = id;
} }
vm->reserved_vmid[vmhub] = true;
unlock:
mutex_unlock(&id_mgr->lock); mutex_unlock(&id_mgr->lock);
return 0; return 0;
} }
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub) unsigned vmhub)
{ {
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock); mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub] && if (!--id_mgr->reserved_use_count) {
!--id_mgr->reserved_use_count) {
/* give the reserved ID back to normal round robin */ /* give the reserved ID back to normal round robin */
list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
id_mgr->reserved = NULL; id_mgr->reserved = NULL;
} }
vm->reserved_vmid[vmhub] = false;
mutex_unlock(&id_mgr->lock); mutex_unlock(&id_mgr->lock);
} }
@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
} }
} }
/* alloc a default reserved vmid to enforce isolation */
if (enforce_isolation)
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
} }
/** /**

View file

@ -79,10 +79,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id); struct amdgpu_vmid *id);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub); unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub); unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_job *job, struct dma_fence **fence); struct amdgpu_job *job, struct dma_fence **fence);

View file

@ -270,7 +270,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
entry->timestamp_src = dw[2] >> 31; entry->timestamp_src = dw[2] >> 31;
entry->pasid = dw[3] & 0xffff; entry->pasid = dw[3] & 0xffff;
entry->pasid_src = dw[3] >> 31; entry->node_id = (dw[3] >> 16) & 0xff;
entry->src_data[0] = dw[4]; entry->src_data[0] = dw[4];
entry->src_data[1] = dw[5]; entry->src_data[1] = dw[5];
entry->src_data[2] = dw[6]; entry->src_data[2] = dw[6];

View file

@ -99,6 +99,21 @@ const char *soc15_ih_clientid_name[] = {
"MP1" "MP1"
}; };
const int node_id_to_phys_map[NODEID_MAX] = {
[AID0_NODEID] = 0,
[XCD0_NODEID] = 0,
[XCD1_NODEID] = 1,
[AID1_NODEID] = 1,
[XCD2_NODEID] = 2,
[XCD3_NODEID] = 3,
[AID2_NODEID] = 2,
[XCD4_NODEID] = 4,
[XCD5_NODEID] = 5,
[AID3_NODEID] = 3,
[XCD6_NODEID] = 6,
[XCD7_NODEID] = 7,
};
/** /**
* amdgpu_irq_disable_all - disable *all* interrupts * amdgpu_irq_disable_all - disable *all* interrupts
* *
@ -109,7 +124,7 @@ const char *soc15_ih_clientid_name[] = {
void amdgpu_irq_disable_all(struct amdgpu_device *adev) void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{ {
unsigned long irqflags; unsigned long irqflags;
unsigned i, j, k; unsigned int i, j, k;
int r; int r;
spin_lock_irqsave(&adev->irq.lock, irqflags); spin_lock_irqsave(&adev->irq.lock, irqflags);
@ -124,7 +139,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
continue; continue;
for (k = 0; k < src->num_types; ++k) { for (k = 0; k < src->num_types; ++k) {
atomic_set(&src->enabled_types[k], 0);
r = src->funcs->set(adev, src, k, r = src->funcs->set(adev, src, k,
AMDGPU_IRQ_STATE_DISABLE); AMDGPU_IRQ_STATE_DISABLE);
if (r) if (r)
@ -268,11 +282,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
int nvec = pci_msix_vec_count(adev->pdev); int nvec = pci_msix_vec_count(adev->pdev);
unsigned int flags; unsigned int flags;
if (nvec <= 0) { if (nvec <= 0)
flags = PCI_IRQ_MSI; flags = PCI_IRQ_MSI;
} else { else
flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
}
/* we only need one vector */ /* we only need one vector */
nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
if (nvec > 0) { if (nvec > 0) {
@ -331,7 +345,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
*/ */
void amdgpu_irq_fini_sw(struct amdgpu_device *adev) void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
{ {
unsigned i, j; unsigned int i, j;
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources) if (!adev->irq.client[i].sources)
@ -365,7 +379,7 @@ void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
* 0 on success or error code otherwise * 0 on success or error code otherwise
*/ */
int amdgpu_irq_add_id(struct amdgpu_device *adev, int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id, unsigned int client_id, unsigned int src_id,
struct amdgpu_irq_src *source) struct amdgpu_irq_src *source)
{ {
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
@ -417,7 +431,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
{ {
u32 ring_index = ih->rptr >> 2; u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry; struct amdgpu_iv_entry entry;
unsigned client_id, src_id; unsigned int client_id, src_id;
struct amdgpu_irq_src *src; struct amdgpu_irq_src *src;
bool handled = false; bool handled = false;
int r; int r;
@ -492,7 +506,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
* Updates interrupt state for the specific source (all ASICs). * Updates interrupt state for the specific source (all ASICs).
*/ */
int amdgpu_irq_update(struct amdgpu_device *adev, int amdgpu_irq_update(struct amdgpu_device *adev,
struct amdgpu_irq_src *src, unsigned type) struct amdgpu_irq_src *src, unsigned int type)
{ {
unsigned long irqflags; unsigned long irqflags;
enum amdgpu_interrupt_state state; enum amdgpu_interrupt_state state;
@ -501,7 +515,8 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags); spin_lock_irqsave(&adev->irq.lock, irqflags);
/* We need to determine after taking the lock, otherwise /* We need to determine after taking the lock, otherwise
we might disable just enabled interrupts again */ * we might disable just enabled interrupts again
*/
if (amdgpu_irq_enabled(adev, src, type)) if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE; state = AMDGPU_IRQ_STATE_ENABLE;
else else
@ -555,7 +570,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* 0 on success or error code otherwise * 0 on success or error code otherwise
*/ */
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type) unsigned int type)
{ {
if (!adev->irq.installed) if (!adev->irq.installed)
return -ENOENT; return -ENOENT;
@ -585,7 +600,7 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* 0 on success or error code otherwise * 0 on success or error code otherwise
*/ */
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type) unsigned int type)
{ {
if (!adev->irq.installed) if (!adev->irq.installed)
return -ENOENT; return -ENOENT;
@ -619,7 +634,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* invalid parameters * invalid parameters
*/ */
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type) unsigned int type)
{ {
if (!adev->irq.installed) if (!adev->irq.installed)
return false; return false;
@ -732,7 +747,7 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
* Returns: * Returns:
* Linux IRQ * Linux IRQ
*/ */
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id) unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
{ {
adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id); adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);

View file

@ -53,7 +53,7 @@ struct amdgpu_iv_entry {
uint64_t timestamp; uint64_t timestamp;
unsigned timestamp_src; unsigned timestamp_src;
unsigned pasid; unsigned pasid;
unsigned pasid_src; unsigned node_id;
unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW]; unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry; const uint32_t *iv_entry;
}; };
@ -102,6 +102,24 @@ struct amdgpu_irq {
bool retry_cam_enabled; bool retry_cam_enabled;
}; };
enum interrupt_node_id_per_aid {
AID0_NODEID = 0,
XCD0_NODEID = 1,
XCD1_NODEID = 2,
AID1_NODEID = 4,
XCD2_NODEID = 5,
XCD3_NODEID = 6,
AID2_NODEID = 8,
XCD4_NODEID = 9,
XCD5_NODEID = 10,
AID3_NODEID = 12,
XCD6_NODEID = 13,
XCD7_NODEID = 14,
NODEID_MAX,
};
extern const int node_id_to_phys_map[NODEID_MAX];
void amdgpu_irq_disable_all(struct amdgpu_device *adev); void amdgpu_irq_disable_all(struct amdgpu_device *adev);
int amdgpu_irq_init(struct amdgpu_device *adev); int amdgpu_irq_init(struct amdgpu_device *adev);
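entry->node_id (bits 23:16 of IH dword 3, as decoded earlier in amdgpu_ih_decode_iv_helper) identifies which AID/XCD raised the interrupt, and node_id_to_phys_map collapses the sparse node ids into dense physical indices. A small sketch of the decode plus lookup with a bounds check (the table mirrors the one added above; the raw dword is an assumed example):

#include <stdint.h>
#include <stdio.h>

/* Sketch of turning an IH node id into a physical AID/XCD index using
 * the table introduced above. */
enum { NODEID_MAX_SKETCH = 15 };

static const int node_id_to_phys[NODEID_MAX_SKETCH] = {
        [0] = 0, [4] = 1, [8] = 2, [12] = 3,    /* AID0..AID3 */
        [1] = 0, [2] = 1, [5] = 2, [6] = 3,     /* XCD0..XCD3 */
        [9] = 4, [10] = 5, [13] = 6, [14] = 7,  /* XCD4..XCD7 */
};

static int decode_node_id(uint32_t dw3)
{
        return (dw3 >> 16) & 0xff;      /* matches the IV decode change */
}

int main(void)
{
        uint32_t dw3 = 0x0005abcd;      /* assumed raw IV dword: node id 5 */
        int node_id = decode_node_id(dw3);
        int phys = (node_id < NODEID_MAX_SKETCH) ? node_id_to_phys[node_id] : 0;

        printf("node_id %d -> physical instance %d\n", node_id, phys);
        return 0;
}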

View file

@ -65,6 +65,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
ti.process_name, ti.tgid, ti.task_name, ti.pid); ti.process_name, ti.tgid, ti.task_name, ti.pid);
dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
if (amdgpu_device_should_recover_gpu(ring->adev)) { if (amdgpu_device_should_recover_gpu(ring->adev)) {
struct amdgpu_reset_context reset_context; struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context)); memset(&reset_context, 0, sizeof(reset_context));

View file

@ -67,6 +67,12 @@ struct amdgpu_job {
uint64_t uf_addr; uint64_t uf_addr;
uint64_t uf_sequence; uint64_t uf_sequence;
/* virtual addresses for shadow/GDS/CSA */
uint64_t shadow_va;
uint64_t csa_va;
uint64_t gds_va;
bool init_shadow;
/* job_run_counter >= 1 means a resubmit job */ /* job_run_counter >= 1 means a resubmit job */
uint32_t job_run_counter; uint32_t job_run_counter;

View file

@ -45,13 +45,14 @@ int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
{ {
int i; int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i)) if (adev->jpeg.harvest_config & (1 << i))
continue; continue;
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec); for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
} }
mutex_destroy(&adev->jpeg.jpeg_pg_lock); mutex_destroy(&adev->jpeg.jpeg_pg_lock);
@ -76,13 +77,14 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev = struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, jpeg.idle_work.work); container_of(work, struct amdgpu_device, jpeg.idle_work.work);
unsigned int fences = 0; unsigned int fences = 0;
unsigned int i; unsigned int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i)) if (adev->jpeg.harvest_config & (1 << i))
continue; continue;
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec); for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
} }
if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
@ -122,18 +124,21 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
return 0; return 0;
WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3); r = amdgpu_ring_alloc(ring, 3);
if (r) if (r)
return r; return r;
amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0)); WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
amdgpu_ring_write(ring, 0xDEADBEEF); /* Add a read register to make sure the write register is executed. */
RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
amdgpu_ring_write(ring, 0xABADCAFE);
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) { for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
if (tmp == 0xDEADBEEF) if (tmp == 0xABADCAFE)
break; break;
udelay(1); udelay(1);
} }
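The reworked JPEG ring test keeps the usual scratch-register handshake, just per pipe now: the CPU seeds the pitch register with one value, the ring is asked to overwrite it with another (0xABADCAFE), and the driver polls until the new value appears or the timeout expires. A generic, hardware-free sketch of that poll loop (the register is simulated by a variable that flips after a few polls):

#include <stdint.h>
#include <stdio.h>

/* Toy model of the write-then-poll handshake used by the ring test. */
static uint32_t fake_reg = 0xCAFEDEAD;  /* CPU-seeded scratch value */

static uint32_t read_reg(int poll)
{
        if (poll >= 3)                  /* pretend the packet landed now */
                fake_reg = 0xABADCAFE;
        return fake_reg;
}

int main(void)
{
        const int usec_timeout = 100000;        /* assumption */
        int i;

        for (i = 0; i < usec_timeout; i++) {
                if (read_reg(i) == 0xABADCAFE) {
                        printf("ring test passed after %d polls\n", i + 1);
                        return 0;
                }
                /* udelay(1) in the driver; nothing to wait for here */
        }
        printf("ring test timed out\n");
        return 1;
}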
@ -161,8 +166,7 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0]; ib = &job->ibs[0];
ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF; ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) { for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@ -208,7 +212,7 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
} }
if (!amdgpu_sriov_vf(adev)) { if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->usec_timeout; i++) { for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
if (tmp == 0xDEADBEEF) if (tmp == 0xDEADBEEF)
break; break;
udelay(1); udelay(1);
@ -241,6 +245,31 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r, i;
r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
if (r)
goto late_fini;
}
}
return 0;
late_fini:
amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
{ {
int err; int err;
@ -262,7 +291,7 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
adev->jpeg.ras_if = &ras->ras_block.ras_comm; adev->jpeg.ras_if = &ras->ras_block.ras_comm;
if (!ras->ras_block.ras_late_init) if (!ras->ras_block.ras_late_init)
ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
return 0; return 0;
} }

View file

@ -26,19 +26,22 @@
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#define AMDGPU_MAX_JPEG_INSTANCES 2 #define AMDGPU_MAX_JPEG_INSTANCES 4
#define AMDGPU_MAX_JPEG_RINGS 8
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
struct amdgpu_jpeg_reg{ struct amdgpu_jpeg_reg{
unsigned jpeg_pitch; unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
}; };
struct amdgpu_jpeg_inst { struct amdgpu_jpeg_inst {
struct amdgpu_ring ring_dec; struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
struct amdgpu_irq_src irq; struct amdgpu_irq_src irq;
struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_jpeg_reg external; struct amdgpu_jpeg_reg external;
uint8_t aid_id;
}; };
struct amdgpu_jpeg_ras { struct amdgpu_jpeg_ras {
@ -48,6 +51,7 @@ struct amdgpu_jpeg_ras {
struct amdgpu_jpeg { struct amdgpu_jpeg {
uint8_t num_jpeg_inst; uint8_t num_jpeg_inst;
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
unsigned num_jpeg_rings;
struct amdgpu_jpeg_reg internal; struct amdgpu_jpeg_reg internal;
unsigned harvest_config; unsigned harvest_config;
struct delayed_work idle_work; struct delayed_work idle_work;
@ -56,6 +60,9 @@ struct amdgpu_jpeg {
atomic_t total_submission_cnt; atomic_t total_submission_cnt;
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
struct amdgpu_jpeg_ras *ras; struct amdgpu_jpeg_ras *ras;
uint16_t inst_mask;
uint8_t num_inst_per_aid;
}; };
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
@ -72,6 +79,8 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry); struct amdgpu_iv_entry *entry);
int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block);
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev); int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
#endif /*__AMDGPU_JPEG_H__*/ #endif /*__AMDGPU_JPEG_H__*/

View file

@ -462,7 +462,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->jpeg.harvest_config & (1 << i)) if (adev->jpeg.harvest_config & (1 << i))
continue; continue;
if (adev->jpeg.inst[i].ring_dec.sched.ready) for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
++num_rings; ++num_rings;
} }
ib_start_alignment = 16; ib_start_alignment = 16;
@ -876,6 +877,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu; dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
dev_info->mall_size = adev->gmc.mall_size; dev_info->mall_size = adev->gmc.mall_size;
if (adev->gfx.funcs->get_gfx_shadow_info) {
struct amdgpu_gfx_shadow_info shadow_info;
ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
if (!ret) {
dev_info->shadow_size = shadow_info.shadow_size;
dev_info->shadow_alignment = shadow_info.shadow_alignment;
dev_info->csa_size = shadow_info.csa_size;
dev_info->csa_alignment = shadow_info.csa_alignment;
}
}
ret = copy_to_user(out, dev_info, ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info); kfree(dev_info);
@ -1140,6 +1154,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
kfree(caps); kfree(caps);
return r; return r;
} }
case AMDGPU_INFO_MAX_IBS: {
uint32_t max_ibs[AMDGPU_HW_IP_NUM];
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
max_ibs[i] = amdgpu_ring_max_ibs(i);
return copy_to_user(out, max_ibs,
min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
}
default: default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query); DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL; return -EINVAL;
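The AMDGPU_INFO_MAX_IBS case above copies one 32-bit limit per hardware IP type back to user space. A minimal, hypothetical user-space sketch of consuming it through libdrm's amdgpu_query_info() follows; it assumes headers new enough to define AMDGPU_INFO_MAX_IBS and uses a hard-coded render node path purely for illustration.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

int main(void)
{
	amdgpu_device_handle dev;
	uint32_t major, minor, max_ibs[AMDGPU_HW_IP_NUM] = { 0 };
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* assumed render node */

	if (fd < 0 || amdgpu_device_initialize(fd, &major, &minor, &dev))
		return 1;

	/* One entry per HW IP type, mirroring the copy_to_user() above. */
	if (!amdgpu_query_info(dev, AMDGPU_INFO_MAX_IBS, sizeof(max_ibs), max_ibs))
		printf("max IBs per submission on GFX: %u\n", max_ibs[AMDGPU_HW_IP_GFX]);

	amdgpu_device_deinitialize(dev);
	close(fd);
	return 0;
}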
@ -1210,6 +1233,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r) if (r)
goto error_pasid; goto error_pasid;
r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
if (r)
goto error_vm;
r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
if (r) if (r)
goto error_vm; goto error_vm;
@ -1284,12 +1311,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL) if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv); amdgpu_vce_free_handles(adev, file_priv);
if (amdgpu_mcbp) { if (fpriv->csa_va) {
/* TODO: how to handle reserve failure */ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
amdgpu_vm_bo_del(adev, fpriv->csa_va); WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
fpriv->csa_va, csa_addr));
fpriv->csa_va = NULL; fpriv->csa_va = NULL;
amdgpu_bo_unreserve(adev->virt.csa_obj);
} }
pasid = fpriv->vm.pasid; pasid = fpriv->vm.pasid;
@ -1441,7 +1468,7 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
struct drm_amdgpu_info_firmware fw_info; struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw; struct drm_amdgpu_query_fw query_fw;
struct atom_context *ctx = adev->mode_info.atom_context; struct atom_context *ctx = adev->mode_info.atom_context;
@ -1449,7 +1476,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
int ret, i; int ret, i;
static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = { static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
#define TA_FW_NAME(type) [TA_FW_TYPE_PSP_##type] = #type #define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
TA_FW_NAME(XGMI), TA_FW_NAME(XGMI),
TA_FW_NAME(RAS), TA_FW_NAME(RAS),
TA_FW_NAME(HDCP), TA_FW_NAME(HDCP),

View file

@ -924,6 +924,43 @@ error:
return r; return r;
} }
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
const uint32_t *tcp_watch_cntl,
uint32_t flags,
bool trap_en)
{
struct mes_misc_op_input op_input = {0};
int r;
if (!adev->mes.funcs->misc_op) {
DRM_ERROR("mes set shader debugger is not supported!\n");
return -EINVAL;
}
op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
op_input.set_shader_debugger.process_context_addr = process_context_addr;
op_input.set_shader_debugger.flags.u32all = flags;
op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
AMDGPU_MES_API_VERSION_SHIFT) >= 14)
op_input.set_shader_debugger.trap_en = trap_en;
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
DRM_ERROR("failed to set_shader_debugger\n");
amdgpu_mes_unlock(&adev->mes);
return r;
}
static void static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring, struct amdgpu_ring *ring,
@ -1305,14 +1342,9 @@ static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
if (!ring) if (!ring)
continue; continue;
r = amdgpu_ring_test_ring(ring); r = amdgpu_ring_test_helper(ring);
if (r) { if (r)
DRM_DEV_ERROR(ring->adev->dev,
"ring %s test failed (%d)\n",
ring->name, r);
return r; return r;
} else
DRM_INFO("ring %s test pass\n", ring->name);
r = amdgpu_ring_test_ib(ring, 1000 * 10); r = amdgpu_ring_test_ib(ring, 1000 * 10);
if (r) { if (r) {

View file

@ -219,6 +219,8 @@ struct mes_add_queue_input {
uint32_t gws_size; uint32_t gws_size;
uint64_t tba_addr; uint64_t tba_addr;
uint64_t tma_addr; uint64_t tma_addr;
uint32_t trap_en;
uint32_t skip_process_ctx_clear;
uint32_t is_kfd_process; uint32_t is_kfd_process;
uint32_t is_aql_queue; uint32_t is_aql_queue;
uint32_t queue_size; uint32_t queue_size;
@ -256,6 +258,7 @@ enum mes_misc_opcode {
MES_MISC_OP_READ_REG, MES_MISC_OP_READ_REG,
MES_MISC_OP_WRM_REG_WAIT, MES_MISC_OP_WRM_REG_WAIT,
MES_MISC_OP_WRM_REG_WR_WAIT, MES_MISC_OP_WRM_REG_WR_WAIT,
MES_MISC_OP_SET_SHADER_DEBUGGER,
}; };
struct mes_misc_op_input { struct mes_misc_op_input {
@ -278,6 +281,21 @@ struct mes_misc_op_input {
uint32_t reg0; uint32_t reg0;
uint32_t reg1; uint32_t reg1;
} wrm_reg; } wrm_reg;
struct {
uint64_t process_context_addr;
union {
struct {
uint64_t single_memop : 1;
uint64_t single_alu_op : 1;
uint64_t reserved: 30;
};
uint32_t u32all;
} flags;
uint32_t spi_gdbg_per_vmid_cntl;
uint32_t tcp_watch_cntl[4];
uint32_t trap_en;
} set_shader_debugger;
}; };
}; };
@ -340,6 +358,12 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1, uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask); uint32_t ref, uint32_t mask);
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
const uint32_t *tcp_watch_cntl,
uint32_t flags,
bool trap_en);
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx, int queue_type, int idx,

View file

@ -21,6 +21,29 @@
#ifndef __AMDGPU_MMHUB_H__ #ifndef __AMDGPU_MMHUB_H__
#define __AMDGPU_MMHUB_H__ #define __AMDGPU_MMHUB_H__
enum amdgpu_mmhub_ras_memory_id {
AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
AMDGPU_MMHUB_WIO_CMDMEM = 4,
AMDGPU_MMHUB_RIO_CMDMEM = 5,
AMDGPU_MMHUB_WGMI_CMDMEM = 6,
AMDGPU_MMHUB_RGMI_CMDMEM = 7,
AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
AMDGPU_MMHUB_MAM_DMEM0 = 10,
AMDGPU_MMHUB_MAM_DMEM1 = 11,
AMDGPU_MMHUB_MAM_DMEM2 = 12,
AMDGPU_MMHUB_MAM_DMEM3 = 13,
AMDGPU_MMHUB_WRET_TAGMEM = 19,
AMDGPU_MMHUB_RRET_TAGMEM = 20,
AMDGPU_MMHUB_WIO_DATAMEM = 21,
AMDGPU_MMHUB_WGMI_DATAMEM = 22,
AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
};
struct amdgpu_mmhub_ras { struct amdgpu_mmhub_ras {
struct amdgpu_ras_block_object ras_block; struct amdgpu_ras_block_object ras_block;
}; };

View file

@ -61,6 +61,7 @@ struct amdgpu_nbio_funcs {
u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev); u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev); u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
u32 (*get_rev_id)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev);
@ -95,6 +96,11 @@ struct amdgpu_nbio_funcs {
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
u32 (*get_rom_offset)(struct amdgpu_device *adev); u32 (*get_rom_offset)(struct amdgpu_device *adev);
int (*get_compute_partition_mode)(struct amdgpu_device *adev);
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
u32 *supp_modes);
void (*set_compute_partition_mode)(struct amdgpu_device *adev,
enum amdgpu_gfx_partition mode);
}; };
struct amdgpu_nbio { struct amdgpu_nbio {

View file

@ -79,9 +79,10 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
struct amdgpu_bo_vm *vmbo; struct amdgpu_bo_vm *vmbo;
bo = shadow_bo->parent;
vmbo = to_amdgpu_bo_vm(bo); vmbo = to_amdgpu_bo_vm(bo);
/* in case amdgpu_device_recover_vram got NULL of bo->parent */ /* in case amdgpu_device_recover_vram got NULL of bo->parent */
if (!list_empty(&vmbo->shadow_list)) { if (!list_empty(&vmbo->shadow_list)) {
@ -130,15 +131,25 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
u32 c = 0; u32 c = 0;
if (domain & AMDGPU_GEM_DOMAIN_VRAM) { if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
if (adev->gmc.mem_partitions && mem_id >= 0) {
places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
/*
* memory partition range lpfn is inclusive start + size - 1
* TTM place lpfn is exclusive start + size
*/
places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
} else {
places[c].fpfn = 0; places[c].fpfn = 0;
places[c].lpfn = 0; places[c].lpfn = 0;
}
places[c].mem_type = TTM_PL_VRAM; places[c].mem_type = TTM_PL_VRAM;
places[c].flags = 0; places[c].flags = 0;
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
places[c].lpfn = visible_pfn; places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size) else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size)
places[c].flags |= TTM_PL_FLAG_TOPDOWN; places[c].flags |= TTM_PL_FLAG_TOPDOWN;
@ -574,6 +585,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->flags = bp->flags; bo->flags = bp->flags;
if (adev->gmc.mem_partitions)
/* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
bo->xcp_id = bp->xcp_id_plus1 - 1;
else
/* For GPUs without spatial partitioning */
bo->xcp_id = 0;
if (!amdgpu_bo_support_uswc(bo->flags)) if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@ -610,7 +628,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) { bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence; struct dma_fence *fence;
r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
if (unlikely(r)) if (unlikely(r))
goto fail_unreserve; goto fail_unreserve;
@ -694,11 +712,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
return r; return r;
*vmbo_ptr = to_amdgpu_bo_vm(bo_ptr); *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list);
/* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list
* is initialized.
*/
bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy;
return r; return r;
} }
@ -715,6 +728,8 @@ void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
mutex_lock(&adev->shadow_list_lock); mutex_lock(&adev->shadow_list_lock);
list_add_tail(&vmbo->shadow_list, &adev->shadow_list); list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
mutex_unlock(&adev->shadow_list_lock); mutex_unlock(&adev->shadow_list_lock);
} }
@ -935,7 +950,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain); amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) { for (i = 0; i < bo->placement.num_placement; i++) {
unsigned fpfn, lpfn; unsigned int fpfn, lpfn;
fpfn = min_offset >> PAGE_SHIFT; fpfn = min_offset >> PAGE_SHIFT;
lpfn = max_offset >> PAGE_SHIFT; lpfn = max_offset >> PAGE_SHIFT;
@ -1016,7 +1031,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
} }
} }
static const char *amdgpu_vram_names[] = { static const char * const amdgpu_vram_names[] = {
"UNKNOWN", "UNKNOWN",
"GDDR1", "GDDR1",
"DDR2", "DDR2",
@ -1044,7 +1059,7 @@ static const char *amdgpu_vram_names[] = {
int amdgpu_bo_init(struct amdgpu_device *adev) int amdgpu_bo_init(struct amdgpu_device *adev)
{ {
/* On A+A platform, VRAM can be mapped as WB */ /* On A+A platform, VRAM can be mapped as WB */
if (!adev->gmc.xgmi.connected_to_cpu) { if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
/* reserve PAT memory space to WC for VRAM */ /* reserve PAT memory space to WC for VRAM */
int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
adev->gmc.aper_size); adev->gmc.aper_size);
@ -1080,8 +1095,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
amdgpu_ttm_fini(adev); amdgpu_ttm_fini(adev);
if (drm_dev_enter(adev_to_drm(adev), &idx)) { if (drm_dev_enter(adev_to_drm(adev), &idx)) {
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
if (!adev->gmc.xgmi.connected_to_cpu) {
arch_phys_wc_del(adev->gmc.vram_mtrr); arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
} }
@ -1148,8 +1162,8 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
* Returns: * Returns:
* 0 for success or a negative error code on failure. * 0 for success or a negative error code on failure.
*/ */
int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
uint32_t metadata_size, uint64_t flags) u32 metadata_size, uint64_t flags)
{ {
struct amdgpu_bo_user *ubo; struct amdgpu_bo_user *ubo;
void *buffer; void *buffer;
@ -1338,7 +1352,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return; return;
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
if (!WARN_ON(r)) { if (!WARN_ON(r)) {
amdgpu_bo_fence(abo, fence, false); amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence); dma_fence_put(fence);

View file

@ -56,6 +56,8 @@ struct amdgpu_bo_param {
bool no_wait_gpu; bool no_wait_gpu;
struct dma_resv *resv; struct dma_resv *resv;
void (*destroy)(struct ttm_buffer_object *bo); void (*destroy)(struct ttm_buffer_object *bo);
/* xcp partition number plus 1, 0 means any partition */
int8_t xcp_id_plus1;
}; };
/* bo virtual addresses in a vm */ /* bo virtual addresses in a vm */
@ -108,6 +110,13 @@ struct amdgpu_bo {
struct mmu_interval_notifier notifier; struct mmu_interval_notifier notifier;
#endif #endif
struct kgd_mem *kfd_bo; struct kgd_mem *kfd_bo;
/*
* For GPUs with spatial partitioning, this is the xcp partition number; -1
* means any partition. For other ASICs without spatial partitioning, it is
* always 0 for memory accounting.
*/
int8_t xcp_id;
}; };
struct amdgpu_bo_user { struct amdgpu_bo_user {

View file

@ -146,6 +146,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 0):
adev->virt.autoload_ucode_id = 0; adev->virt.autoload_ucode_id = 0;
break; break;
case IP_VERSION(13, 0, 6):
ret = psp_init_cap_microcode(psp, ucode_prefix);
break;
case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 10):
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
ret = psp_init_cap_microcode(psp, ucode_prefix); ret = psp_init_cap_microcode(psp, ucode_prefix);
@ -329,6 +332,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
bool ret = false; bool ret = false;
int i; int i;
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
return false;
db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET; db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header); db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
@ -411,7 +417,7 @@ static int psp_sw_init(void *handle)
if ((psp_get_runtime_db_entry(adev, if ((psp_get_runtime_db_entry(adev,
PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS, PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
&scpm_entry)) && &scpm_entry)) &&
(SCPM_DISABLE != scpm_entry.scpm_status)) { (scpm_entry.scpm_status != SCPM_DISABLE)) {
adev->scpm_enabled = true; adev->scpm_enabled = true;
adev->scpm_status = scpm_entry.scpm_status; adev->scpm_status = scpm_entry.scpm_status;
} else { } else {
@ -458,11 +464,10 @@ static int psp_sw_init(void *handle)
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) { adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) {
ret= psp_sysfs_init(adev); ret = psp_sysfs_init(adev);
if (ret) { if (ret)
return ret; return ret;
} }
}
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
amdgpu_sriov_vf(adev) ? amdgpu_sriov_vf(adev) ?
@ -474,7 +479,8 @@ static int psp_sw_init(void *handle)
return ret; return ret;
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&psp->fence_buf_bo, &psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf_mc_addr,
&psp->fence_buf); &psp->fence_buf);
@ -482,7 +488,8 @@ static int psp_sw_init(void *handle)
goto failed1; goto failed1;
ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
(void **)&psp->cmd_buf_mem); (void **)&psp->cmd_buf_mem);
if (ret) if (ret)
@ -520,6 +527,8 @@ static int psp_sw_fini(void *handle)
kfree(cmd); kfree(cmd);
cmd = NULL; cmd = NULL;
psp_free_shared_bufs(psp);
if (psp->km_ring.ring_mem) if (psp->km_ring.ring_mem)
amdgpu_bo_free_kernel(&adev->firmware.rbuf, amdgpu_bo_free_kernel(&adev->firmware.rbuf,
&psp->km_ring.ring_mem_mc_addr, &psp->km_ring.ring_mem_mc_addr,
@ -643,7 +652,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED || skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev); psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp)); memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
/* In some cases, psp response status is not 0 even when there is no
* problem while the command is submitted. Some version of PSP FW
@ -699,8 +708,13 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
uint64_t tmr_mc, struct amdgpu_bo *tmr_bo) uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
{ {
struct amdgpu_device *adev = psp->adev; struct amdgpu_device *adev = psp->adev;
uint32_t size = amdgpu_bo_size(tmr_bo); uint32_t size = 0;
uint64_t tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo); uint64_t tmr_pa = 0;
if (tmr_bo) {
size = amdgpu_bo_size(tmr_bo);
tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
}
if (amdgpu_sriov_vf(psp->adev)) if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
@ -745,6 +759,16 @@ static int psp_load_toc(struct psp_context *psp,
return ret; return ret;
} }
static bool psp_boottime_tmr(struct psp_context *psp)
{
switch (psp->adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 6):
return true;
default:
return false;
}
}
/* Set up Trusted Memory Region */ /* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp) static int psp_tmr_init(struct psp_context *psp)
{ {
@ -816,6 +840,7 @@ static int psp_tmr_load(struct psp_context *psp)
cmd = acquire_psp_cmd_buf(psp); cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
if (psp->tmr_bo)
DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
@ -969,6 +994,27 @@ static int psp_rl_load(struct amdgpu_device *adev)
return ret; return ret;
} }
int psp_spatial_partition(struct psp_context *psp, int mode)
{
struct psp_gfx_cmd_resp *cmd;
int ret;
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = acquire_psp_cmd_buf(psp);
cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
cmd->cmd.cmd_spatial_part.mode = mode;
dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode);
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
release_psp_cmd_buf(psp);
return ret;
}
static int psp_asd_initialize(struct psp_context *psp) static int psp_asd_initialize(struct psp_context *psp)
{ {
int ret; int ret;
@ -1136,9 +1182,8 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context)
context->resp_status = cmd->resp.status; context->resp_status = cmd->resp.status;
if (!ret) { if (!ret)
context->session_id = cmd->resp.session_id; context->session_id = cmd->resp.session_id;
}
release_psp_cmd_buf(psp); release_psp_cmd_buf(psp);
@ -1254,8 +1299,9 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
{ {
return psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) && return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b; psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6);
} }
/* /*
@ -1363,6 +1409,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
/* Invoke xgmi ta again to get the link information */ /* Invoke xgmi ta again to get the link information */
if (psp_xgmi_peer_link_info_supported(psp)) { if (psp_xgmi_peer_link_info_supported(psp)) {
struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output;
bool requires_reflection =
(psp->xgmi_context.supports_extended_data && get_extended_data) ||
psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6);
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
@ -1377,11 +1426,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
topology->nodes[i].num_links = get_extended_data ? topology->nodes[i].num_links = get_extended_data ?
topology->nodes[i].num_links + topology->nodes[i].num_links +
link_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links :
link_info_output->nodes[i].num_links; ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links :
link_info_output->nodes[i].num_links);
/* reflect the topology information for bi-directionality */ /* reflect the topology information for bi-directionality */
if (psp->xgmi_context.supports_extended_data && if (requires_reflection && topology->nodes[i].num_hops)
get_extended_data && topology->nodes[i].num_hops)
psp_xgmi_reflect_topology_info(psp, topology->nodes[i]); psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
} }
} }
@ -1465,8 +1514,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_ras_intr_triggered()) if (amdgpu_ras_intr_triggered())
return ret; return ret;
if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
{
DRM_WARN("RAS: Unsupported Interface"); DRM_WARN("RAS: Unsupported Interface");
return -EINVAL; return -EINVAL;
} }
@ -1476,8 +1524,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
dev_warn(psp->adev->dev, "ECC switch disabled\n"); dev_warn(psp->adev->dev, "ECC switch disabled\n");
ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE; ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
} } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
dev_warn(psp->adev->dev, dev_warn(psp->adev->dev,
"RAS internal register access blocked\n"); "RAS internal register access blocked\n");
@ -1573,11 +1620,10 @@ int psp_ras_initialize(struct psp_context *psp)
if (ret) if (ret)
dev_warn(adev->dev, "PSP set boot config failed\n"); dev_warn(adev->dev, "PSP set boot config failed\n");
else else
dev_warn(adev->dev, "GECC will be disabled in next boot cycle " dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
"if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
} }
} else { } else {
if (1 == boot_cfg) { if (boot_cfg == 1) {
dev_info(adev->dev, "GECC is enabled\n"); dev_info(adev->dev, "GECC is enabled\n");
} else { } else {
/* enable GECC in next boot cycle if it is disabled /* enable GECC in next boot cycle if it is disabled
@ -1609,6 +1655,8 @@ int psp_ras_initialize(struct psp_context *psp)
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
if (!adev->gmc.xgmi.connected_to_cpu) if (!adev->gmc.xgmi.connected_to_cpu)
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
ras_cmd->ras_in_message.init_flags.xcc_mask =
adev->gfx.xcc_mask;
ret = psp_ta_load(psp, &psp->ras_context.context); ret = psp_ta_load(psp, &psp->ras_context.context);
@ -1626,14 +1674,37 @@ int psp_ras_initialize(struct psp_context *psp)
} }
int psp_ras_trigger_error(struct psp_context *psp, int psp_ras_trigger_error(struct psp_context *psp,
struct ta_ras_trigger_error_input *info) struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{ {
struct ta_ras_shared_memory *ras_cmd; struct ta_ras_shared_memory *ras_cmd;
struct amdgpu_device *adev = psp->adev;
int ret; int ret;
uint32_t dev_mask;
if (!psp->ras_context.context.initialized) if (!psp->ras_context.context.initialized)
return -EINVAL; return -EINVAL;
switch (info->block_id) {
case TA_RAS_BLOCK__GFX:
dev_mask = GET_MASK(GC, instance_mask);
break;
case TA_RAS_BLOCK__SDMA:
dev_mask = GET_MASK(SDMA0, instance_mask);
break;
case TA_RAS_BLOCK__VCN:
case TA_RAS_BLOCK__JPEG:
dev_mask = GET_MASK(VCN, instance_mask);
break;
default:
dev_mask = instance_mask;
break;
}
/* reuse sub_block_index for backward compatibility */
dev_mask <<= AMDGPU_RAS_INST_SHIFT;
dev_mask &= AMDGPU_RAS_INST_MASK;
info->sub_block_index |= dev_mask;
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
@ -2077,11 +2148,13 @@ static int psp_hw_start(struct psp_context *psp)
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo; goto skip_pin_bo;
if (!psp_boottime_tmr(psp)) {
ret = psp_tmr_init(psp); ret = psp_tmr_init(psp);
if (ret) { if (ret) {
DRM_ERROR("PSP tmr init failed!\n"); DRM_ERROR("PSP tmr init failed!\n");
return ret; return ret;
} }
}
skip_pin_bo: skip_pin_bo:
/* /*
@ -2402,10 +2475,9 @@ static int psp_load_smu_fw(struct psp_context *psp)
(adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) || (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) { adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) {
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
if (ret) { if (ret)
DRM_WARN("Failed to set MP1 state prepare for reload\n"); DRM_WARN("Failed to set MP1 state prepare for reload\n");
} }
}
ret = psp_execute_non_psp_fw_load(psp, ucode); ret = psp_execute_non_psp_fw_load(psp, ucode);
@ -2655,8 +2727,6 @@ static int psp_hw_fini(void *handle)
psp_ring_destroy(psp, PSP_RING_TYPE__KM); psp_ring_destroy(psp, PSP_RING_TYPE__KM);
psp_free_shared_bufs(psp);
return 0; return 0;
} }
@ -2716,9 +2786,8 @@ static int psp_suspend(void *handle)
} }
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
if (ret) { if (ret)
DRM_ERROR("PSP ring stop failed\n"); DRM_ERROR("PSP ring stop failed\n");
}
out: out:
return ret; return ret;
@ -3491,7 +3560,7 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size
drm_dev_exit(idx); drm_dev_exit(idx);
} }
static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, static DEVICE_ATTR(usbc_pd_fw, 0644,
psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_read,
psp_usbc_pd_fw_sysfs_write); psp_usbc_pd_fw_sysfs_write);
@ -3548,6 +3617,9 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
void *fw_pri_cpu_addr; void *fw_pri_cpu_addr;
int ret; int ret;
if (adev->psp.vbflash_image_size == 0)
return -EINVAL;
dev_info(adev->dev, "VBIOS flash to PSP started"); dev_info(adev->dev, "VBIOS flash to PSP started");
ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
@ -3599,13 +3671,13 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
} }
static const struct bin_attribute psp_vbflash_bin_attr = { static const struct bin_attribute psp_vbflash_bin_attr = {
.attr = {.name = "psp_vbflash", .mode = 0664}, .attr = {.name = "psp_vbflash", .mode = 0660},
.size = 0, .size = 0,
.write = amdgpu_psp_vbflash_write, .write = amdgpu_psp_vbflash_write,
.read = amdgpu_psp_vbflash_read, .read = amdgpu_psp_vbflash_read,
}; };
static DEVICE_ATTR(psp_vbflash_status, 0444, amdgpu_psp_vbflash_status, NULL); static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) int amdgpu_psp_sysfs_init(struct amdgpu_device *adev)
{ {
@ -3618,6 +3690,7 @@ int amdgpu_psp_sysfs_init(struct amdgpu_device *adev)
switch (adev->ip_versions[MP0_HWIP][0]) { switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
if (!psp->adev) { if (!psp->adev) {
psp->adev = adev; psp->adev = adev;
psp_v13_0_set_psp_funcs(psp); psp_v13_0_set_psp_funcs(psp);
@ -3673,8 +3746,7 @@ static void psp_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_usbc_pd_fw); device_remove_file(adev->dev, &dev_attr_usbc_pd_fw);
} }
const struct amdgpu_ip_block_version psp_v3_1_ip_block = const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
{
.type = AMD_IP_BLOCK_TYPE_PSP, .type = AMD_IP_BLOCK_TYPE_PSP,
.major = 3, .major = 3,
.minor = 1, .minor = 1,
@ -3682,8 +3754,7 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
.funcs = &psp_ip_funcs, .funcs = &psp_ip_funcs,
}; };
const struct amdgpu_ip_block_version psp_v10_0_ip_block = const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
{
.type = AMD_IP_BLOCK_TYPE_PSP, .type = AMD_IP_BLOCK_TYPE_PSP,
.major = 10, .major = 10,
.minor = 0, .minor = 0,
@ -3691,8 +3762,7 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.funcs = &psp_ip_funcs, .funcs = &psp_ip_funcs,
}; };
const struct amdgpu_ip_block_version psp_v11_0_ip_block = const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
{
.type = AMD_IP_BLOCK_TYPE_PSP, .type = AMD_IP_BLOCK_TYPE_PSP,
.major = 11, .major = 11,
.minor = 0, .minor = 0,
@ -3708,8 +3778,7 @@ const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
.funcs = &psp_ip_funcs, .funcs = &psp_ip_funcs,
}; };
const struct amdgpu_ip_block_version psp_v12_0_ip_block = const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
{
.type = AMD_IP_BLOCK_TYPE_PSP, .type = AMD_IP_BLOCK_TYPE_PSP,
.major = 12, .major = 12,
.minor = 0, .minor = 0,

View file

@ -486,7 +486,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp, int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable); union ta_ras_cmd_input *info, bool enable);
int psp_ras_trigger_error(struct psp_context *psp, int psp_ras_trigger_error(struct psp_context *psp,
struct ta_ras_trigger_error_input *info); struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
int psp_ras_terminate(struct psp_context *psp); int psp_ras_terminate(struct psp_context *psp);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
@ -519,6 +519,8 @@ int psp_load_fw_list(struct psp_context *psp,
struct amdgpu_firmware_info **ucode_list, int ucode_count); struct amdgpu_firmware_info **ucode_list, int ucode_count);
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
int psp_spatial_partition(struct psp_context *psp, int mode);
int is_psp_fw_valid(struct psp_bin_desc bin); int is_psp_fw_valid(struct psp_bin_desc bin);
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev); int amdgpu_psp_sysfs_init(struct amdgpu_device *adev);

View file

@ -256,6 +256,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
int block_id; int block_id;
uint32_t sub_block; uint32_t sub_block;
u64 address, value; u64 address, value;
/* default value is 0 if the mask is not set by user */
u32 instance_mask = 0;
if (*pos) if (*pos)
return -EINVAL; return -EINVAL;
@ -306,7 +308,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->op = op; data->op = op;
if (op == 2) { if (op == 2) {
if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
&sub_block, &address, &value, &instance_mask) != 4 &&
sscanf(str, "%*s %*s %*s %u %llu %llu %u",
&sub_block, &address, &value, &instance_mask) != 4 &&
sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
&sub_block, &address, &value) != 3 && &sub_block, &address, &value) != 3 &&
sscanf(str, "%*s %*s %*s %u %llu %llu", sscanf(str, "%*s %*s %*s %u %llu %llu",
&sub_block, &address, &value) != 3) &sub_block, &address, &value) != 3)
@ -314,6 +320,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->head.sub_block_index = sub_block; data->head.sub_block_index = sub_block;
data->inject.address = address; data->inject.address = address;
data->inject.value = value; data->inject.value = value;
data->inject.instance_mask = instance_mask;
} }
} else { } else {
if (size < sizeof(*data)) if (size < sizeof(*data))
@ -326,6 +333,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0; return 0;
} }
static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
struct ras_debug_if *data)
{
int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
uint32_t mask, inst_mask = data->inject.instance_mask;
/* no need to set instance mask if there is only one instance */
if (num_xcc <= 1 && inst_mask) {
data->inject.instance_mask = 0;
dev_dbg(adev->dev,
"RAS inject mask(0x%x) isn't supported and force it to 0.\n",
inst_mask);
return;
}
switch (data->head.block) {
case AMDGPU_RAS_BLOCK__GFX:
mask = GENMASK(num_xcc - 1, 0);
break;
case AMDGPU_RAS_BLOCK__SDMA:
mask = GENMASK(adev->sdma.num_instances - 1, 0);
break;
case AMDGPU_RAS_BLOCK__VCN:
case AMDGPU_RAS_BLOCK__JPEG:
mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
break;
default:
mask = inst_mask;
break;
}
/* remove invalid bits in instance mask */
data->inject.instance_mask &= mask;
if (inst_mask != data->inject.instance_mask)
dev_dbg(adev->dev,
"Adjust RAS inject mask 0x%x to 0x%x\n",
inst_mask, data->inject.instance_mask);
}
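To make the clamping above concrete, here is a standalone sketch (not kernel code) that applies the same GENMASK-style filtering to a user-supplied mask; the instance count and the requested mask are invented for the example, and GENMASK is re-defined locally for 32-bit values.

#include <stdio.h>
#include <stdint.h>

#define GENMASK32(h, l) ((~0u << (l)) & (~0u >> (31 - (h))))

int main(void)
{
	unsigned int num_xcc = 4;			/* assumed GFX instance count */
	uint32_t requested = 0x3f;			/* user asked for six instances */
	uint32_t valid = GENMASK32(num_xcc - 1, 0);	/* 0xf: only four exist */
	uint32_t adjusted = requested & valid;

	printf("requested 0x%x -> adjusted 0x%x\n", requested, adjusted);
	return 0;
}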
/** /**
* DOC: AMDGPU RAS debugfs control interface * DOC: AMDGPU RAS debugfs control interface
* *
@ -341,7 +388,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* sub_block_index: some IPs have subcomponents, say, GFX, sDMA.
* name: the name of IP.
* *
* inject has two more members than head, they are address, value. * inject has three more members than head, they are address, value and mask.
* As their names indicate, inject operation will write the * As their names indicate, inject operation will write the
* value to the address. * value to the address.
* *
@ -365,7 +412,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* *
* echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
* echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
* echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
* *
* Where N is the card which you want to affect.
* *
@ -382,13 +429,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* *
* The sub-block is the sub-block index; pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers; a leading 0x is optional.
* The mask is the instance mask; it is optional and its default value is 0x1.
* *
* For instance, * For instance,
* *
* .. code-block:: bash * .. code-block:: bash
* *
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
* *
* How to check the result of the operation? * How to check the result of the operation?
@ -460,6 +508,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
break; break;
} }
amdgpu_ras_instance_mask_check(adev, &data);
/* data.inject.address is offset instead of absolute gpu address */ /* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject); ret = amdgpu_ras_error_inject(adev, &data.inject);
break; break;
@ -1115,15 +1165,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
block_info.address); block_info.address);
} }
if (info->head.block == AMDGPU_RAS_BLOCK__GFX) { if (block_obj->hw_ops->ras_error_inject) {
if (block_obj->hw_ops->ras_error_inject) if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
ret = block_obj->hw_ops->ras_error_inject(adev, info); ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
else /* Special ras_error_inject is defined (e.g: xgmi) */
ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
info->instance_mask);
} else { } else {
/* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */ /* default path */
if (block_obj->hw_ops->ras_error_inject) ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
else /*If not defined .ras_error_inject, use default ras_error_inject*/
ret = psp_ras_trigger_error(&adev->psp, &block_info);
} }
if (ret) if (ret)
@ -1597,8 +1647,7 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
{ {
/* Fatal error events are handled on host side */ /* Fatal error events are handled on host side */
if (amdgpu_sriov_vf(adev) || if (amdgpu_sriov_vf(adev))
!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
return; return;
if (adev->nbio.ras && if (adev->nbio.ras &&
@ -2008,9 +2057,15 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
/* Perform full reset in fatal error mode */ /* Perform full reset in fatal error mode */
if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
else else {
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
reset_context.method = AMD_RESET_METHOD_MODE2;
}
}
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
} }
atomic_set(&ras->in_recovery, 0); atomic_set(&ras->in_recovery, 0);
@ -2259,7 +2314,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
atomic_set(&con->in_recovery, 0); atomic_set(&con->in_recovery, 0);
con->eeprom_control.bad_channel_bitmap = 0; con->eeprom_control.bad_channel_bitmap = 0;
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
/* Todo: During test the SMU might fail to read the eeprom through I2C /* Todo: During test the SMU might fail to read the eeprom through I2C
@ -2625,7 +2680,8 @@ release_con:
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev) int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
{ {
if (adev->gmc.xgmi.connected_to_cpu) if (adev->gmc.xgmi.connected_to_cpu ||
adev->gmc.is_app_apu)
return 1; return 1;
return 0; return 0;
} }
@ -3104,3 +3160,143 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
return 0; return 0;
} }
void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
{
if (!err_type_name)
return;
switch (err_type) {
case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
sprintf(err_type_name, "correctable");
break;
case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
sprintf(err_type_name, "uncorrectable");
break;
default:
sprintf(err_type_name, "unknown");
break;
}
}
bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
uint32_t instance,
uint32_t *memory_id)
{
uint32_t err_status_lo_data, err_status_lo_offset;
if (!reg_entry)
return false;
err_status_lo_offset =
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
reg_entry->seg_lo, reg_entry->reg_lo);
err_status_lo_data = RREG32(err_status_lo_offset);
if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
!REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
return false;
*memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
return true;
}
bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
uint32_t instance,
unsigned long *err_cnt)
{
uint32_t err_status_hi_data, err_status_hi_offset;
if (!reg_entry)
return false;
err_status_hi_offset =
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
reg_entry->seg_hi, reg_entry->reg_hi);
err_status_hi_data = RREG32(err_status_hi_offset);
if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
!REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
/* keep the check here in case we need to refer to the result later */
dev_dbg(adev->dev, "Invalid err_info field\n");
/* read err count */
*err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
return true;
}
void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_list,
uint32_t reg_list_size,
const struct amdgpu_ras_memory_id_entry *mem_list,
uint32_t mem_list_size,
uint32_t instance,
uint32_t err_type,
unsigned long *err_count)
{
uint32_t memory_id;
unsigned long err_cnt;
char err_type_name[16];
uint32_t i, j;
for (i = 0; i < reg_list_size; i++) {
/* query memory_id from err_status_lo */
if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
instance, &memory_id))
continue;
/* query err_cnt from err_status_hi */
if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
instance, &err_cnt) ||
!err_cnt)
continue;
*err_count += err_cnt;
/* log the errors */
amdgpu_ras_get_error_type_name(err_type, err_type_name);
if (!mem_list) {
/* memory_list is not supported */
dev_info(adev->dev,
"%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
err_cnt, err_type_name,
reg_list[i].block_name,
instance, memory_id);
} else {
for (j = 0; j < mem_list_size; j++) {
if (memory_id == mem_list[j].memory_id) {
dev_info(adev->dev,
"%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
err_cnt, err_type_name,
reg_list[i].block_name,
instance, mem_list[j].name);
break;
}
}
}
}
}
void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_list,
uint32_t reg_list_size,
uint32_t instance)
{
uint32_t err_status_lo_offset, err_status_hi_offset;
uint32_t i;
for (i = 0; i < reg_list_size; i++) {
err_status_lo_offset =
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
reg_list[i].seg_lo, reg_list[i].reg_lo);
err_status_hi_offset =
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
reg_list[i].seg_hi, reg_list[i].reg_hi);
WREG32(err_status_lo_offset, 0);
WREG32(err_status_hi_offset, 0);
}
}

View file

@ -32,6 +32,11 @@
struct amdgpu_iv_entry; struct amdgpu_iv_entry;
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
/* Position of the instance value in sub_block_index of
* ta_ras_trigger_error_input; the sub block uses the lower 12 bits
*/
#define AMDGPU_RAS_INST_MASK 0xfffff000
#define AMDGPU_RAS_INST_SHIFT 0xc
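A short illustration of how these two macros fold an instance mask into the upper bits of sub_block_index, as psp_ras_trigger_error() does above; the mask value is made up.

#include <stdio.h>
#include <stdint.h>

#define AMDGPU_RAS_INST_MASK	0xfffff000
#define AMDGPU_RAS_INST_SHIFT	0xc

int main(void)
{
	uint32_t sub_block_index = 0;	/* no sub block selected */
	uint32_t instance_mask = 0x3;	/* inject on instances 0 and 1 */
	uint32_t dev_mask = (instance_mask << AMDGPU_RAS_INST_SHIFT) & AMDGPU_RAS_INST_MASK;

	sub_block_index |= dev_mask;
	printf("sub_block_index = 0x%x\n", sub_block_index);	/* prints 0x3000 */
	return 0;
}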
enum amdgpu_ras_block { enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__UMC = 0,
@ -314,6 +319,45 @@ enum amdgpu_ras_ret {
AMDGPU_RAS_PT, AMDGPU_RAS_PT,
}; };
/* ras error status register fields */
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
#define ERR_STATUS__ERR_CNT__SHIFT 0x17
#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
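The masks above describe how an error-status register pair is laid out; the standalone sketch below decodes an invented LO/HI pair the same way amdgpu_ras_inst_get_memory_id_field() and amdgpu_ras_inst_get_err_cnt_field() do. The register values are made up for illustration.

#include <stdio.h>
#include <stdint.h>

#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK	0x00000001u
#define ERR_STATUS_LO__MEMORY_ID__SHIFT			0x18
#define ERR_STATUS_LO__MEMORY_ID_MASK			0xFF000000u
#define ERR_STATUS__ERR_CNT__SHIFT			0x17
#define ERR_STATUS__ERR_CNT_MASK			0x03800000u

int main(void)
{
	uint32_t lo = 0x05000001;	/* valid flag set, memory_id = 5 (invented) */
	uint32_t hi = 0x01800000;	/* err_cnt = 3 (invented) */

	if (lo & ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK)
		printf("memory_id = %u\n",
		       (lo & ERR_STATUS_LO__MEMORY_ID_MASK) >> ERR_STATUS_LO__MEMORY_ID__SHIFT);
	printf("err_cnt = %u\n",
	       (hi & ERR_STATUS__ERR_CNT_MASK) >> ERR_STATUS__ERR_CNT__SHIFT);
	return 0;
}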
#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
(adev->reg_offset[hwip][ip_inst][segment] + (reg))
#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
struct amdgpu_ras_err_status_reg_entry {
uint32_t hwip;
uint32_t ip_inst;
uint32_t seg_lo;
uint32_t reg_lo;
uint32_t seg_hi;
uint32_t reg_hi;
uint32_t reg_inst;
uint32_t flags;
const char *block_name;
};
struct amdgpu_ras_memory_id_entry {
uint32_t memory_id;
const char *name;
};
struct ras_common_if { struct ras_common_if {
enum amdgpu_ras_block block; enum amdgpu_ras_block block;
enum amdgpu_ras_error_type type; enum amdgpu_ras_error_type type;
@ -385,6 +429,9 @@ struct amdgpu_ras {
/* Indicates smu whether need update bad channel info */ /* Indicates smu whether need update bad channel info */
bool update_channel_flag; bool update_channel_flag;
/* Record special requirements of gpu reset caller */
uint32_t gpu_reset_flags;
}; };
struct ras_fs_data { struct ras_fs_data {
@ -471,6 +518,7 @@ struct ras_inject_if {
struct ras_common_if head; struct ras_common_if head;
uint64_t address; uint64_t address;
uint64_t value; uint64_t value;
uint32_t instance_mask;
}; };
struct ras_cure_if { struct ras_cure_if {
@ -508,7 +556,8 @@ struct amdgpu_ras_block_object {
}; };
struct amdgpu_ras_block_hw_ops { struct amdgpu_ras_block_hw_ops {
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); int (*ras_error_inject)(struct amdgpu_device *adev,
void *inject_if, uint32_t instance_mask);
void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev); void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
@ -696,4 +745,25 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj); struct amdgpu_ras_block_object *ras_block_obj);
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
uint32_t instance,
uint32_t *memory_id);
bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
uint32_t instance,
unsigned long *err_cnt);
void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_list,
uint32_t reg_list_size,
const struct amdgpu_ras_memory_id_entry *mem_list,
uint32_t mem_list_size,
uint32_t instance,
uint32_t err_type,
unsigned long *err_count);
void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_list,
uint32_t reg_list_size,
uint32_t instance);
#endif #endif

View file

@ -68,11 +68,24 @@
/* Table hdr is 'AMDR' */ /* Table hdr is 'AMDR' */
#define RAS_TABLE_HDR_VAL 0x414d4452 #define RAS_TABLE_HDR_VAL 0x414d4452
#define RAS_TABLE_VER 0x00010000
/* Bad GPU tag BADG */ /* Bad GPU tag BADG */
#define RAS_TABLE_HDR_BAD 0x42414447 #define RAS_TABLE_HDR_BAD 0x42414447
/**
* EEPROM Table structure v1
* ---------------------------------
* | |
* | EEPROM TABLE HEADER |
* | ( size 20 Bytes ) |
* | |
* ---------------------------------
* | |
* | BAD PAGE RECORD AREA |
* | |
* ---------------------------------
*/
/* Assume 2-Mbit size EEPROM and take up the whole space. */ /* Assume 2-Mbit size EEPROM and take up the whole space. */
#define RAS_TBL_SIZE_BYTES (256 * 1024) #define RAS_TBL_SIZE_BYTES (256 * 1024)
#define RAS_TABLE_START 0 #define RAS_TABLE_START 0
@ -81,6 +94,35 @@
#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \ #define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
/ RAS_TABLE_RECORD_SIZE) / RAS_TABLE_RECORD_SIZE)
/**
* EEPROM Table structure v2.1
* ---------------------------------
* | |
* | EEPROM TABLE HEADER |
* | ( size 20 Bytes ) |
* | |
* ---------------------------------
* | |
* | EEPROM TABLE RAS INFO |
* | (available info size 4 Bytes) |
* | ( reserved size 252 Bytes ) |
* | |
* ---------------------------------
* | |
* | BAD PAGE RECORD AREA |
* | |
* ---------------------------------
*/
/* EEPROM Table V2_1 */
#define RAS_TABLE_V2_1_INFO_SIZE 256
#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
RAS_TABLE_V2_1_INFO_SIZE)
#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
RAS_TABLE_V2_1_INFO_SIZE) \
/ RAS_TABLE_RECORD_SIZE)
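Editor's note: for reference, the capacity math implied by the two layouts works out as below. The 20-byte header and 24-byte record sizes are assumptions taken from context; they are defined elsewhere in amdgpu_ras_eeprom.c and are not part of this hunk.

#include <stdio.h>

#define RAS_TBL_SIZE_BYTES        (256 * 1024)  /* 2-Mbit EEPROM */
#define RAS_TABLE_HEADER_SIZE     20            /* assumed */
#define RAS_TABLE_RECORD_SIZE     24            /* assumed */
#define RAS_TABLE_V2_1_INFO_SIZE  256

int main(void)
{
        unsigned int v1 = (RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) /
                          RAS_TABLE_RECORD_SIZE;
        unsigned int v2_1 = (RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE -
                             RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE;

        /* v2.1 trades 256 bytes of record space for the RAS info block. */
        printf("v1 max records:   %u\n", v1);    /* 10921 with these sizes */
        printf("v2.1 max records: %u\n", v2_1);  /* 10911 with these sizes */
        return 0;
}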
/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM /* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
* offset off of RAS_TABLE_START. That is, this is something you can * offset off of RAS_TABLE_START. That is, this is something you can
* add to control->i2c_address, and then tell I2C layer to read * add to control->i2c_address, and then tell I2C layer to read
@ -103,6 +145,10 @@
#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \ #define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE) RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
RAS_TABLE_HEADER_SIZE - \
RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
@ -230,6 +276,69 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
return res; return res;
} }
static void
__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
unsigned char *buf)
{
u32 *pp = (uint32_t *)buf;
u32 tmp;
tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
(((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
(((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
pp[0] = cpu_to_le32(tmp);
}
static void
__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
unsigned char *buf)
{
u32 *pp = (uint32_t *)buf;
u32 tmp;
tmp = le32_to_cpu(pp[0]);
rai->rma_status = tmp & 0xFF;
rai->health_percent = (tmp >> 8) & 0xFF;
rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
}
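Editor's note: a minimal round-trip sketch of the 32-bit packing used by the encode/decode helpers above, with bits [7:0] rma_status, [15:8] health_percent and [31:16] ecc_page_threshold. The cpu_to_le32()/le32_to_cpu() swap applied on the wire is omitted here and the test values are arbitrary.

#include <assert.h>
#include <stdint.h>

struct rai_example {
        uint8_t  rma_status;
        uint8_t  health_percent;
        uint16_t ecc_page_threshold;
};

static uint32_t encode(const struct rai_example *rai)
{
        return ((uint32_t)rai->rma_status & 0xFF) |
               (((uint32_t)rai->health_percent << 8) & 0xFF00) |
               (((uint32_t)rai->ecc_page_threshold << 16) & 0xFFFF0000);
}

static void decode(struct rai_example *rai, uint32_t tmp)
{
        rai->rma_status = tmp & 0xFF;
        rai->health_percent = (tmp >> 8) & 0xFF;
        rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
}

int main(void)
{
        struct rai_example in = { 0, 100, 128 }, out;

        decode(&out, encode(&in));
        assert(out.rma_status == in.rma_status);
        assert(out.health_percent == in.health_percent);
        assert(out.ecc_page_threshold == in.ecc_page_threshold);
        return 0;
}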
static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
u8 *buf;
int res;
buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
if (!buf) {
DRM_ERROR("Failed to alloc buf to write table ras info\n");
return -ENOMEM;
}
__encode_table_ras_info_to_buf(&control->tbl_rai, buf);
/* i2c may be unstable in gpu reset */
down_read(&adev->reset_domain->sem);
res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address +
control->ras_info_offset,
buf, RAS_TABLE_V2_1_INFO_SIZE);
up_read(&adev->reset_domain->sem);
if (res < 0) {
DRM_ERROR("Failed to write EEPROM table ras info:%d", res);
} else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
DRM_ERROR("Short write:%d out of %d\n",
res, RAS_TABLE_V2_1_INFO_SIZE);
res = -EIO;
} else {
res = 0;
}
kfree(buf);
return res;
}
static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control) static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
{ {
int ii; int ii;
@ -246,6 +355,21 @@ static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
return csum; return csum;
} }
static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
{
int ii;
u8 *pp, csum;
size_t sz;
sz = sizeof(control->tbl_rai);
pp = (u8 *) &control->tbl_rai;
csum = 0;
for (ii = 0; ii < sz; ii++, pp++)
csum += *pp;
return csum;
}
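Editor's note: the convention these byte-sum helpers feed is that every byte of the stored image (header, v2.1 RAS info, and records) must add up to zero modulo 256, which is why the code stores the negated sum in the checksum field. A standalone sketch of that property; the 20-byte header layout and the checksum position inside it are only illustrative.

#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint8_t byte_sum(const void *p, size_t sz)
{
        const uint8_t *pp = p;
        uint8_t csum = 0;
        size_t i;

        for (i = 0; i < sz; i++)
                csum += pp[i];
        return csum;
}

int main(void)
{
        uint8_t image[20 + 256];        /* header + v2.1 ras info, no records */
        uint8_t *checksum = &image[16]; /* assumed checksum position */

        memset(image, 0xA5, sizeof(image));
        *checksum = 0;
        *checksum = -byte_sum(image, sizeof(image));    /* csum = -csum */

        /* __verify_ras_table_checksum() style check: total must be 0. */
        assert(byte_sum(image, sizeof(image)) == 0);
        return 0;
}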
static int amdgpu_ras_eeprom_correct_header_tag( static int amdgpu_ras_eeprom_correct_header_tag(
struct amdgpu_ras_eeprom_control *control, struct amdgpu_ras_eeprom_control *control,
uint32_t header) uint32_t header)
@ -282,6 +406,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
{ {
struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_device *adev = to_amdgpu_device(control);
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
u8 csum; u8 csum;
int res; int res;
@ -289,14 +414,37 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
mutex_lock(&control->ras_tbl_mutex); mutex_lock(&control->ras_tbl_mutex);
hdr->header = RAS_TABLE_HDR_VAL; hdr->header = RAS_TABLE_HDR_VAL;
hdr->version = RAS_TABLE_VER; if (adev->umc.ras &&
adev->umc.ras->set_eeprom_table_version)
adev->umc.ras->set_eeprom_table_version(hdr);
else
hdr->version = RAS_TABLE_VER_V1;
if (hdr->version == RAS_TABLE_VER_V2_1) {
hdr->first_rec_offset = RAS_RECORD_START_V2_1;
hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE;
rai->rma_status = GPU_HEALTH_USABLE;
/**
* GPU health represented as a percentage.
* 0 means worst health, 100 means fully healthy.
*/
rai->health_percent = 100;
/* ecc_page_threshold = 0 means disable bad page retirement */
rai->ecc_page_threshold = con->bad_page_cnt_threshold;
} else {
hdr->first_rec_offset = RAS_RECORD_START; hdr->first_rec_offset = RAS_RECORD_START;
hdr->tbl_size = RAS_TABLE_HEADER_SIZE; hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
}
csum = __calc_hdr_byte_sum(control); csum = __calc_hdr_byte_sum(control);
if (hdr->version == RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
csum = -csum; csum = -csum;
hdr->checksum = csum; hdr->checksum = csum;
res = __write_table_header(control); res = __write_table_header(control);
if (!res && hdr->version > RAS_TABLE_VER_V1)
res = __write_table_ras_info(control);
control->ras_num_recs = 0; control->ras_num_recs = 0;
control->ras_fri = 0; control->ras_fri = 0;
@ -573,11 +721,19 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
"Saved bad pages %d reaches threshold value %d\n", "Saved bad pages %d reaches threshold value %d\n",
control->ras_num_recs, ras->bad_page_cnt_threshold); control->ras_num_recs, ras->bad_page_cnt_threshold);
control->tbl_hdr.header = RAS_TABLE_HDR_BAD; control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
control->tbl_rai.health_percent = 0;
}
} }
control->tbl_hdr.version = RAS_TABLE_VER; if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri); control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
else
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
control->tbl_hdr.checksum = 0; control->tbl_hdr.checksum = 0;
buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE; buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@ -606,6 +762,17 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
goto Out; goto Out;
} }
/**
* Bad page records have been stored in EEPROM;
* now calculate the GPU health percent.
*/
if (amdgpu_bad_page_threshold != 0 &&
control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
control->ras_num_recs < ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
control->ras_num_recs) * 100) /
ras->bad_page_cnt_threshold;
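Editor's note: a worked example of the health calculation above with illustrative numbers (a threshold of 100 and 27 retired pages).

#include <assert.h>

int main(void)
{
        unsigned int bad_page_cnt_threshold = 100;      /* example value */
        unsigned int ras_num_recs = 27;                 /* example value */
        unsigned int health_percent;

        health_percent = ((bad_page_cnt_threshold - ras_num_recs) * 100) /
                         bad_page_cnt_threshold;
        assert(health_percent == 73);   /* integer division rounds down */
        return 0;
}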
/* Recalc the checksum. /* Recalc the checksum.
*/ */
csum = 0; csum = 0;
@ -613,10 +780,14 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
csum += *pp; csum += *pp;
csum += __calc_hdr_byte_sum(control); csum += __calc_hdr_byte_sum(control);
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
/* avoid sign extension when assigning to "checksum" */ /* avoid sign extension when assigning to "checksum" */
csum = -csum; csum = -csum;
control->tbl_hdr.checksum = csum; control->tbl_hdr.checksum = csum;
res = __write_table_header(control); res = __write_table_header(control);
if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
res = __write_table_ras_info(control);
Out: Out:
kfree(buf); kfree(buf);
return res; return res;
@ -807,8 +978,11 @@ Out:
return res; return res;
} }
uint32_t amdgpu_ras_eeprom_max_record_count(void) uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
{ {
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
return RAS_MAX_RECORD_COUNT_V2_1;
else
return RAS_MAX_RECORD_COUNT; return RAS_MAX_RECORD_COUNT;
} }
@ -1051,8 +1225,14 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
int buf_size, res; int buf_size, res;
u8 csum, *buf, *pp; u8 csum, *buf, *pp;
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
buf_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
else
buf_size = RAS_TABLE_HEADER_SIZE + buf_size = RAS_TABLE_HEADER_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE; control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
buf = kzalloc(buf_size, GFP_KERNEL); buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf) { if (!buf) {
DRM_ERROR("Out of memory checking RAS table checksum.\n"); DRM_ERROR("Out of memory checking RAS table checksum.\n");
@ -1080,6 +1260,39 @@ Out:
return res < 0 ? res : csum; return res < 0 ? res : csum;
} }
static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
struct amdgpu_device *adev = to_amdgpu_device(control);
unsigned char *buf;
int res;
buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
if (!buf) {
DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n");
return -ENOMEM;
}
/**
* EEPROM table V2_1 carries a RAS info block;
* read it from the EEPROM here.
*/
res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
control->i2c_address + control->ras_info_offset,
buf, RAS_TABLE_V2_1_INFO_SIZE);
if (res < RAS_TABLE_V2_1_INFO_SIZE) {
DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res);
res = res >= 0 ? -EIO : res;
goto Out;
}
__decode_table_ras_info_from_buf(rai, buf);
Out:
kfree(buf);
return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
}
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit) bool *exceed_err_limit)
{ {
@ -1102,8 +1315,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
return -EINVAL; return -EINVAL;
control->ras_header_offset = RAS_HDR_START; control->ras_header_offset = RAS_HDR_START;
control->ras_record_offset = RAS_RECORD_START; control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
mutex_init(&control->ras_tbl_mutex); mutex_init(&control->ras_tbl_mutex);
/* Read the table header from EEPROM address */ /* Read the table header from EEPROM address */
@ -1117,12 +1329,27 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
__decode_table_header_from_buf(hdr, buf); __decode_table_header_from_buf(hdr, buf);
if (hdr->version == RAS_TABLE_VER_V2_1) {
control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
control->ras_record_offset = RAS_RECORD_START_V2_1;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
} else {
control->ras_num_recs = RAS_NUM_RECS(hdr); control->ras_num_recs = RAS_NUM_RECS(hdr);
control->ras_record_offset = RAS_RECORD_START;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
}
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
if (hdr->header == RAS_TABLE_HDR_VAL) { if (hdr->header == RAS_TABLE_HDR_VAL) {
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
control->ras_num_recs); control->ras_num_recs);
if (hdr->version == RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
if (res)
return res;
}
res = __verify_ras_table_checksum(control); res = __verify_ras_table_checksum(control);
if (res) if (res)
DRM_ERROR("RAS table incorrect checksum or error:%d\n", DRM_ERROR("RAS table incorrect checksum or error:%d\n",
@ -1136,6 +1363,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
ras->bad_page_cnt_threshold); ras->bad_page_cnt_threshold);
} else if (hdr->header == RAS_TABLE_HDR_BAD && } else if (hdr->header == RAS_TABLE_HDR_BAD &&
amdgpu_bad_page_threshold != 0) { amdgpu_bad_page_threshold != 0) {
if (hdr->version == RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
if (res)
return res;
}
res = __verify_ras_table_checksum(control); res = __verify_ras_table_checksum(control);
if (res) if (res)
DRM_ERROR("RAS Table incorrect checksum or error:%d\n", DRM_ERROR("RAS Table incorrect checksum or error:%d\n",

View file

@ -26,8 +26,16 @@
#include <linux/i2c.h> #include <linux/i2c.h>
#define RAS_TABLE_VER_V1 0x00010000
#define RAS_TABLE_VER_V2_1 0x00021000
struct amdgpu_device; struct amdgpu_device;
enum amdgpu_ras_gpu_health_status {
GPU_HEALTH_USABLE = 0,
GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
};
enum amdgpu_ras_eeprom_err_type { enum amdgpu_ras_eeprom_err_type {
AMDGPU_RAS_EEPROM_ERR_NA, AMDGPU_RAS_EEPROM_ERR_NA,
AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
@ -43,9 +51,18 @@ struct amdgpu_ras_eeprom_table_header {
uint32_t checksum; uint32_t checksum;
} __packed; } __packed;
struct amdgpu_ras_eeprom_table_ras_info {
u8 rma_status;
u8 health_percent;
u16 ecc_page_threshold;
u32 padding[64 - 1];
} __packed;
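Editor's note: the padding makes this struct exactly the 256 bytes reserved in the v2.1 layout diagram. A quick size check mirroring the fields above; the struct name is local to the sketch.

#include <stdint.h>

struct ras_info_example {
        uint8_t  rma_status;
        uint8_t  health_percent;
        uint16_t ecc_page_threshold;
        uint32_t padding[64 - 1];
} __attribute__((packed));

int main(void)
{
        _Static_assert(sizeof(struct ras_info_example) == 256,
                       "4 bytes of info + 252 bytes reserved");
        return 0;
}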
struct amdgpu_ras_eeprom_control { struct amdgpu_ras_eeprom_control {
struct amdgpu_ras_eeprom_table_header tbl_hdr; struct amdgpu_ras_eeprom_table_header tbl_hdr;
struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
/* Base I2C EEPPROM 19-bit memory address, /* Base I2C EEPPROM 19-bit memory address,
* where the table is located. For more information, * where the table is located. For more information,
* see top of amdgpu_eeprom.c. * see top of amdgpu_eeprom.c.
@ -58,6 +75,7 @@ struct amdgpu_ras_eeprom_control {
* right after the header. * right after the header.
*/ */
u32 ras_header_offset; u32 ras_header_offset;
u32 ras_info_offset;
u32 ras_record_offset; u32 ras_record_offset;
/* Number of records in the table. /* Number of records in the table.
@ -124,7 +142,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records, const u32 num); struct eeprom_table_record *records, const u32 num);
uint32_t amdgpu_ras_eeprom_max_record_count(void); uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);

View file

@ -40,6 +40,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) { switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
ret = aldebaran_reset_init(adev); ret = aldebaran_reset_init(adev);
break; break;
case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 7):
@ -61,6 +62,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) { switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
ret = aldebaran_reset_fini(adev); ret = aldebaran_reset_fini(adev);
break; break;
case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 7):

View file

@ -49,6 +49,26 @@
* them until the pointers are equal again. * them until the pointers are equal again.
*/ */
/**
* amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
*
* @type: ring type for which to return the limit.
*/
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
{
switch (type) {
case AMDGPU_RING_TYPE_GFX:
/* Need to keep at least 192 on GFX7+ for old radv. */
return 192;
case AMDGPU_RING_TYPE_COMPUTE:
return 125;
case AMDGPU_RING_TYPE_VCN_JPEG:
return 16;
default:
return 49;
}
}
/** /**
* amdgpu_ring_alloc - allocate space on the ring buffer * amdgpu_ring_alloc - allocate space on the ring buffer
* *
@ -58,7 +78,7 @@
* Allocate @ndw dwords in the ring buffer (all asics). * Allocate @ndw dwords in the ring buffer (all asics).
* Returns 0 on success, error on failure. * Returns 0 on success, error on failure.
*/ */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{ {
/* Align requested size with padding so unlock_commit can /* Align requested size with padding so unlock_commit can
* pad safely */ * pad safely */
@ -182,6 +202,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
int sched_hw_submission = amdgpu_sched_hw_submission; int sched_hw_submission = amdgpu_sched_hw_submission;
u32 *num_sched; u32 *num_sched;
u32 hw_ip; u32 hw_ip;
unsigned int max_ibs_dw;
/* Set the hw submission limit higher for KIQ because /* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both * it's used for a number of gfx/compute tasks by both
@ -290,6 +311,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r; return r;
} }
max_ibs_dw = ring->funcs->emit_frame_size +
amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
if (WARN_ON(max_ibs_dw > max_dw))
max_dw = max_ibs_dw;
ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
ring->buf_mask = (ring->ring_size / 4) - 1; ring->buf_mask = (ring->ring_size / 4) - 1;
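Editor's note: a worked example of the new ring sizing with assumed per-ring numbers (emit_frame_size 64 dw, emit_ib_size 4 dw, align_mask 0xff, two HW submissions); the real values come from the ring funcs table, and amdgpu_ring_max_ibs(GFX) supplies the 192 IB limit.

#include <assert.h>
#include <stdint.h>

static uint32_t roundup_pow_of_two_u32(uint32_t x)
{
        uint32_t r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned int emit_frame_size = 64, emit_ib_size = 4, align_mask = 0xff;
        unsigned int max_ibs = 192;             /* amdgpu_ring_max_ibs(GFX) */
        unsigned int sched_hw_submission = 2;
        unsigned int max_ibs_dw, ring_size;

        max_ibs_dw = emit_frame_size + max_ibs * emit_ib_size;    /* 832 dw */
        max_ibs_dw = (max_ibs_dw + align_mask) & ~align_mask;     /* 1024 dw */

        /* max_dw is raised to max_ibs_dw if it was smaller, then: */
        ring_size = roundup_pow_of_two_u32(max_ibs_dw * 4 * sched_hw_submission);
        assert(ring_size == 8192);              /* bytes */
        return 0;
}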
@ -361,6 +389,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
amdgpu_bo_free_kernel(&ring->ring_obj, amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr, &ring->gpu_addr,
(void **)&ring->ring); (void **)&ring->ring);
} else {
kfree(ring->fence_drv.fences);
} }
dma_fence_put(ring->vmid_wait); dma_fence_put(ring->vmid_wait);
@ -478,6 +508,59 @@ static const struct file_operations amdgpu_debugfs_ring_fops = {
.llseek = default_llseek .llseek = default_llseek
}; };
static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
volatile u32 *mqd;
int r;
uint32_t value, result;
if (*pos & 3 || size & 3)
return -EINVAL;
result = 0;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
if (r) {
amdgpu_bo_unreserve(ring->mqd_obj);
return r;
}
while (size) {
if (*pos >= ring->mqd_size)
goto done;
value = mqd[*pos/4];
r = put_user(value, (uint32_t *)buf);
if (r)
goto done;
buf += 4;
result += 4;
size -= 4;
*pos += 4;
}
done:
amdgpu_bo_kunmap(ring->mqd_obj);
mqd = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
if (r)
return r;
return result;
}
static const struct file_operations amdgpu_debugfs_mqd_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_mqd_read,
.llseek = default_llseek
};
#endif #endif
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
@ -489,10 +572,16 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32]; char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name); sprintf(name, "amdgpu_ring_%s", ring->name);
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring, debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
&amdgpu_debugfs_ring_fops, &amdgpu_debugfs_ring_fops,
ring->ring_size + 12); ring->ring_size + 12);
if (ring->mqd_obj) {
sprintf(name, "amdgpu_mqd_%s", ring->name);
debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
&amdgpu_debugfs_mqd_fops,
ring->mqd_size);
}
#endif #endif
} }
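Editor's note: a userspace sketch for reading the new per-ring MQD debugfs node. The path below is an example (the card index and ring name vary by system), and reads are kept 4-byte sized and aligned to satisfy the checks in amdgpu_debugfs_mqd_read().

#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/sys/kernel/debug/dri/0/amdgpu_mqd_gfx_0.0.0";
        uint32_t dword;
        off_t off = 0;
        int fd = open(path, O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Dump the first 16 dwords of the MQD. */
        while (off < 16 * 4 &&
               pread(fd, &dword, sizeof(dword), off) == sizeof(dword)) {
                printf("0x%04lx: 0x%08x\n", (long)off, dword);
                off += 4;
        }
        close(fd);
        return 0;
}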
@ -581,3 +670,21 @@ void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
if (ring->is_sw_ring) if (ring->is_sw_ring)
amdgpu_sw_ring_ib_end(ring); amdgpu_sw_ring_ib_end(ring);
} }
void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
}
void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
}
void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
}

View file

@ -37,8 +37,8 @@ struct amdgpu_job;
struct amdgpu_vm; struct amdgpu_vm;
/* max number of rings */ /* max number of rings */
#define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_RINGS 124
#define AMDGPU_MAX_HWIP_RINGS 8 #define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2 #define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_COMPUTE_RINGS 8
@ -212,6 +212,8 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
u64 gds_va, bool init_shadow, int vmid);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs); uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@ -227,6 +229,9 @@ struct amdgpu_ring_funcs {
int (*preempt_ib)(struct amdgpu_ring *ring); int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring); void (*emit_mem_sync)(struct amdgpu_ring *ring);
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable); void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
}; };
struct amdgpu_ring { struct amdgpu_ring {
@ -250,12 +255,14 @@ struct amdgpu_ring {
uint32_t buf_mask; uint32_t buf_mask;
u32 idx; u32 idx;
u32 xcc_id; u32 xcc_id;
u32 xcp_id;
u32 me; u32 me;
u32 pipe; u32 pipe;
u32 queue; u32 queue;
struct amdgpu_bo *mqd_obj; struct amdgpu_bo *mqd_obj;
uint64_t mqd_gpu_addr; uint64_t mqd_gpu_addr;
void *mqd_ptr; void *mqd_ptr;
unsigned mqd_size;
uint64_t eop_gpu_addr; uint64_t eop_gpu_addr;
u32 doorbell_index; u32 doorbell_index;
bool use_doorbell; bool use_doorbell;
@ -309,6 +316,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o)) #define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
@ -318,10 +326,17 @@ struct amdgpu_ring {
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_ring_ib_end(struct amdgpu_ring *ring); void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);

View file

@ -105,6 +105,16 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
amdgpu_fence_update_start_timestamp(e->ring, amdgpu_fence_update_start_timestamp(e->ring,
chunk->sync_seq, chunk->sync_seq,
ktime_get()); ktime_get());
if (chunk->sync_seq ==
le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
if (chunk->cntl_offset <= e->ring->buf_mask)
amdgpu_ring_patch_cntl(e->ring,
chunk->cntl_offset);
if (chunk->ce_offset <= e->ring->buf_mask)
amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
if (chunk->de_offset <= e->ring->buf_mask)
amdgpu_ring_patch_de(e->ring, chunk->de_offset);
}
amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
chunk->start, chunk->start,
chunk->end); chunk->end);
@ -407,6 +417,17 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
amdgpu_ring_mux_end_ib(mux, ring); amdgpu_ring_mux_end_ib(mux, ring);
} }
void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
unsigned offset;
offset = ring->wptr & ring->buf_mask;
amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
}
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{ {
struct amdgpu_mux_entry *e; struct amdgpu_mux_entry *e;
@ -429,6 +450,10 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r
} }
chunk->start = ring->wptr; chunk->start = ring->wptr;
/* Initialize the offsets past the buffer mask so we can tell whether the IB submission set them. */
chunk->cntl_offset = ring->buf_mask + 1;
chunk->de_offset = ring->buf_mask + 1;
chunk->ce_offset = ring->buf_mask + 1;
list_add_tail(&chunk->entry, &e->list); list_add_tail(&chunk->entry, &e->list);
} }
@ -454,6 +479,41 @@ static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct a
} }
} }
void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
struct amdgpu_ring *ring, u64 offset,
enum amdgpu_ring_mux_offset_type type)
{
struct amdgpu_mux_entry *e;
struct amdgpu_mux_chunk *chunk;
e = amdgpu_ring_mux_sw_entry(mux, ring);
if (!e) {
DRM_ERROR("cannot find entry!\n");
return;
}
chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
if (!chunk) {
DRM_ERROR("cannot find chunk!\n");
return;
}
switch (type) {
case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
chunk->cntl_offset = offset;
break;
case AMDGPU_MUX_OFFSET_TYPE_DE:
chunk->de_offset = offset;
break;
case AMDGPU_MUX_OFFSET_TYPE_CE:
chunk->ce_offset = offset;
break;
default:
DRM_ERROR("invalid type (%d)\n", type);
break;
}
}
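Editor's note: the offsets recorded here use buf_mask + 1 as a "not set" sentinel, so resubmission only patches the packets the IB actually emitted. A reduced model of that convention, with the driver structures collapsed to plain integers and an example buffer mask.

#include <assert.h>
#include <stdint.h>

struct chunk_example {
        uint64_t cntl_offset, ce_offset, de_offset;
};

int main(void)
{
        const uint64_t buf_mask = 0x3fff;       /* example ring buffer mask */
        struct chunk_example chunk;

        /* start_ib: mark all three offsets as "not set". */
        chunk.cntl_offset = chunk.ce_offset = chunk.de_offset = buf_mask + 1;

        /* emit path records only the CNTL packet position this time. */
        chunk.cntl_offset = 0x120 & buf_mask;

        /* resubmit path: patch only offsets that fit inside the ring. */
        assert(chunk.cntl_offset <= buf_mask);  /* would call patch_cntl() */
        assert(!(chunk.ce_offset <= buf_mask)); /* ce/de left untouched */
        assert(!(chunk.de_offset <= buf_mask));
        return 0;
}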
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{ {
struct amdgpu_mux_entry *e; struct amdgpu_mux_entry *e;

View file

@ -50,6 +50,12 @@ struct amdgpu_mux_entry {
struct list_head list; struct list_head list;
}; };
enum amdgpu_ring_mux_offset_type {
AMDGPU_MUX_OFFSET_TYPE_CONTROL,
AMDGPU_MUX_OFFSET_TYPE_DE,
AMDGPU_MUX_OFFSET_TYPE_CE,
};
struct amdgpu_ring_mux { struct amdgpu_ring_mux {
struct amdgpu_ring *real_ring; struct amdgpu_ring *real_ring;
@ -72,12 +78,18 @@ struct amdgpu_ring_mux {
* @sync_seq: the fence seqno related with the saved IB. * @sync_seq: the fence seqno related with the saved IB.
* @start:- start location on the software ring. * @start:- start location on the software ring.
* @end:- end location on the software ring. * @end:- end location on the software ring.
* @cntl_offset:- the PRE_RESUME bit position used for resubmission.
* @de_offset:- the anchor in write_data for de meta of resubmission.
* @ce_offset:- the anchor in write_data for ce meta of resubmission.
*/ */
struct amdgpu_mux_chunk { struct amdgpu_mux_chunk {
struct list_head entry; struct list_head entry;
uint32_t sync_seq; uint32_t sync_seq;
u64 start; u64 start;
u64 end; u64 end;
u64 cntl_offset;
u64 de_offset;
u64 ce_offset;
}; };
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
@ -89,6 +101,8 @@ u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ri
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
u64 offset, enum amdgpu_ring_mux_offset_type type);
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
@ -97,6 +111,7 @@ void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
const char *amdgpu_sw_ring_name(int idx); const char *amdgpu_sw_ring_name(int idx);
unsigned int amdgpu_sw_ring_priority(int idx); unsigned int amdgpu_sw_ring_priority(int idx);

View file

@ -31,12 +31,13 @@
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @xcc_id: xcc accelerated compute core id
* *
* Set RLC enter into safe mode if RLC is enabled and haven't in safe mode. * Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
*/ */
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
{ {
if (adev->gfx.rlc.in_safe_mode) if (adev->gfx.rlc.in_safe_mode[xcc_id])
return; return;
/* if RLC is not enabled, do nothing */ /* if RLC is not enabled, do nothing */
@ -46,8 +47,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags & if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) { AMD_CG_SUPPORT_GFX_3D_CGCG)) {
adev->gfx.rlc.funcs->set_safe_mode(adev); adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
adev->gfx.rlc.in_safe_mode = true; adev->gfx.rlc.in_safe_mode[xcc_id] = true;
} }
} }
@ -55,12 +56,13 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @xcc_id: xcc accelerated compute core id
* *
* Set RLC exit safe mode if RLC is enabled and have entered into safe mode. * Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
*/ */
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
{ {
if (!(adev->gfx.rlc.in_safe_mode)) if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
return; return;
/* if RLC is not enabled, do nothing */ /* if RLC is not enabled, do nothing */
@ -70,8 +72,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags & if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) { AMD_CG_SUPPORT_GFX_3D_CGCG)) {
adev->gfx.rlc.funcs->unset_safe_mode(adev); adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
adev->gfx.rlc.in_safe_mode = false; adev->gfx.rlc.in_safe_mode[xcc_id] = false;
} }
} }
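Editor's note: a reduced model of the per-XCC bookkeeping this change introduces: each compute core keeps its own in_safe_mode flag, so entering safe mode on one XCC no longer marks the others. The array size mirrors in_safe_mode[8] in amdgpu_rlc.h; the RLC programming is replaced by placeholder comments.

#include <assert.h>
#include <stdbool.h>

#define MAX_XCC 8       /* matches bool in_safe_mode[8] in amdgpu_rlc.h */

static bool in_safe_mode[MAX_XCC];

static void enter_safe_mode(int xcc_id)
{
        if (in_safe_mode[xcc_id])
                return;
        /* ... program the RLC of this XCC into safe mode ... */
        in_safe_mode[xcc_id] = true;
}

static void exit_safe_mode(int xcc_id)
{
        if (!in_safe_mode[xcc_id])
                return;
        /* ... release the RLC of this XCC from safe mode ... */
        in_safe_mode[xcc_id] = false;
}

int main(void)
{
        enter_safe_mode(1);
        assert(in_safe_mode[1] && !in_safe_mode[0]);    /* per-XCC, not global */
        exit_safe_mode(1);
        assert(!in_safe_mode[1]);
        return 0;
}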

View file

@ -157,8 +157,8 @@ typedef struct _RLC_TABLE_OF_CONTENT {
struct amdgpu_rlc_funcs { struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev); bool (*is_rlc_enabled)(struct amdgpu_device *adev);
void (*set_safe_mode)(struct amdgpu_device *adev); void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
void (*unset_safe_mode)(struct amdgpu_device *adev); void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
int (*init)(struct amdgpu_device *adev); int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev); u32 (*get_csb_size)(struct amdgpu_device *adev);
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
@ -201,7 +201,7 @@ struct amdgpu_rlc {
u32 cp_table_size; u32 cp_table_size;
/* safe mode for updating CG/PG state */ /* safe mode for updating CG/PG state */
bool in_safe_mode; bool in_safe_mode[8];
const struct amdgpu_rlc_funcs *funcs; const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */ /* for firmware data */
@ -260,8 +260,8 @@ struct amdgpu_rlc {
struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl; struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl;
}; };
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev); void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws); int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);

View file

@ -64,7 +64,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
} }
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
unsigned vmid) unsigned int vmid)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
uint64_t csa_mc_addr; uint64_t csa_mc_addr;
@ -252,6 +252,13 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
if (!duplicate && (instance != i)) if (!duplicate && (instance != i))
continue; continue;
else { else {
/* Use a single copy per SDMA firmware type. PSP uses the same instance for all
* groups of SDMAs */
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) &&
adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
adev->sdma.num_inst_per_aid == i) {
break;
}
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
info->fw = adev->sdma.instance[i].fw; info->fw = adev->sdma.instance[i].fw;

View file

@ -26,7 +26,7 @@
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
/* max number of IP instances */ /* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 8 #define AMDGPU_MAX_SDMA_INSTANCES 16
enum amdgpu_sdma_irq { enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE0 = 0, AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
@ -37,9 +37,19 @@ enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE5, AMDGPU_SDMA_IRQ_INSTANCE5,
AMDGPU_SDMA_IRQ_INSTANCE6, AMDGPU_SDMA_IRQ_INSTANCE6,
AMDGPU_SDMA_IRQ_INSTANCE7, AMDGPU_SDMA_IRQ_INSTANCE7,
AMDGPU_SDMA_IRQ_INSTANCE8,
AMDGPU_SDMA_IRQ_INSTANCE9,
AMDGPU_SDMA_IRQ_INSTANCE10,
AMDGPU_SDMA_IRQ_INSTANCE11,
AMDGPU_SDMA_IRQ_INSTANCE12,
AMDGPU_SDMA_IRQ_INSTANCE13,
AMDGPU_SDMA_IRQ_INSTANCE14,
AMDGPU_SDMA_IRQ_INSTANCE15,
AMDGPU_SDMA_IRQ_LAST AMDGPU_SDMA_IRQ_LAST
}; };
#define NUM_SDMA(x) hweight32(x)
struct amdgpu_sdma_instance { struct amdgpu_sdma_instance {
/* SDMA firmware */ /* SDMA firmware */
const struct firmware *fw; const struct firmware *fw;
@ -49,6 +59,35 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring ring; struct amdgpu_ring ring;
struct amdgpu_ring page; struct amdgpu_ring page;
bool burst_nop; bool burst_nop;
uint32_t aid_id;
};
enum amdgpu_sdma_ras_memory_id {
AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
AMDGPU_SDMA_UCODE_BUF = 17,
AMDGPU_SDMA_RB_CMD_BUF = 18,
AMDGPU_SDMA_IB_CMD_BUF = 19,
AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
AMDGPU_SDMA_DATA_LUT_FIFO = 23,
AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
AMDGPU_SDMA_MEMORY_BLOCK_LAST,
}; };
struct amdgpu_sdma_ras { struct amdgpu_sdma_ras {
@ -66,6 +105,8 @@ struct amdgpu_sdma {
struct amdgpu_irq_src srbm_write_irq; struct amdgpu_irq_src srbm_write_irq;
int num_instances; int num_instances;
uint32_t sdma_mask;
int num_inst_per_aid;
uint32_t srbm_soft_reset; uint32_t srbm_soft_reset;
bool has_page_queue; bool has_page_queue;
struct ras_common_if *ras_if; struct ras_common_if *ras_if;

View file

@ -30,6 +30,7 @@ struct amdgpu_smuio_funcs {
void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev); u32 (*get_die_id)(struct amdgpu_device *adev);
u32 (*get_socket_id)(struct amdgpu_device *adev); u32 (*get_socket_id)(struct amdgpu_device *adev);
enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
}; };

View file

@ -38,7 +38,6 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/module.h> #include <linux/module.h>
@ -65,7 +64,7 @@
MODULE_IMPORT_NS(DMA_BUF); MODULE_IMPORT_NS(DMA_BUF);
#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 #define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_tt *ttm, struct ttm_tt *ttm,
@ -184,11 +183,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
struct ttm_resource *mem, struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur, struct amdgpu_res_cursor *mm_cur,
unsigned window, struct amdgpu_ring *ring, unsigned int window, struct amdgpu_ring *ring,
bool tmz, uint64_t *size, uint64_t *addr) bool tmz, uint64_t *size, uint64_t *addr)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
unsigned offset, num_pages, num_dw, num_bytes; unsigned int offset, num_pages, num_dw, num_bytes;
uint64_t src_addr, dst_addr; uint64_t src_addr, dst_addr;
struct amdgpu_job *job; struct amdgpu_job *job;
void *cpu_addr; void *cpu_addr;
@ -384,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL; struct dma_fence *wipe_fence = NULL;
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence); r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
false);
if (r) { if (r) {
goto error; goto error;
} else if (wipe_fence) { } else if (wipe_fence) {
@ -631,6 +631,7 @@ struct amdgpu_ttm_tt {
struct task_struct *usertask; struct task_struct *usertask;
uint32_t userflags; uint32_t userflags;
bool bound; bool bound;
int32_t pool_id;
}; };
#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
@ -800,6 +801,44 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
sg_free_table(ttm->sg); sg_free_table(ttm->sg);
} }
/*
* total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
* MQDn+CtrlStackn where n is the number of XCCs per partition.
 * pages_per_xcc is the size of one MQD+CtrlStack. The first page is the MQD
 * and keeps the default memory type (UC). The remaining pages_per_xcc - 1
 * pages are the Ctrl stack and have their memory type changed to NC.
*/
static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
struct ttm_tt *ttm, uint64_t flags)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
uint64_t total_pages = ttm->num_pages;
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc;
int i;
uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
/* MQD page: use default flags */
amdgpu_gart_bind(adev,
gtt->offset + (page_idx << PAGE_SHIFT),
1, &gtt->ttm.dma_address[page_idx], flags);
/*
* Ctrl pages - modify the memory type to NC (ctrl_flags) from
* the second page of the BO onward.
*/
amdgpu_gart_bind(adev,
gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
pages_per_xcc - 1,
&gtt->ttm.dma_address[page_idx + 1],
ctrl_flags);
}
}
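Editor's note: a worked example of the per-XCC split above with assumed sizes (a 4-XCC partition and 8 pages of MQD+CtrlStack per XCC); real page counts come from the size of the MQD BO.

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t total_pages = 32;      /* example MQD BO size in pages */
        int num_xcc = 4;                /* example num_xcc_per_xcp */
        uint64_t pages_per_xcc = total_pages / num_xcc; /* do_div() in kernel */
        uint64_t page_idx;
        int i, mqd_pages = 0, ctrl_pages = 0;

        for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
                mqd_pages += 1;                         /* default (UC) flags */
                ctrl_pages += pages_per_xcc - 1;        /* MTYPE_NC flags */
        }
        assert(mqd_pages == 4 && ctrl_pages == 28);
        return 0;
}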
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo, struct ttm_buffer_object *tbo,
uint64_t flags) uint64_t flags)
@ -812,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
flags |= AMDGPU_PTE_TMZ; flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
uint64_t page_idx = 1; amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
amdgpu_gart_bind(adev, gtt->offset, page_idx,
gtt->ttm.dma_address, flags);
/* The memory type of the first page defaults to UC. Now
* modify the memory type to NC from the second page of
* the BO onward.
*/
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
ttm->num_pages - page_idx,
&(gtt->ttm.dma_address[page_idx]), flags);
} else { } else {
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags); gtt->ttm.dma_address, flags);
@ -1029,15 +1054,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags) uint32_t page_flags)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_ttm_tt *gtt; struct amdgpu_ttm_tt *gtt;
enum ttm_caching caching; enum ttm_caching caching;
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
if (gtt == NULL) { if (!gtt)
return NULL; return NULL;
}
gtt->gobj = &bo->base; gtt->gobj = &bo->base;
if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
else
gtt->pool_id = abo->xcp_id;
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
caching = ttm_write_combined; caching = ttm_write_combined;
@ -1064,6 +1094,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct ttm_pool *pool;
pgoff_t i; pgoff_t i;
int ret; int ret;
@ -1078,7 +1109,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
return 0; return 0;
ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); if (adev->mman.ttm_pools && gtt->pool_id >= 0)
pool = &adev->mman.ttm_pools[gtt->pool_id];
else
pool = &adev->mman.bdev.pool;
ret = ttm_pool_alloc(pool, ttm, ctx);
if (ret) if (ret)
return ret; return ret;
@ -1099,6 +1134,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
{ {
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev; struct amdgpu_device *adev;
struct ttm_pool *pool;
pgoff_t i; pgoff_t i;
amdgpu_ttm_backend_unbind(bdev, ttm); amdgpu_ttm_backend_unbind(bdev, ttm);
@ -1117,7 +1153,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
ttm->pages[i]->mapping = NULL; ttm->pages[i]->mapping = NULL;
adev = amdgpu_ttm_adev(bdev); adev = amdgpu_ttm_adev(bdev);
return ttm_pool_free(&adev->mman.bdev.pool, ttm);
if (adev->mman.ttm_pools && gtt->pool_id >= 0)
pool = &adev->mman.ttm_pools[gtt->pool_id];
else
pool = &adev->mman.bdev.pool;
return ttm_pool_free(pool, ttm);
} }
/** /**
@ -1623,14 +1665,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0; return 0;
} }
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
uint32_t reserve_size)
{ {
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
memset(ctx, 0, sizeof(*ctx)); memset(ctx, 0, sizeof(*ctx));
ctx->c2p_train_data_offset = ctx->c2p_train_data_offset =
ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M); ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset = ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size = ctx->train_data_size =
@ -1648,11 +1691,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
*/ */
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{ {
int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false; bool mem_train_support = false;
uint32_t reserve_size = 0;
int ret;
if (!amdgpu_sriov_vf(adev)) { if (adev->bios && !amdgpu_sriov_vf(adev)) {
if (amdgpu_atomfirmware_mem_training_supported(adev)) if (amdgpu_atomfirmware_mem_training_supported(adev))
mem_train_support = true; mem_train_support = true;
else else
@ -1666,14 +1710,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
* discovery data and G6 memory training data respectively * discovery data and G6 memory training data respectively
*/ */
adev->mman.discovery_tmr_size = if (adev->bios)
reserve_size =
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
if (!adev->mman.discovery_tmr_size)
adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET; if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) { if (mem_train_support) {
/* reserve vram for mem train according to TMR location */ /* reserve vram for mem train according to TMR location */
amdgpu_ttm_training_data_block_init(adev); amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev, ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset, ctx->c2p_train_data_offset,
ctx->train_data_size, ctx->train_data_size,
@ -1687,20 +1735,58 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
} }
ret = amdgpu_bo_create_kernel_at(adev, if (!adev->gmc.is_app_apu) {
adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, ret = amdgpu_bo_create_kernel_at(
adev->mman.discovery_tmr_size, adev, adev->gmc.real_vram_size - reserve_size,
&adev->mman.discovery_memory, reserve_size, &adev->mman.fw_reserved_memory, NULL);
NULL);
if (ret) { if (ret) {
DRM_ERROR("alloc tmr failed(%d)!\n", ret); DRM_ERROR("alloc tmr failed(%d)!\n", ret);
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
NULL, NULL);
return ret; return ret;
} }
} else {
DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
}
return 0; return 0;
} }
static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
{
int i;
if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
return 0;
adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
sizeof(*adev->mman.ttm_pools),
GFP_KERNEL);
if (!adev->mman.ttm_pools)
return -ENOMEM;
for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
adev->gmc.mem_partitions[i].numa.node,
false, false);
}
return 0;
}
static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
{
int i;
if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
return;
for (i = 0; i < adev->gmc.num_mem_partitions; i++)
ttm_pool_fini(&adev->mman.ttm_pools[i]);
kfree(adev->mman.ttm_pools);
adev->mman.ttm_pools = NULL;
}
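Editor's note: a reduced model of the pool selection this adds. Each memory partition gets its own TTM pool created on that partition's NUMA node, and a tt with a valid pool_id allocates from it, otherwise from the default device pool; partition counts, ids and the int stand-ins for ttm_pool are examples.

#include <assert.h>

#define NUM_MEM_PARTITIONS 2    /* example partition count */

static int partition_pools[NUM_MEM_PARTITIONS]; /* stand-ins for ttm_pool */
static int default_pool;

static int *pick_pool(int pool_id)
{
        /* mirrors the amdgpu_ttm_tt_populate()/unpopulate() selection */
        if (pool_id >= 0 && pool_id < NUM_MEM_PARTITIONS)
                return &partition_pools[pool_id];
        return &default_pool;
}

int main(void)
{
        assert(pick_pool(1) == &partition_pools[1]);    /* partitioned BO */
        assert(pick_pool(-1) == &default_pool);         /* no partition id */
        return 0;
}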
/* /*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various * amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields. * gtt/vram related fields.
@ -1727,6 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("failed initializing buffer object driver(%d).\n", r); DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r; return r;
} }
r = amdgpu_ttm_pools_init(adev);
if (r) {
DRM_ERROR("failed to init ttm pools(%d).\n", r);
return r;
}
adev->mman.initialized = true; adev->mman.initialized = true;
/* Initialize VRAM pool with all of VRAM divided into pages */ /* Initialize VRAM pool with all of VRAM divided into pages */
@ -1744,6 +1836,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size); adev->gmc.visible_vram_size);
else if (adev->gmc.is_app_apu)
DRM_DEBUG_DRIVER(
"No need to ioremap when real vram size is 0\n");
else else
#endif #endif
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
@ -1755,9 +1850,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*place on the VRAM, so reserve it early. *place on the VRAM, so reserve it early.
*/ */
r = amdgpu_ttm_fw_reserve_vram_init(adev); r = amdgpu_ttm_fw_reserve_vram_init(adev);
if (r) { if (r)
return r; return r;
}
/* /*
*The reserved vram for driver must be pinned to the specified *The reserved vram for driver must be pinned to the specified
@ -1781,48 +1875,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* allocate memory as required for VGA /* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to * This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS * avoid display artifacts while transitioning between pre-OS
* and driver. */ * and driver.
r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, */
if (!adev->gmc.is_app_apu) {
r = amdgpu_bo_create_kernel_at(adev, 0,
adev->mman.stolen_vga_size,
&adev->mman.stolen_vga_memory, &adev->mman.stolen_vga_memory,
NULL); NULL);
if (r) if (r)
return r; return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
adev->mman.stolen_extended_size, adev->mman.stolen_extended_size,
&adev->mman.stolen_extended_memory, &adev->mman.stolen_extended_memory,
NULL); NULL);
if (r) if (r)
return r; return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
r = amdgpu_bo_create_kernel_at(adev,
adev->mman.stolen_reserved_offset,
adev->mman.stolen_reserved_size, adev->mman.stolen_reserved_size,
&adev->mman.stolen_reserved_memory, &adev->mman.stolen_reserved_memory,
NULL); NULL);
if (r) if (r)
return r; return r;
} else {
DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
}
DRM_INFO("amdgpu: %uM of VRAM memory ready\n", DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
-	/* Compute GTT size, either based on 1/2 the size of RAM size
-	 * or whatever the user passed on module init */
-	if (amdgpu_gtt_size == -1) {
-		struct sysinfo si;
-		si_meminfo(&si);
-		/* Certain GL unit tests for large textures can cause problems
-		 * with the OOM killer since there is no way to link this memory
-		 * to a process. This was originally mitigated (but not necessarily
-		 * eliminated) by limiting the GTT size. The problem is this limit
-		 * is often too low for many modern games so just make the limit 1/2
-		 * of system memory which aligns with TTM. The OOM accounting needs
-		 * to be addressed, but we shouldn't prevent common 3D applications
-		 * from being usable just to potentially mitigate that corner case.
-		 */
-		gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-			       (u64)si.totalram * si.mem_unit / 2);
-	} else {
-		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
-	}
+	/* Compute GTT size, either based on TTM limit
+	 * or whatever the user passed on module init.
+	 */
+	if (amdgpu_gtt_size == -1)
+		gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+	else
+		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
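
For context (illustrative only, not part of the patch): ttm_tt_pages_limit() reports TTM's global page allocation cap, which TTM normally derives as roughly half of system memory unless overridden, so the automatic GTT size now tracks that cap instead of amdgpu's own 1/2-of-RAM heuristic. A minimal sketch of the resulting default, with a hypothetical helper name:

	/* Hypothetical helper, not from the patch: the auto GTT size in bytes
	 * when the amdgpu_gtt_size module parameter is left at -1. */
	static u64 example_default_gtt_bytes(void)
	{
		return (u64)ttm_tt_pages_limit() << PAGE_SHIFT;
	}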
/* Initialize GTT memory pool */ /* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size); r = amdgpu_gtt_mgr_init(adev, gtt_size);
@ -1831,7 +1922,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r; return r;
} }
DRM_INFO("amdgpu: %uM of GTT memory ready.\n", DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
(unsigned)(gtt_size / (1024 * 1024))); (unsigned int)(gtt_size / (1024 * 1024)));
/* Initialize preemptible memory pool */ /* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev); r = amdgpu_preempt_mgr_init(adev);
@ -1858,7 +1949,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("Failed initializing oa heap.\n"); DRM_ERROR("Failed initializing oa heap.\n");
return r; return r;
} }
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_bo, NULL,
@ -1874,18 +1964,24 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
void amdgpu_ttm_fini(struct amdgpu_device *adev) void amdgpu_ttm_fini(struct amdgpu_device *adev)
{ {
int idx; int idx;
if (!adev->mman.initialized) if (!adev->mman.initialized)
return; return;
amdgpu_ttm_pools_fini(adev);
amdgpu_ttm_training_reserve_vram_fini(adev); amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the stolen vga memory back to VRAM */ /* return the stolen vga memory back to VRAM */
if (!adev->gmc.is_app_apu) {
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
/* return the IP Discovery TMR memory back to VRAM */ /* return the FW reserved memory back to VRAM */
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
NULL);
if (adev->mman.stolen_reserved_size) if (adev->mman.stolen_reserved_size)
amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
NULL, NULL); NULL, NULL);
}
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr); &adev->mman.sdma_access_ptr);
amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev);
@ -1927,7 +2023,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
int r; int r;
if (!adev->mman.initialized || amdgpu_in_reset(adev) || if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
adev->mman.buffer_funcs_enabled == enable) adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
return; return;
if (enable) { if (enable) {
@ -1944,8 +2040,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
r); r);
return; return;
} }
r = drm_sched_entity_init(&adev->mman.delayed,
DRM_SCHED_PRIORITY_NORMAL, &sched,
1, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
r);
goto error_free_entity;
}
} else { } else {
drm_sched_entity_destroy(&adev->mman.entity); drm_sched_entity_destroy(&adev->mman.entity);
drm_sched_entity_destroy(&adev->mman.delayed);
dma_fence_put(man->move); dma_fence_put(man->move);
man->move = NULL; man->move = NULL;
} }
@ -1957,6 +2063,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
size = adev->gmc.visible_vram_size; size = adev->gmc.visible_vram_size;
man->size = size; man->size = size;
adev->mman.buffer_funcs_enabled = enable; adev->mman.buffer_funcs_enabled = enable;
return;
error_free_entity:
drm_sched_entity_destroy(&adev->mman.entity);
} }
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@ -1964,14 +2075,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
unsigned int num_dw, unsigned int num_dw,
struct dma_resv *resv, struct dma_resv *resv,
bool vm_needs_flush, bool vm_needs_flush,
struct amdgpu_job **job) struct amdgpu_job **job,
bool delayed)
{ {
enum amdgpu_ib_pool_type pool = direct_submit ? enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED; AMDGPU_IB_POOL_DELAYED;
int r; int r;
struct drm_sched_entity *entity = delayed ? &adev->mman.delayed :
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, &adev->mman.entity;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED, AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job); num_dw * 4, pool, job);
if (r) if (r)
@ -1997,10 +2110,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
bool vm_needs_flush, bool tmz) bool vm_needs_flush, bool tmz)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
unsigned num_loops, num_dw; unsigned int num_loops, num_dw;
struct amdgpu_job *job; struct amdgpu_job *job;
uint32_t max_bytes; uint32_t max_bytes;
unsigned i; unsigned int i;
int r; int r;
if (!direct_submit && !ring->sched.ready) { if (!direct_submit && !ring->sched.ready) {
@ -2012,7 +2125,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
resv, vm_needs_flush, &job); resv, vm_needs_flush, &job, false);
if (r) if (r)
return r; return r;
@ -2048,7 +2161,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count, uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv, struct dma_resv *resv,
struct dma_fence **fence, struct dma_fence **fence,
bool vm_needs_flush) bool vm_needs_flush, bool delayed)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw; unsigned int num_loops, num_dw;
@ -2061,7 +2174,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
&job); &job, delayed);
if (r) if (r)
return r; return r;
@ -2084,7 +2197,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
int amdgpu_fill_buffer(struct amdgpu_bo *bo, int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data, uint32_t src_data,
struct dma_resv *resv, struct dma_resv *resv,
struct dma_fence **f) struct dma_fence **f,
bool delayed)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@ -2113,7 +2227,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
goto error; goto error;
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
&next, true); &next, true, delayed);
if (r) if (r)
goto error; goto error;
@ -2164,7 +2278,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = m->private;
return ttm_pool_debugfs(&adev->mman.bdev.pool, m); return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
} }
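
The page-pool dump above only covers the global pool in adev->mman.bdev. A hedged sketch (the function is hypothetical; the field names are taken from the hunks earlier in this file) of how the per-partition NUMA pools created by amdgpu_ttm_pools_init() could be dumped the same way:

	static int example_partition_pools_show(struct seq_file *m, void *unused)
	{
		struct amdgpu_device *adev = m->private;
		int i, r;

		if (!adev->mman.ttm_pools)
			return 0;

		for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
			/* one NUMA-aware pool per memory partition */
			seq_printf(m, "partition %d (NUMA node %d):\n", i,
				   adev->gmc.mem_partitions[i].numa.node);
			r = ttm_pool_debugfs(&adev->mman.ttm_pools[i], m);
			if (r)
				return r;
		}
		return 0;
	}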


@ -49,6 +49,7 @@ struct amdgpu_gtt_mgr {
struct amdgpu_mman { struct amdgpu_mman {
struct ttm_device bdev; struct ttm_device bdev;
struct ttm_pool *ttm_pools;
bool initialized; bool initialized;
void __iomem *aper_base_kaddr; void __iomem *aper_base_kaddr;
@ -60,6 +61,8 @@ struct amdgpu_mman {
struct mutex gtt_window_lock; struct mutex gtt_window_lock;
/* Scheduler entity for buffer moves */ /* Scheduler entity for buffer moves */
struct drm_sched_entity entity; struct drm_sched_entity entity;
/* Scheduler entity for VRAM clearing */
struct drm_sched_entity delayed;
struct amdgpu_vram_mgr vram_mgr; struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr; struct amdgpu_gtt_mgr gtt_mgr;
@ -78,7 +81,8 @@ struct amdgpu_mman {
/* discovery */ /* discovery */
uint8_t *discovery_bin; uint8_t *discovery_bin;
uint32_t discovery_tmr_size; uint32_t discovery_tmr_size;
struct amdgpu_bo *discovery_memory; /* fw reserved memory */
struct amdgpu_bo *fw_reserved_memory;
/* firmware VRAM reservation */ /* firmware VRAM reservation */
u64 fw_vram_usage_start_offset; u64 fw_vram_usage_start_offset;
@ -150,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
int amdgpu_fill_buffer(struct amdgpu_bo *bo, int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data, uint32_t src_data,
struct dma_resv *resv, struct dma_resv *resv,
struct dma_fence **fence); struct dma_fence **fence,
bool delayed);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
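
A hedged usage sketch of the new delayed flag (the helper below is hypothetical, not from the patch): passing true routes the fill through the new adev->mman.delayed scheduler entity, so a background VRAM clear does not compete with latency-sensitive moves on the regular entity.

	static int example_background_clear(struct amdgpu_bo *abo)
	{
		struct dma_fence *fence = NULL;
		int r;

		/* delayed == true: submit via adev->mman.delayed */
		r = amdgpu_fill_buffer(abo, 0, abo->tbo.base.resv, &fence, true);
		if (r)
			return r;

		if (fence) {
			/* wait synchronously here just to keep the sketch simple */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
		return 0;
	}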


@ -748,7 +748,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct imu_firmware_header_v1_0 *imu_hdr = NULL; const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
u8 *ucode_addr; u8 *ucode_addr;
if (NULL == ucode->fw) if (!ucode->fw)
return 0; return 0;
ucode->mc_addr = mc_addr; ucode->mc_addr = mc_addr;
@ -972,7 +972,7 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
uint8_t *src_addr = NULL; uint8_t *src_addr = NULL;
uint8_t *dst_addr = NULL; uint8_t *dst_addr = NULL;
if (NULL == ucode->fw) if (!ucode->fw)
return 0; return 0;
comm_hdr = (const struct common_firmware_header *)ucode->fw->data; comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
@ -1043,6 +1043,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
if (i == AMDGPU_UCODE_ID_CP_MEC1 && if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
const struct gfx_firmware_header_v1_0 *cp_hdr; const struct gfx_firmware_header_v1_0 *cp_hdr;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset, amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
adev->firmware.fw_buf_ptr + fw_offset); adev->firmware.fw_buf_ptr + fw_offset);


@ -59,6 +59,8 @@ struct amdgpu_umc_ras {
void *ras_error_status); void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status); void *ras_error_status);
/* support different eeprom table version for different asic */
void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
}; };
struct amdgpu_umc_funcs { struct amdgpu_umc_funcs {


@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata {
} srbm; } srbm;
}; };
struct amdgpu_debugfs_regs2_iocdata_v2 {
__u32 use_srbm, use_grbm, pg_lock;
struct {
__u32 se, sh, instance;
} grbm;
struct {
__u32 me, pipe, queue, vmid;
} srbm;
u32 xcc_id;
};
struct amdgpu_debugfs_gprwave_iocdata {
u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
struct {
u32 thread, vpgr_or_sgpr;
} gpr;
};
/* /*
* MMIO debugfs state data (per file* handle) * MMIO debugfs state data (per file* handle)
*/ */
struct amdgpu_debugfs_regs2_data { struct amdgpu_debugfs_regs2_data {
struct amdgpu_device *adev; struct amdgpu_device *adev;
struct mutex lock; struct mutex lock;
struct amdgpu_debugfs_regs2_iocdata id; struct amdgpu_debugfs_regs2_iocdata_v2 id;
};
struct amdgpu_debugfs_gprwave_data {
struct amdgpu_device *adev;
struct mutex lock;
struct amdgpu_debugfs_gprwave_iocdata id;
}; };
enum AMDGPU_DEBUGFS_REGS2_CMDS { enum AMDGPU_DEBUGFS_REGS2_CMDS {
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
}; };
enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0,
};
//reg2 interface
#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) #define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
//gprwave interface
#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
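
A hedged userspace sketch of driving the new v2 state ioctl (the debugfs path, register offset and XCC choice are assumptions, not taken from the patch): select a GRBM bank on a specific XCC, then read a register through the same file descriptor with pread().

	#include <fcntl.h>
	#include <linux/ioctl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	/* mirrors struct amdgpu_debugfs_regs2_iocdata_v2 above */
	struct regs2_iocdata_v2 {
		uint32_t use_srbm, use_grbm, pg_lock;
		struct { uint32_t se, sh, instance; } grbm;
		struct { uint32_t me, pipe, queue, vmid; } srbm;
		uint32_t xcc_id;
	};

	/* nr 1 == AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2 */
	#define REGS2_IOC_SET_STATE_V2 _IOWR(0x20, 1, struct regs2_iocdata_v2)

	int main(void)
	{
		/* path is an assumption; the DRI minor depends on the system */
		int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs2", O_RDWR);
		struct regs2_iocdata_v2 id = { 0 };
		uint32_t val;

		if (fd < 0)
			return 1;
		id.use_grbm = 1;	/* bank by SE/SH/instance */
		id.xcc_id = 1;		/* new in v2: pick the second XCC */
		if (ioctl(fd, REGS2_IOC_SET_STATE_V2, &id) == 0 &&
		    pread(fd, &val, sizeof(val), 0x1234 /* hypothetical byte offset */) == sizeof(val))
			printf("reg = 0x%08x\n", val);
		close(fd);
		return 0;
	}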


@ -96,16 +96,16 @@
*/ */
struct amdgpu_uvd_cs_ctx { struct amdgpu_uvd_cs_ctx {
struct amdgpu_cs_parser *parser; struct amdgpu_cs_parser *parser;
unsigned reg, count; unsigned int reg, count;
unsigned data0, data1; unsigned int data0, data1;
unsigned idx; unsigned int idx;
struct amdgpu_ib *ib; struct amdgpu_ib *ib;
/* does the IB has a msg command */ /* does the IB has a msg command */
bool has_msg_cmd; bool has_msg_cmd;
/* minimum buffer sizes */ /* minimum buffer sizes */
unsigned *buf_sizes; unsigned int *buf_sizes;
}; };
#ifdef CONFIG_DRM_AMDGPU_SI #ifdef CONFIG_DRM_AMDGPU_SI
@ -186,7 +186,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
unsigned long bo_size; unsigned long bo_size;
const char *fw_name; const char *fw_name;
const struct common_firmware_header *hdr; const struct common_firmware_header *hdr;
unsigned family_id; unsigned int family_id;
int i, j, r; int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@ -275,7 +275,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
family_id = le32_to_cpu(hdr->ucode_version) & 0xff; family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
if (adev->asic_type < CHIP_VEGA20) { if (adev->asic_type < CHIP_VEGA20) {
unsigned version_major, version_minor; unsigned int version_major, version_minor;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
@ -420,7 +420,7 @@ int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
int amdgpu_uvd_suspend(struct amdgpu_device *adev) int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{ {
unsigned size; unsigned int size;
void *ptr; void *ptr;
int i, j, idx; int i, j, idx;
bool in_ras_intr = amdgpu_ras_intr_triggered(); bool in_ras_intr = amdgpu_ras_intr_triggered();
@ -469,7 +469,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
int amdgpu_uvd_resume(struct amdgpu_device *adev) int amdgpu_uvd_resume(struct amdgpu_device *adev)
{ {
unsigned size; unsigned int size;
void *ptr; void *ptr;
int i, idx; int i, idx;
@ -491,7 +491,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
adev->uvd.inst[i].saved_bo = NULL; adev->uvd.inst[i].saved_bo = NULL;
} else { } else {
const struct common_firmware_header *hdr; const struct common_firmware_header *hdr;
unsigned offset; unsigned int offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data; hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
@ -542,6 +542,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
{ {
int i; int i;
for (i = 0; i < abo->placement.num_placement; ++i) { for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT; abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
@ -579,7 +580,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) { if (r) {
DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r; return r;
} }
@ -589,6 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
if (cmd == 0x0 || cmd == 0x3) { if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */ /* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
amdgpu_bo_placement_from_domain(bo, domain); amdgpu_bo_placement_from_domain(bo, domain);
} }
amdgpu_uvd_force_into_uvd_segment(bo); amdgpu_uvd_force_into_uvd_segment(bo);
@ -609,21 +611,21 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
* Peek into the decode message and calculate the necessary buffer sizes. * Peek into the decode message and calculate the necessary buffer sizes.
*/ */
static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
unsigned buf_sizes[]) unsigned int buf_sizes[])
{ {
unsigned stream_type = msg[4]; unsigned int stream_type = msg[4];
unsigned width = msg[6]; unsigned int width = msg[6];
unsigned height = msg[7]; unsigned int height = msg[7];
unsigned dpb_size = msg[9]; unsigned int dpb_size = msg[9];
unsigned pitch = msg[28]; unsigned int pitch = msg[28];
unsigned level = msg[57]; unsigned int level = msg[57];
unsigned width_in_mb = width / 16; unsigned int width_in_mb = width / 16;
unsigned height_in_mb = ALIGN(height / 16, 2); unsigned int height_in_mb = ALIGN(height / 16, 2);
unsigned fs_in_mb = width_in_mb * height_in_mb; unsigned int fs_in_mb = width_in_mb * height_in_mb;
unsigned image_size, tmp, min_dpb_size, num_dpb_buffer; unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
unsigned min_ctx_size = ~0; unsigned int min_ctx_size = ~0;
image_size = width * height; image_size = width * height;
image_size += image_size / 2; image_size += image_size / 2;
@ -631,7 +633,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
switch (stream_type) { switch (stream_type) {
case 0: /* H264 */ case 0: /* H264 */
switch(level) { switch (level) {
case 30: case 30:
num_dpb_buffer = 8100 / fs_in_mb; num_dpb_buffer = 8100 / fs_in_mb;
break; break;
@ -709,7 +711,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
break; break;
case 7: /* H264 Perf */ case 7: /* H264 Perf */
switch(level) { switch (level) {
case 30: case 30:
num_dpb_buffer = 8100 / fs_in_mb; num_dpb_buffer = 8100 / fs_in_mb;
break; break;
@ -742,7 +744,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
/* reference picture buffer */ /* reference picture buffer */
min_dpb_size = image_size * num_dpb_buffer; min_dpb_size = image_size * num_dpb_buffer;
if (!adev->uvd.use_ctx_buf){ if (!adev->uvd.use_ctx_buf) {
/* macroblock context buffer */ /* macroblock context buffer */
min_dpb_size += min_dpb_size +=
width_in_mb * height_in_mb * num_dpb_buffer * 192; width_in_mb * height_in_mb * num_dpb_buffer * 192;
@ -805,7 +807,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
* Make sure that we don't open up to many sessions. * Make sure that we don't open up to many sessions.
*/ */
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
struct amdgpu_bo *bo, unsigned offset) struct amdgpu_bo *bo, unsigned int offset)
{ {
struct amdgpu_device *adev = ctx->parser->adev; struct amdgpu_device *adev = ctx->parser->adev;
int32_t *msg, msg_type, handle; int32_t *msg, msg_type, handle;
@ -911,7 +913,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) { if (r) {
DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r; return r;
} }
@ -930,7 +932,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
if (cmd < 0x4) { if (cmd < 0x4) {
if ((end - start) < ctx->buf_sizes[cmd]) { if ((end - start) < ctx->buf_sizes[cmd]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
(unsigned)(end - start), (unsigned int)(end - start),
ctx->buf_sizes[cmd]); ctx->buf_sizes[cmd]);
return -EINVAL; return -EINVAL;
} }
@ -938,7 +940,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
} else if (cmd == 0x206) { } else if (cmd == 0x206) {
if ((end - start) < ctx->buf_sizes[4]) { if ((end - start) < ctx->buf_sizes[4]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
(unsigned)(end - start), (unsigned int)(end - start),
ctx->buf_sizes[4]); ctx->buf_sizes[4]);
return -EINVAL; return -EINVAL;
} }
@ -949,14 +951,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
if (!ctx->parser->adev->uvd.address_64_bit) { if (!ctx->parser->adev->uvd.address_64_bit) {
if ((start >> 28) != ((end - 1) >> 28)) { if ((start >> 28) != ((end - 1) >> 28)) {
DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
start, end); start, end);
return -EINVAL; return -EINVAL;
} }
if ((cmd == 0 || cmd == 0x3) && if ((cmd == 0 || cmd == 0x3) &&
(start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) { (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
start, end); start, end);
return -EINVAL; return -EINVAL;
} }
@ -990,7 +992,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
ctx->idx++; ctx->idx++;
for (i = 0; i <= ctx->count; ++i) { for (i = 0; i <= ctx->count; ++i) {
unsigned reg = ctx->reg + i; unsigned int reg = ctx->reg + i;
if (ctx->idx >= ctx->ib->length_dw) { if (ctx->idx >= ctx->ib->length_dw) {
DRM_ERROR("Register command after end of CS!\n"); DRM_ERROR("Register command after end of CS!\n");
@ -1036,7 +1038,8 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) { for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx); uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
unsigned type = CP_PACKET_GET_TYPE(cmd); unsigned int type = CP_PACKET_GET_TYPE(cmd);
switch (type) { switch (type) {
case PACKET_TYPE0: case PACKET_TYPE0:
ctx->reg = CP_PACKET0_GET_REG(cmd); ctx->reg = CP_PACKET0_GET_REG(cmd);
@ -1070,7 +1073,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_ib *ib) struct amdgpu_ib *ib)
{ {
struct amdgpu_uvd_cs_ctx ctx = {}; struct amdgpu_uvd_cs_ctx ctx = {};
unsigned buf_sizes[] = { unsigned int buf_sizes[] = {
[0x00000000] = 2048, [0x00000000] = 2048,
[0x00000001] = 0xFFFFFFFF, [0x00000001] = 0xFFFFFFFF,
[0x00000002] = 0xFFFFFFFF, [0x00000002] = 0xFFFFFFFF,
@ -1185,8 +1188,9 @@ err_free:
} }
/* multiple fence commands without any stream commands in between can /* multiple fence commands without any stream commands in between can
crash the vcpu so just try to emmit a dummy create/destroy msg to * crash the vcpu so just try to emmit a dummy create/destroy msg to
avoid this */ * avoid this
*/
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence) struct dma_fence **fence)
{ {
@ -1252,16 +1256,15 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{ {
struct amdgpu_device *adev = struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work); container_of(work, struct amdgpu_device, uvd.idle_work.work);
unsigned fences = 0, i, j; unsigned int fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i)) if (adev->uvd.harvest_config & (1 << i))
continue; continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring); fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
for (j = 0; j < adev->uvd.num_enc_rings; ++j) { for (j = 0; j < adev->uvd.num_enc_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
} }
}
if (fences == 0) { if (fences == 0) {
if (adev->pm.dpm_enabled) { if (adev->pm.dpm_enabled) {
@ -1356,7 +1359,7 @@ error:
*/ */
uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev) uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
{ {
unsigned i; unsigned int i;
uint32_t used_handles = 0; uint32_t used_handles = 0;
for (i = 0; i < adev->uvd.max_handles; ++i) { for (i = 0; i < adev->uvd.max_handles; ++i) {


@ -99,7 +99,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{ {
const char *fw_name; const char *fw_name;
const struct common_firmware_header *hdr; const struct common_firmware_header *hdr;
unsigned ucode_version, version_major, version_minor, binary_id; unsigned int ucode_version, version_major, version_minor, binary_id;
int i, r; int i, r;
switch (adev->asic_type) { switch (adev->asic_type) {
@ -207,7 +207,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
*/ */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev) int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{ {
unsigned i; unsigned int i;
if (adev->vce.vcpu_bo == NULL) if (adev->vce.vcpu_bo == NULL)
return 0; return 0;
@ -286,7 +286,7 @@ int amdgpu_vce_resume(struct amdgpu_device *adev)
{ {
void *cpu_addr; void *cpu_addr;
const struct common_firmware_header *hdr; const struct common_firmware_header *hdr;
unsigned offset; unsigned int offset;
int r, idx; int r, idx;
if (adev->vce.vcpu_bo == NULL) if (adev->vce.vcpu_bo == NULL)
@ -332,7 +332,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{ {
struct amdgpu_device *adev = struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vce.idle_work.work); container_of(work, struct amdgpu_device, vce.idle_work.work);
unsigned i, count = 0; unsigned int i, count = 0;
for (i = 0; i < adev->vce.num_rings; i++) for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
@ -409,6 +409,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{ {
struct amdgpu_ring *ring = &adev->vce.ring[0]; struct amdgpu_ring *ring = &adev->vce.ring[0];
int i, r; int i, r;
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
uint32_t handle = atomic_read(&adev->vce.handles[i]); uint32_t handle = atomic_read(&adev->vce.handles[i]);
@ -436,7 +437,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence) struct dma_fence **fence)
{ {
const unsigned ib_size_dw = 1024; const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job; struct amdgpu_job *job;
struct amdgpu_ib *ib; struct amdgpu_ib *ib;
struct amdgpu_ib ib_msg; struct amdgpu_ib ib_msg;
@ -528,7 +529,7 @@ err:
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence) bool direct, struct dma_fence **fence)
{ {
const unsigned ib_size_dw = 1024; const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job; struct amdgpu_job *job;
struct amdgpu_ib *ib; struct amdgpu_ib *ib;
struct dma_fence *f = NULL; struct dma_fence *f = NULL;
@ -596,12 +597,12 @@ err:
*/ */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
struct amdgpu_ib *ib, int lo, int hi, struct amdgpu_ib *ib, int lo, int hi,
unsigned size, int32_t index) unsigned int size, int32_t index)
{ {
int64_t offset = ((uint64_t)size) * ((int64_t)index); int64_t offset = ((uint64_t)size) * ((int64_t)index);
struct ttm_operation_ctx ctx = { false, false }; struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo_va_mapping *mapping;
unsigned i, fpfn, lpfn; unsigned int i, fpfn, lpfn;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
uint64_t addr; uint64_t addr;
int r; int r;
@ -619,7 +620,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) { if (r) {
DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index); addr, lo, hi, size, index);
return r; return r;
} }
@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
* Patch relocation inside command stream with real buffer address * Patch relocation inside command stream with real buffer address
*/ */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
int lo, int hi, unsigned size, uint32_t index) int lo, int hi, unsigned int size, uint32_t index)
{ {
struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
@ -662,14 +663,14 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) { if (r) {
DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index); addr, lo, hi, size, index);
return r; return r;
} }
if ((addr + (uint64_t)size) > if ((addr + (uint64_t)size) >
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n", DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
addr, lo, hi); addr, lo, hi);
return -EINVAL; return -EINVAL;
} }
@ -692,12 +693,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
* @allocated: allocated a new handle? * @allocated: allocated a new handle?
* *
* Validates the handle and return the found session index or -EINVAL * Validates the handle and return the found session index or -EINVAL
* we we don't have another free session index. * we don't have another free session index.
*/ */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
uint32_t handle, uint32_t *allocated) uint32_t handle, uint32_t *allocated)
{ {
unsigned i; unsigned int i;
/* validate the handle */ /* validate the handle */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
@ -735,14 +736,14 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
struct amdgpu_job *job, struct amdgpu_job *job,
struct amdgpu_ib *ib) struct amdgpu_ib *ib)
{ {
unsigned fb_idx = 0, bs_idx = 0; unsigned int fb_idx = 0, bs_idx = 0;
int session_idx = -1; int session_idx = -1;
uint32_t destroyed = 0; uint32_t destroyed = 0;
uint32_t created = 0; uint32_t created = 0;
uint32_t allocated = 0; uint32_t allocated = 0;
uint32_t tmp, handle = 0; uint32_t tmp, handle = 0;
uint32_t *size = &tmp; uint32_t *size = &tmp;
unsigned idx; unsigned int idx;
int i, r = 0; int i, r = 0;
job->vm = NULL; job->vm = NULL;
@ -1084,7 +1085,7 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
* *
*/ */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags) unsigned int flags)
{ {
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@ -1106,7 +1107,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
uint32_t rptr; uint32_t rptr;
unsigned i; unsigned int i;
int r, timeout = adev->usec_timeout; int r, timeout = adev->usec_timeout;
/* skip ring test for sriov*/ /* skip ring test for sriov*/
@ -1171,7 +1172,7 @@ error:
enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring) enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
{ {
switch(ring) { switch (ring) {
case 0: case 0:
return AMDGPU_RING_PRIO_0; return AMDGPU_RING_PRIO_0;
case 1: case 1:


@ -56,6 +56,7 @@
#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" #define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin" #define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin" #define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" #define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_RAVEN);
@ -77,6 +78,7 @@ MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2); MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0); MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work); static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@ -167,7 +169,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)){ if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) {
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
} else { } else {
@ -233,11 +235,11 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << j)) if (adev->vcn.harvest_config & (1 << j))
continue; continue;
-		if (adev->vcn.indirect_sram) {
-			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
-					      &adev->vcn.inst[j].dpg_sram_gpu_addr,
-					      (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
-		}
+		amdgpu_bo_free_kernel(
+			&adev->vcn.inst[j].dpg_sram_bo,
+			&adev->vcn.inst[j].dpg_sram_gpu_addr,
+			(void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
kvfree(adev->vcn.inst[j].saved_bo); kvfree(adev->vcn.inst[j].saved_bo);
amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
@ -274,20 +276,19 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t
bool ret = false; bool ret = false;
int vcn_config = adev->vcn.vcn_config[vcn_instance]; int vcn_config = adev->vcn.vcn_config[vcn_instance];
if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) { if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
ret = true; ret = true;
} else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) { else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
ret = true; ret = true;
} else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) { else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
ret = true; ret = true;
}
return ret; return ret;
} }
int amdgpu_vcn_suspend(struct amdgpu_device *adev) int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{ {
unsigned size; unsigned int size;
void *ptr; void *ptr;
int i, idx; int i, idx;
@ -316,7 +317,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
int amdgpu_vcn_resume(struct amdgpu_device *adev) int amdgpu_vcn_resume(struct amdgpu_device *adev)
{ {
unsigned size; unsigned int size;
void *ptr; void *ptr;
int i, idx; int i, idx;
@ -338,7 +339,7 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
adev->vcn.inst[i].saved_bo = NULL; adev->vcn.inst[i].saved_bo = NULL;
} else { } else {
const struct common_firmware_header *hdr; const struct common_firmware_header *hdr;
unsigned offset; unsigned int offset;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data; hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
@ -369,9 +370,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (adev->vcn.harvest_config & (1 << j)) if (adev->vcn.harvest_config & (1 << j))
continue; continue;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) { for (i = 0; i < adev->vcn.num_enc_rings; ++i)
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state; struct dpg_pause_state new_state;
@ -458,7 +458,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0; uint32_t tmp = 0;
unsigned i; unsigned int i;
int r; int r;
/* VCN in SRIOV does not support direct register read/write */ /* VCN in SRIOV does not support direct register read/write */
@ -795,7 +795,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
uint32_t rptr; uint32_t rptr;
unsigned i; unsigned int i;
int r; int r;
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
@ -993,11 +993,14 @@ error:
int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{ {
struct amdgpu_device *adev = ring->adev;
long r; long r;
if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) {
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
if (r) if (r)
goto error; goto error;
}
r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout); r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
@ -1007,7 +1010,7 @@ error:
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{ {
switch(ring) { switch (ring) {
case 0: case 0:
return AMDGPU_RING_PRIO_0; return AMDGPU_RING_PRIO_0;
case 1: case 1:
@ -1026,6 +1029,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr; const struct common_firmware_header *hdr;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data; hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
@ -1041,6 +1045,9 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
adev->firmware.ucode[idx].fw = adev->vcn.fw; adev->firmware.ucode[idx].fw = adev->vcn.fw;
adev->firmware.fw_size += adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3))
break;
} }
dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
} }
@ -1126,7 +1133,7 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
char name[32]; char name[32];
sprintf(name, "amdgpu_vcn_%d_fwlog", i); sprintf(name, "amdgpu_vcn_%d_fwlog", i);
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn, debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
&amdgpu_debugfs_vcnfwlog_fops, &amdgpu_debugfs_vcnfwlog_fops,
AMDGPU_VCNFW_LOG_SIZE); AMDGPU_VCNFW_LOG_SIZE);
#endif #endif
@ -1181,6 +1188,31 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r, i;
r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;
r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
if (r)
goto late_fini;
}
}
return 0;
late_fini:
amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
{ {
int err; int err;
@ -1202,7 +1234,7 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
adev->vcn.ras_if = &ras->ras_block.ras_comm; adev->vcn.ras_if = &ras->ras_block.ras_comm;
if (!ras->ras_block.ras_late_init) if (!ras->ras_block.ras_late_init)
ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;
return 0; return 0;
} }


@ -32,7 +32,7 @@
#define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3 #define AMDGPU_VCN_MAX_ENC_RINGS 3
#define AMDGPU_MAX_VCN_INSTANCES 2 #define AMDGPU_MAX_VCN_INSTANCES 4
#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES #define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) #define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
@ -144,14 +144,19 @@
#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ #define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
do { \ do { \
if (!indirect) { \ if (!indirect) { \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15( \
VCN, GET_INST(VCN, inst_idx), \
mmUVD_DPG_LMA_CTL, \
(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
} else { \ } else { \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ offset; \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
value; \
} \ } \
} while (0) } while (0)
@ -234,6 +239,7 @@ struct amdgpu_vcn_inst {
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
atomic_t sched_score; atomic_t sched_score;
struct amdgpu_irq_src irq; struct amdgpu_irq_src irq;
struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_vcn_reg external; struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo; struct amdgpu_bo *dpg_sram_bo;
struct dpg_pause_state pause_state; struct dpg_pause_state pause_state;
@ -242,6 +248,7 @@ struct amdgpu_vcn_inst {
uint32_t *dpg_sram_curr_addr; uint32_t *dpg_sram_curr_addr;
atomic_t dpg_enc_submission_cnt; atomic_t dpg_enc_submission_cnt;
struct amdgpu_vcn_fw_shared fw_shared; struct amdgpu_vcn_fw_shared fw_shared;
uint8_t aid_id;
}; };
struct amdgpu_vcn_ras { struct amdgpu_vcn_ras {
@ -271,6 +278,9 @@ struct amdgpu_vcn {
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
struct amdgpu_vcn_ras *ras; struct amdgpu_vcn_ras *ras;
uint16_t inst_mask;
uint8_t num_inst_per_aid;
}; };
struct amdgpu_fw_shared_rb_ptrs_struct { struct amdgpu_fw_shared_rb_ptrs_struct {
@ -400,6 +410,8 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry); struct amdgpu_iv_entry *entry);
int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block);
int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
#endif #endif


@ -56,7 +56,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
/* enable virtual display */ /* enable virtual display */
if (adev->asic_type != CHIP_ALDEBARAN && if (adev->asic_type != CHIP_ALDEBARAN &&
adev->asic_type != CHIP_ARCTURUS) { adev->asic_type != CHIP_ARCTURUS &&
((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
if (adev->mode_info.num_crtc == 0) if (adev->mode_info.num_crtc == 0)
adev->mode_info.num_crtc = 1; adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true; adev->enable_virtual_display = true;
@ -65,16 +66,19 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
adev->cg_flags = 0; adev->cg_flags = 0;
adev->pg_flags = 0; adev->pg_flags = 0;
/* enable mcbp for sriov asic_type before soc21 */ /* enable mcbp for sriov */
amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0; amdgpu_mcbp = 1;
/* Reduce kcq number to 2 to reduce latency */
if (amdgpu_num_kcq == -1)
amdgpu_num_kcq = 2;
} }
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1, uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask) uint32_t ref, uint32_t mask)
{ {
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring; struct amdgpu_ring *ring = &kiq->ring;
signed long r, cnt = 0; signed long r, cnt = 0;
unsigned long flags; unsigned long flags;
@ -557,7 +561,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_IMU, adev->gfx.imu_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
adev->psp.asd_context.bin_desc.fw_version); adev->psp.asd_context.bin_desc.fw_version);


@ -1358,6 +1358,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1; bo_va->ref_count = 1;
bo_va->last_pt_update = dma_fence_get_stub();
INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->invalids);
@ -1433,14 +1434,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
uint64_t eaddr; uint64_t eaddr;
/* validate the parameters */ /* validate the parameters */
-	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
-	    size == 0 || size & ~PAGE_MASK)
+	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+		return -EINVAL;
+
+	if (saddr + size <= saddr || offset + size <= offset)
		return -EINVAL;

	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
-	if (saddr >= eaddr ||
-	    (bo && offset + size > amdgpu_bo_size(bo)) ||
+	if ((bo && offset + size > amdgpu_bo_size(bo)) ||
	    (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
		return -EINVAL;
@ -1499,14 +1500,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
int r; int r;
/* validate the parameters */ /* validate the parameters */
-	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
-	    size == 0 || size & ~PAGE_MASK)
+	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+		return -EINVAL;
+
+	if (saddr + size <= saddr || offset + size <= offset)
		return -EINVAL;

	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
-	if (saddr >= eaddr ||
-	    (bo && offset + size > amdgpu_bo_size(bo)) ||
+	if ((bo && offset + size > amdgpu_bo_size(bo)) ||
	    (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
		return -EINVAL;
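
Both hunks above swap the old saddr >= eaddr sanity check for explicit unsigned-overflow guards. A standalone illustration (the values are hypothetical) of why saddr + size <= saddr catches both a zero-sized and a wrapping request:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t saddr = 0xffffffffffff0000ull;	/* near the top of the VA space */
		uint64_t size  = 0x20000ull;		/* addition wraps past 2^64 */

		/* mirrors the guard added above: true for size == 0 or overflow */
		if (saddr + size <= saddr)
			puts("rejected: zero-sized or wrapping mapping");
		return 0;
	}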
@ -2067,7 +2068,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->update_funcs = &amdgpu_vm_cpu_funcs; vm->update_funcs = &amdgpu_vm_cpu_funcs;
else else
vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = NULL;
vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub();
vm->last_tlb_flush = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub();
@ -2192,7 +2194,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
goto unreserve_bo; goto unreserve_bo;
dma_fence_put(vm->last_update); dma_fence_put(vm->last_update);
vm->last_update = NULL; vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true; vm->is_compute_context = true;
/* Free the shadow bo for compute VM */ /* Free the shadow bo for compute VM */
@ -2282,8 +2284,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
} }
dma_fence_put(vm->last_update); dma_fence_put(vm->last_update);
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
-		amdgpu_vmid_free_reserved(adev, vm, i);
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+		if (vm->reserved_vmid[i]) {
+			amdgpu_vmid_free_reserved(adev, i);
+			vm->reserved_vmid[i] = false;
+		}
+	}
} }
/** /**
@ -2366,18 +2374,25 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data; union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	int r;
+	/* No valid flags defined yet */
+	if (args->in.flags)
+		return -EINVAL;

	switch (args->in.op) {
	case AMDGPU_VM_OP_RESERVE_VMID:
		/* We only have requirement to reserve vmid from gfxhub */
-		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
-					       AMDGPU_GFXHUB_0);
-		if (r)
-			return r;
+		if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+			fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
+		}
		break;
	case AMDGPU_VM_OP_UNRESERVE_VMID:
-		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+		if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
+			fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
+		}
		break;
default: default:
return -EINVAL; return -EINVAL;
@ -2432,6 +2447,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* amdgpu_vm_handle_fault - graceful handling of VM faults. * amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer * @adev: amdgpu device pointer
* @pasid: PASID of the VM * @pasid: PASID of the VM
* @vmid: VMID, only used for GFX 9.4.3.
* @node_id: Node_id received in IH cookie. Only applicable for
* GFX 9.4.3.
* @addr: Address of the fault * @addr: Address of the fault
* @write_fault: true is write fault, false is read fault * @write_fault: true is write fault, false is read fault
* *
@ -2439,7 +2457,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* shouldn't be reported any more. * shouldn't be reported any more.
*/ */
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
uint64_t addr, bool write_fault) u32 vmid, u32 node_id, uint64_t addr,
bool write_fault)
{ {
bool is_compute_context = false; bool is_compute_context = false;
struct amdgpu_bo *root; struct amdgpu_bo *root;
@ -2463,8 +2482,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE; addr /= AMDGPU_GPU_PAGE_SIZE;
if (is_compute_context && if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
!svm_range_restore_pages(adev, pasid, addr, write_fault)) { node_id, addr, write_fault)) {
amdgpu_bo_unref(&root); amdgpu_bo_unref(&root);
return true; return true;
} }


@ -111,11 +111,14 @@ struct amdgpu_mem_stats;
/* Reserve 4MB VRAM for page tables */ /* Reserve 4MB VRAM for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
-/* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS	3
-#define AMDGPU_GFXHUB_0		0
-#define AMDGPU_MMHUB_0		1
-#define AMDGPU_MMHUB_1		2
+/*
+ * max number of VMHUB
+ * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
+ */
+#define AMDGPU_MAX_VMHUBS	13
+#define AMDGPU_GFXHUB(x)	(x)
+#define AMDGPU_MMHUB0(x)	(8 + x)
+#define AMDGPU_MMHUB1(x)	(8 + 4 + x)
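
Read together, the new macros lay the hubs out as: GFX hubs at indices 0-7, the four MMHUB0 instances at 8-11, and the single MMHUB1 at 12, which is where AMDGPU_MAX_VMHUBS == 13 comes from. A tiny standalone check, restating only the macros above:

	#include <assert.h>

	#define AMDGPU_GFXHUB(x) (x)
	#define AMDGPU_MMHUB0(x) (8 + x)
	#define AMDGPU_MMHUB1(x) (8 + 4 + x)

	int main(void)
	{
		assert(AMDGPU_GFXHUB(7) == 7);	/* last of up to 8 GFX hubs */
		assert(AMDGPU_MMHUB0(3) == 11);	/* last of up to 4 MMHUB0 instances */
		assert(AMDGPU_MMHUB1(0) == 12);	/* single MMHUB1 */
		return 0;
	}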
/* Reserve 2MB at top/bottom of address space for kernel use */
#define AMDGPU_VA_RESERVED_SIZE			(2ULL << 20)

@@ -326,6 +329,9 @@ struct amdgpu_vm {
	struct ttm_lru_bulk_move	lru_bulk_move;
	/* Flag to indicate if VM is used for compute */
	bool				is_compute_context;
+	/* Memory partition number, -1 means any partition */
+	int8_t				mem_id;
};

struct amdgpu_vm_manager {
@@ -452,7 +458,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
			     struct amdgpu_task_info *task_info);
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-			    uint64_t addr, bool write_fault);
+			    u32 vmid, u32 node_id, uint64_t addr,
+			    bool write_fault);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);


@@ -502,6 +502,7 @@ exit:
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
{
+	struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	struct dma_resv *resv;
@@ -512,7 +513,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
-	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	if (!adev->gmc.is_app_apu)
+		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	else
+		bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -529,6 +535,8 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
+	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+
	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;
@@ -553,6 +561,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.base.resv;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;

	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
@@ -564,7 +573,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		return r;
	}

-	(*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
	amdgpu_bo_add_to_shadow_list(*vmbo);

	return 0;
@@ -781,13 +789,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
+	struct amdgpu_device *adev = params->adev;
+
	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
-		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+		amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
-	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
+	} else if (adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {
@@ -795,6 +804,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

+	/* APUs mapping system memory may need different MTYPEs on different
+	 * NUMA nodes. Only do this for contiguous ranges that can be assumed
+	 * to be on the same NUMA node.
+	 */
+	if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+	    adev->gmc.gmc_funcs->override_vm_pte_flags &&
+	    num_possible_nodes() > 1) {
+		if (!params->pages_addr)
+			amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
+							 addr, &flags);
+		else
+			dev_dbg(adev->dev,
+				"override_vm_pte_flags skipped: non-contiguous\n");
+	}
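
	/* Illustrative note (added for clarity, not part of the patch):
	 * params->pages_addr is only set for mappings built from a per-page
	 * address array, where consecutive pages may live on different NUMA
	 * nodes, so the MTYPE override above is limited to contiguous ranges.
	 */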
	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}


@@ -370,6 +370,45 @@ out:
	return ret;
}
static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
}
static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
struct ttm_resource *res,
const struct ttm_place *place,
size_t size)
{
DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
return false;
}
static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
struct ttm_resource *res,
const struct ttm_place *place,
size_t size)
{
DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
return true;
}
static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
}
static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
struct ttm_resource **res)
{
DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
return -ENOSPC;
}
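
/* Illustrative note (added for clarity, not part of the patch): the dummy
 * manager never hands out memory (alloc returns -ENOSPC, compatible returns
 * false), so on app APUs without dedicated VRAM (adev->gmc.is_app_apu) the
 * TTM_PL_VRAM domain is effectively refused and allocations fall back to
 * other domains such as GTT.
 */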
/**
 * amdgpu_vram_mgr_new - allocate new ranges
 *
@@ -800,7 +839,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
{
	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
	struct drm_buddy *mm = &mgr->mm;
-	struct drm_buddy_block *block;
+	struct amdgpu_vram_reservation *rsv;

	drm_printf(printer, " vis usage:%llu\n",
		   amdgpu_vram_mgr_vis_usage(mgr));
@@ -812,11 +851,20 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
	drm_buddy_print(mm, printer);

	drm_printf(printer, "reserved:\n");
-	list_for_each_entry(block, &mgr->reserved_pages, link)
-		drm_buddy_block_print(mm, block, printer);
+	list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
+		drm_printf(printer, "%#018llx-%#018llx: %llu\n",
+			   rsv->start, rsv->start + rsv->size, rsv->size);
	mutex_unlock(&mgr->lock);
}
static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
.alloc = amdgpu_dummy_vram_mgr_new,
.free = amdgpu_dummy_vram_mgr_del,
.intersects = amdgpu_dummy_vram_mgr_intersects,
.compatible = amdgpu_dummy_vram_mgr_compatible,
.debug = amdgpu_dummy_vram_mgr_debug
};
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
	.alloc	= amdgpu_vram_mgr_new,
	.free	= amdgpu_vram_mgr_del,
@@ -841,16 +889,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
	ttm_resource_manager_init(man, &adev->mman.bdev,
				  adev->gmc.real_vram_size);

-	man->func = &amdgpu_vram_mgr_func;
-
-	err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
-	if (err)
-		return err;
-
	mutex_init(&mgr->lock);
	INIT_LIST_HEAD(&mgr->reservations_pending);
	INIT_LIST_HEAD(&mgr->reserved_pages);
	mgr->default_page_size = PAGE_SIZE;

+	if (!adev->gmc.is_app_apu) {
+		man->func = &amdgpu_vram_mgr_func;
+
+		err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+		if (err)
+			return err;
+	} else {
+		man->func = &amdgpu_dummy_vram_mgr_func;
+		DRM_INFO("Setup dummy vram mgr\n");
+	}
+
	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
	ttm_resource_manager_set_used(man, true);
@@ -886,6 +939,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
		kfree(rsv);
	}
-	drm_buddy_fini(&mgr->mm);
+	if (!adev->gmc.is_app_apu)
+		drm_buddy_fini(&mgr->mm);
	mutex_unlock(&mgr->lock);


@@ -0,0 +1,399 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_xcp.h"
#include "amdgpu_drv.h"
#include <drm/drm_drv.h>
#include "../amdxcp/amdgpu_xcp_drv.h"
static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
{
int (*run_func)(void *handle, uint32_t inst_mask);
int ret = 0;
if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
return 0;
run_func = NULL;
switch (xcp_state) {
case AMDGPU_XCP_PREPARE_SUSPEND:
run_func = xcp_ip->ip_funcs->prepare_suspend;
break;
case AMDGPU_XCP_SUSPEND:
run_func = xcp_ip->ip_funcs->suspend;
break;
case AMDGPU_XCP_PREPARE_RESUME:
run_func = xcp_ip->ip_funcs->prepare_resume;
break;
case AMDGPU_XCP_RESUME:
run_func = xcp_ip->ip_funcs->resume;
break;
}
if (run_func)
ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
return ret;
}
static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
int state)
{
struct amdgpu_xcp_ip *xcp_ip;
struct amdgpu_xcp *xcp;
int i, ret;
if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
return -EINVAL;
xcp = &xcp_mgr->xcp[xcp_id];
for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
xcp_ip = &xcp->ip[i];
ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
if (ret)
break;
}
return ret;
}
int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
AMDGPU_XCP_PREPARE_SUSPEND);
}
int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
}
int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
AMDGPU_XCP_PREPARE_RESUME);
}
int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
}
static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
struct amdgpu_xcp_ip *ip)
{
struct amdgpu_xcp *xcp;
if (!ip)
return;
xcp = &xcp_mgr->xcp[xcp_id];
xcp->ip[ip->ip_id] = *ip;
xcp->ip[ip->ip_id].valid = true;
xcp->valid = true;
}
int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
{
struct amdgpu_device *adev = xcp_mgr->adev;
struct amdgpu_xcp_ip ip;
uint8_t mem_id;
int i, j, ret;
if (!num_xcps || num_xcps > MAX_XCP)
return -EINVAL;
xcp_mgr->mode = mode;
for (i = 0; i < MAX_XCP; ++i)
xcp_mgr->xcp[i].valid = false;
for (i = 0; i < num_xcps; ++i) {
for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
&ip);
if (ret)
continue;
__amdgpu_xcp_add_block(xcp_mgr, i, &ip);
}
xcp_mgr->xcp[i].id = i;
if (xcp_mgr->funcs->get_xcp_mem_id) {
ret = xcp_mgr->funcs->get_xcp_mem_id(
xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
if (ret)
continue;
else
xcp_mgr->xcp[i].mem_id = mem_id;
}
}
xcp_mgr->num_xcps = num_xcps;
amdgpu_xcp_update_partition_sched_list(adev);
xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
return 0;
}
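/* Illustrative note (added for clarity, not part of the patch): the ratio
 * computed above means that e.g. 8 XCPs spread over 4 memory partitions
 * gives 2 XCPs per partition, the value KFD later uses to size per-XCP
 * memory limits (see num_xcp_per_mem_partition in amdgpu_xcp.h).
 */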
int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
{
int ret, curr_mode, num_xcps = 0;
if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
return -EINVAL;
if (xcp_mgr->mode == mode)
return 0;
if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
return 0;
mutex_lock(&xcp_mgr->xcp_lock);
curr_mode = xcp_mgr->mode;
/* State set to transient mode */
xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
if (ret) {
/* Failed, get whatever mode it's at now */
if (xcp_mgr->funcs->query_partition_mode)
xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
xcp_mgr, AMDGPU_XCP_FL_LOCKED);
else
xcp_mgr->mode = curr_mode;
goto out;
}
out:
mutex_unlock(&xcp_mgr->xcp_lock);
return ret;
}
int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
{
int mode;
if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
return xcp_mgr->mode;
if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
return xcp_mgr->mode;
if (!(flags & AMDGPU_XCP_FL_LOCKED))
mutex_lock(&xcp_mgr->xcp_lock);
mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
dev_WARN(
xcp_mgr->adev->dev,
"Cached partition mode %d not matching with device mode %d",
xcp_mgr->mode, mode);
if (!(flags & AMDGPU_XCP_FL_LOCKED))
mutex_unlock(&xcp_mgr->xcp_lock);
return mode;
}
static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
{
struct drm_device *p_ddev;
struct drm_device *ddev;
int i, ret;
ddev = adev_to_drm(adev);
for (i = 0; i < MAX_XCP; i++) {
ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
if (ret)
return ret;
/* Redirect all IOCTLs to the primary device */
adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
p_ddev->render->dev = ddev;
p_ddev->primary->dev = ddev;
p_ddev->vma_offset_manager = ddev->vma_offset_manager;
p_ddev->driver = &amdgpu_partition_driver;
adev->xcp_mgr->xcp[i].ddev = p_ddev;
}
return 0;
}
int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
int init_num_xcps,
struct amdgpu_xcp_mgr_funcs *xcp_funcs)
{
struct amdgpu_xcp_mgr *xcp_mgr;
if (!xcp_funcs || !xcp_funcs->switch_partition_mode ||
!xcp_funcs->get_ip_details)
return -EINVAL;
xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
if (!xcp_mgr)
return -ENOMEM;
xcp_mgr->adev = adev;
xcp_mgr->funcs = xcp_funcs;
xcp_mgr->mode = init_mode;
mutex_init(&xcp_mgr->xcp_lock);
if (init_mode != AMDGPU_XCP_MODE_NONE)
amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
adev->xcp_mgr = xcp_mgr;
return amdgpu_xcp_dev_alloc(adev);
}
int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
enum AMDGPU_XCP_IP_BLOCK ip, int instance)
{
struct amdgpu_xcp *xcp;
int i, id_mask = 0;
if (ip >= AMDGPU_XCP_MAX_BLOCKS)
return -EINVAL;
for (i = 0; i < xcp_mgr->num_xcps; ++i) {
xcp = &xcp_mgr->xcp[i];
if ((xcp->valid) && (xcp->ip[ip].valid) &&
(xcp->ip[ip].inst_mask & BIT(instance)))
id_mask |= BIT(i);
}
if (!id_mask)
id_mask = -ENXIO;
return id_mask;
}
int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
enum AMDGPU_XCP_IP_BLOCK ip,
uint32_t *inst_mask)
{
if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
return -EINVAL;
*inst_mask = xcp->ip[ip].inst_mask;
return 0;
}
int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
const struct pci_device_id *ent)
{
int i, ret;
if (!adev->xcp_mgr)
return 0;
for (i = 0; i < MAX_XCP; i++) {
ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
if (ret)
return ret;
}
return 0;
}
void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
{
struct drm_device *p_ddev;
int i;
if (!adev->xcp_mgr)
return;
for (i = 0; i < MAX_XCP; i++) {
p_ddev = adev->xcp_mgr->xcp[i].ddev;
drm_dev_unplug(p_ddev);
p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
}
}
int amdgpu_xcp_open_device(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *file_priv)
{
int i;
if (!adev->xcp_mgr)
return 0;
fpriv->xcp_id = ~0;
for (i = 0; i < MAX_XCP; ++i) {
if (!adev->xcp_mgr->xcp[i].ddev)
break;
if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
if (adev->xcp_mgr->xcp[i].valid == FALSE) {
dev_err(adev->dev, "renderD%d partition %d not valid!",
file_priv->minor->index, i);
return -ENOENT;
}
dev_dbg(adev->dev, "renderD%d partition %d opened!",
file_priv->minor->index, i);
fpriv->xcp_id = i;
break;
}
}
fpriv->vm.mem_id = fpriv->xcp_id == ~0 ? -1 :
adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
return 0;
}
void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
struct amdgpu_ctx_entity *entity)
{
struct drm_gpu_scheduler *sched;
struct amdgpu_ring *ring;
if (!adev->xcp_mgr)
return;
sched = entity->entity.rq->sched;
if (sched->ready) {
ring = to_amdgpu_ring(entity->entity.rq->sched);
atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
}
}


@@ -0,0 +1,182 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef AMDGPU_XCP_H
#define AMDGPU_XCP_H
#include <linux/pci.h>
#include <linux/xarray.h>
#include "amdgpu_ctx.h"
#define MAX_XCP 8
#define AMDGPU_XCP_MODE_NONE -1
#define AMDGPU_XCP_MODE_TRANS -2
#define AMDGPU_XCP_FL_NONE 0
#define AMDGPU_XCP_FL_LOCKED (1 << 0)
struct amdgpu_fpriv;
enum AMDGPU_XCP_IP_BLOCK {
AMDGPU_XCP_GFXHUB,
AMDGPU_XCP_GFX,
AMDGPU_XCP_SDMA,
AMDGPU_XCP_VCN,
AMDGPU_XCP_MAX_BLOCKS
};
enum AMDGPU_XCP_STATE {
AMDGPU_XCP_PREPARE_SUSPEND,
AMDGPU_XCP_SUSPEND,
AMDGPU_XCP_PREPARE_RESUME,
AMDGPU_XCP_RESUME,
};
struct amdgpu_xcp_ip_funcs {
int (*prepare_suspend)(void *handle, uint32_t inst_mask);
int (*suspend)(void *handle, uint32_t inst_mask);
int (*prepare_resume)(void *handle, uint32_t inst_mask);
int (*resume)(void *handle, uint32_t inst_mask);
};
struct amdgpu_xcp_ip {
struct amdgpu_xcp_ip_funcs *ip_funcs;
uint32_t inst_mask;
enum AMDGPU_XCP_IP_BLOCK ip_id;
bool valid;
};
struct amdgpu_xcp {
struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
uint8_t id;
uint8_t mem_id;
bool valid;
atomic_t ref_cnt;
struct drm_device *ddev;
struct drm_device *rdev;
struct drm_device *pdev;
struct drm_driver *driver;
struct drm_vma_offset_manager *vma_offset_manager;
struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
};
struct amdgpu_xcp_mgr {
struct amdgpu_device *adev;
struct mutex xcp_lock;
struct amdgpu_xcp_mgr_funcs *funcs;
struct amdgpu_xcp xcp[MAX_XCP];
uint8_t num_xcps;
int8_t mode;
/* Used to determine KFD memory size limits per XCP */
unsigned int num_xcp_per_mem_partition;
};
struct amdgpu_xcp_mgr_funcs {
int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
int *num_xcps);
int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
enum AMDGPU_XCP_IP_BLOCK ip_id,
struct amdgpu_xcp_ip *ip);
int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp *xcp, uint8_t *mem_id);
int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*select_scheds)(struct amdgpu_device *adev,
u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv,
unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds);
int (*update_partition_sched_list)(struct amdgpu_device *adev);
};
int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
enum AMDGPU_XCP_IP_BLOCK ip, int instance);
int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
enum AMDGPU_XCP_IP_BLOCK ip,
uint32_t *inst_mask);
int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
const struct pci_device_id *ent);
void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
int amdgpu_xcp_open_device(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *file_priv);
void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
struct amdgpu_ctx_entity *entity);
#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \
((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
(adev)->xcp_mgr->funcs->select_scheds ? \
(adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT)
#define amdgpu_xcp_update_partition_sched_list(adev) \
((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
(adev)->xcp_mgr->funcs->update_partition_sched_list ? \
(adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0)
static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
{
if (!xcp_mgr)
return 1;
else
return xcp_mgr->num_xcps;
}
static inline struct amdgpu_xcp *
amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
{
if (!xcp_mgr)
return NULL;
while (*from < MAX_XCP) {
if (xcp_mgr->xcp[*from].valid)
return &xcp_mgr->xcp[*from];
++(*from);
}
return NULL;
}
#define for_each_xcp(xcp_mgr, xcp, i) \
for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
#endif


@@ -1014,7 +1014,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
}

/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+					void *inject_if, uint32_t instance_mask)
{
	int ret = 0;
	struct ta_ras_trigger_error_input *block_info =
@@ -1026,7 +1027,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *injec
	if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
		dev_warn(adev->dev, "Failed to disallow XGMI power down");

-	ret = psp_ras_trigger_error(&adev->psp, block_info);
+	ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);

	if (amdgpu_ras_intr_triggered())
		return ret;


@@ -70,7 +70,6 @@ enum amd_sriov_ucode_engine_id {
	AMD_SRIOV_UCODE_ID_RLC_SRLS,
	AMD_SRIOV_UCODE_ID_MEC,
	AMD_SRIOV_UCODE_ID_MEC2,
-	AMD_SRIOV_UCODE_ID_IMU,
	AMD_SRIOV_UCODE_ID_SOS,
	AMD_SRIOV_UCODE_ID_ASD,
	AMD_SRIOV_UCODE_ID_TA_RAS,


@@ -0,0 +1,661 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "soc15.h"
#include "soc15_common.h"
#include "amdgpu_xcp.h"
#include "gfx_v9_4_3.h"
#include "gfxhub_v1_2.h"
#include "sdma_v4_4_2.h"
#define XCP_INST_MASK(num_inst, xcp_id) \
(num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
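/* Illustrative note (added for clarity, not part of the patch): each
 * partition gets a contiguous slice of instances, e.g. XCP_INST_MASK(2, 0)
 * == 0x3 and XCP_INST_MASK(2, 1) == 0xc, while a zero instance count yields
 * an empty mask.
 */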
#define AMDGPU_XCP_OPS_KFD (1 << 0)
void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
{
int i;
adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;
adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;
adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;
adev->doorbell_index.sdma_doorbell_range = 20;
for (i = 0; i < adev->sdma.num_instances; i++)
adev->doorbell_index.sdma_engine[i] =
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
i * (adev->doorbell_index.sdma_doorbell_range >> 1);
adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;
adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
uint32_t inst_idx, struct amdgpu_ring *ring)
{
int xcp_id;
enum AMDGPU_XCP_IP_BLOCK ip_blk;
uint32_t inst_mask;
ring->xcp_id = ~0;
if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
return;
inst_mask = 1 << inst_idx;
switch (ring->funcs->type) {
case AMDGPU_HW_IP_GFX:
case AMDGPU_RING_TYPE_COMPUTE:
case AMDGPU_RING_TYPE_KIQ:
ip_blk = AMDGPU_XCP_GFX;
break;
case AMDGPU_RING_TYPE_SDMA:
ip_blk = AMDGPU_XCP_SDMA;
break;
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
inst_mask = 1 << (inst_idx * 2);
break;
default:
DRM_ERROR("Not support ring type %d!", ring->funcs->type);
return;
}
for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
ring->xcp_id = xcp_id;
break;
}
}
}
static void aqua_vanjaram_xcp_gpu_sched_update(
struct amdgpu_device *adev,
struct amdgpu_ring *ring,
unsigned int sel_xcp_id)
{
unsigned int *num_gpu_sched;
num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
.gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
.sched[(*num_gpu_sched)++] = &ring->sched;
DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name,
sel_xcp_id, ring->funcs->type,
ring->hw_prio, *num_gpu_sched);
}
static int aqua_vanjaram_xcp_sched_list_update(
struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
int i;
for (i = 0; i < MAX_XCP; i++) {
atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
}
if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
return 0;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
ring = adev->rings[i];
if (!ring || !ring->sched.ready)
continue;
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
/* VCN is shared by two partitions under CPX MODE */
if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
}
return 0;
}
static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev)
{
int i;
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring);
else
aqua_vanjaram_set_xcp_id(adev, ring->me, ring);
}
return aqua_vanjaram_xcp_sched_list_update(adev);
}
static int aqua_vanjaram_select_scheds(
struct amdgpu_device *adev,
u32 hw_ip,
u32 hw_prio,
struct amdgpu_fpriv *fpriv,
unsigned int *num_scheds,
struct drm_gpu_scheduler ***scheds)
{
u32 sel_xcp_id;
int i;
if (fpriv->xcp_id == ~0) {
u32 least_ref_cnt = ~0;
fpriv->xcp_id = 0;
for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
u32 total_ref_cnt;
total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt);
if (total_ref_cnt < least_ref_cnt) {
fpriv->xcp_id = i;
least_ref_cnt = total_ref_cnt;
}
}
}
sel_xcp_id = fpriv->xcp_id;
if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
*num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
*scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
DRM_DEBUG("Selected partition #%d", sel_xcp_id);
} else {
DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id);
return -ENOENT;
}
return 0;
}
static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev,
enum amd_hw_ip_block_type block,
int8_t inst)
{
int8_t dev_inst;
switch (block) {
case GC_HWIP:
case SDMA0_HWIP:
/* Both JPEG and VCN as JPEG is only alias of VCN */
case VCN_HWIP:
dev_inst = adev->ip_map.dev_inst[block][inst];
break;
default:
/* For rest of the IPs, no look up required.
* Assume 'logical instance == physical instance' for all configs. */
dev_inst = inst;
break;
}
return dev_inst;
}
static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev,
enum amd_hw_ip_block_type block,
uint32_t mask)
{
uint32_t dev_mask = 0;
int8_t log_inst, dev_inst;
while (mask) {
log_inst = ffs(mask) - 1;
dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst);
dev_mask |= (1 << dev_inst);
mask &= ~(1 << log_inst);
}
return dev_mask;
}
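/* Illustrative note (added for clarity, not part of the patch): the
 * conversion is purely bitwise; if, say, logical SDMA instances 0 and 1
 * happened to map to device instances 2 and 3, a logical mask of 0x3 would
 * come back as a device mask of 0xc.
 */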
static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev,
enum amd_hw_ip_block_type ip_block,
uint32_t inst_mask)
{
int l = 0, i;
while (inst_mask) {
i = ffs(inst_mask) - 1;
adev->ip_map.dev_inst[ip_block][l++] = i;
inst_mask &= ~(1 << i);
}
for (; l < HWIP_MAX_INSTANCE; l++)
adev->ip_map.dev_inst[ip_block][l] = -1;
}
void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
{
u32 ip_map[][2] = {
{ GC_HWIP, adev->gfx.xcc_mask },
{ SDMA0_HWIP, adev->sdma.sdma_mask },
{ VCN_HWIP, adev->vcn.inst_mask },
};
int i;
for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask;
}
/* Fixed pattern for smn addressing on different AIDs:
* bit[34]: indicate cross AID access
* bit[33:32]: indicate target AID id
* AID id range is 0 ~ 3 as maximum AID number is 4.
*/
u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
{
u64 ext_offset;
/* local routing and bit[34:32] will be zeros */
if (ext_id == 0)
return 0;
/* Initiated from host, accessing to all non-zero aids are cross traffic */
ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);
return ext_offset;
}
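/* Illustrative note (added for clarity, not part of the patch): for AID 1
 * this returns (1ULL << 32) | (1ULL << 34) == 0x500000000, for AID 3 it
 * returns 0x700000000, while AID 0 keeps plain local addressing (offset 0).
 */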
static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
struct amdgpu_device *adev = xcp_mgr->adev;
if (adev->nbio.funcs->get_compute_partition_mode)
mode = adev->nbio.funcs->get_compute_partition_mode(adev);
return mode;
}
static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
{
int num_xcc, num_xcc_per_xcp = 0;
num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
num_xcc_per_xcp = num_xcc;
break;
case AMDGPU_DPX_PARTITION_MODE:
num_xcc_per_xcp = num_xcc / 2;
break;
case AMDGPU_TPX_PARTITION_MODE:
num_xcc_per_xcp = num_xcc / 3;
break;
case AMDGPU_QPX_PARTITION_MODE:
num_xcc_per_xcp = num_xcc / 4;
break;
case AMDGPU_CPX_PARTITION_MODE:
num_xcc_per_xcp = 1;
break;
}
return num_xcc_per_xcp;
}
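/* Illustrative note (added for clarity, not part of the patch): with 8 XCCs
 * this yields 8 XCCs per partition in SPX mode, 4 in DPX, 2 in QPX and 1 in
 * CPX; modes that do not evenly divide the XCC count are filtered out by the
 * validity check further below.
 */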
static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
enum AMDGPU_XCP_IP_BLOCK ip_id,
struct amdgpu_xcp_ip *ip)
{
struct amdgpu_device *adev = xcp_mgr->adev;
int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
int num_sdma, num_vcn;
num_sdma = adev->sdma.num_instances;
num_vcn = adev->vcn.num_vcn_inst;
switch (xcp_mgr->mode) {
case AMDGPU_SPX_PARTITION_MODE:
num_sdma_xcp = num_sdma;
num_vcn_xcp = num_vcn;
break;
case AMDGPU_DPX_PARTITION_MODE:
num_sdma_xcp = num_sdma / 2;
num_vcn_xcp = num_vcn / 2;
break;
case AMDGPU_TPX_PARTITION_MODE:
num_sdma_xcp = num_sdma / 3;
num_vcn_xcp = num_vcn / 3;
break;
case AMDGPU_QPX_PARTITION_MODE:
num_sdma_xcp = num_sdma / 4;
num_vcn_xcp = num_vcn / 4;
break;
case AMDGPU_CPX_PARTITION_MODE:
num_sdma_xcp = 2;
num_vcn_xcp = num_vcn ? 1 : 0;
break;
default:
return -EINVAL;
}
num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
switch (ip_id) {
case AMDGPU_XCP_GFXHUB:
ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
break;
case AMDGPU_XCP_GFX:
ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
break;
case AMDGPU_XCP_SDMA:
ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
break;
case AMDGPU_XCP_VCN:
ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
/* TODO : Assign IP funcs */
break;
default:
return -EINVAL;
}
ip->ip_id = ip_id;
return 0;
}
static enum amdgpu_gfx_partition
__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
struct amdgpu_device *adev = xcp_mgr->adev;
int num_xcc;
num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
if (adev->gmc.num_mem_partitions == 1)
return AMDGPU_SPX_PARTITION_MODE;
if (adev->gmc.num_mem_partitions == num_xcc)
return AMDGPU_CPX_PARTITION_MODE;
if (adev->gmc.num_mem_partitions == num_xcc / 2)
return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
AMDGPU_QPX_PARTITION_MODE;
if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
return AMDGPU_DPX_PARTITION_MODE;
return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
}
static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
enum amdgpu_gfx_partition mode)
{
struct amdgpu_device *adev = xcp_mgr->adev;
int num_xcc, num_xccs_per_xcp;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
case AMDGPU_DPX_PARTITION_MODE:
return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
case AMDGPU_TPX_PARTITION_MODE:
return (adev->gmc.num_mem_partitions == 1 ||
adev->gmc.num_mem_partitions == 3) &&
((num_xcc % 3) == 0);
case AMDGPU_QPX_PARTITION_MODE:
num_xccs_per_xcp = num_xcc / 4;
return (adev->gmc.num_mem_partitions == 1 ||
adev->gmc.num_mem_partitions == 4) &&
(num_xccs_per_xcp >= 2);
case AMDGPU_CPX_PARTITION_MODE:
return ((num_xcc > 1) &&
(adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) &&
(num_xcc % adev->gmc.num_mem_partitions) == 0);
default:
return false;
}
return false;
}
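/* Illustrative note (added for clarity, not part of the patch): with 8 XCCs
 * and 4 memory partitions the checks above refuse SPX and TPX but allow DPX,
 * QPX and CPX; with 8 XCCs and a single memory partition every mode except
 * TPX is accepted.
 */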
static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
{
/* TODO:
* Stop user queues and threads, and make sure GPU is empty of work.
*/
if (flags & AMDGPU_XCP_OPS_KFD)
amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
return 0;
}
static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
{
int ret = 0;
if (flags & AMDGPU_XCP_OPS_KFD) {
amdgpu_amdkfd_device_probe(xcp_mgr->adev);
amdgpu_amdkfd_device_init(xcp_mgr->adev);
/* If KFD init failed, return failure */
if (!xcp_mgr->adev->kfd.init_complete)
ret = -EIO;
}
return ret;
}
static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
int mode, int *num_xcps)
{
int num_xcc_per_xcp, num_xcc, ret;
struct amdgpu_device *adev;
u32 flags = 0;
adev = xcp_mgr->adev;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
} else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
dev_err(adev->dev,
"Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
return -EINVAL;
}
if (adev->kfd.init_complete)
flags |= AMDGPU_XCP_OPS_KFD;
if (flags & AMDGPU_XCP_OPS_KFD) {
ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
if (ret)
goto out;
}
ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags);
if (ret)
goto unlock;
num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
if (adev->gfx.funcs->switch_partition_mode)
adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
num_xcc_per_xcp);
if (adev->nbio.funcs->set_compute_partition_mode)
adev->nbio.funcs->set_compute_partition_mode(adev, mode);
/* Init info about new xcps */
*num_xcps = num_xcc / num_xcc_per_xcp;
amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
unlock:
if (flags & AMDGPU_XCP_OPS_KFD)
amdgpu_amdkfd_unlock_kfd(adev);
out:
return ret;
}
static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
int xcc_id, uint8_t *mem_id)
{
/* memory/spatial modes validation check is already done */
*mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
*mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;
return 0;
}
static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp *xcp, uint8_t *mem_id)
{
struct amdgpu_numa_info numa_info;
struct amdgpu_device *adev;
uint32_t xcc_mask;
int r, i, xcc_id;
adev = xcp_mgr->adev;
/* TODO: BIOS is not returning the right info now
* Check on this later
*/
/*
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
*/
if (adev->gmc.num_mem_partitions == 1) {
/* Only one range */
*mem_id = 0;
return 0;
}
r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
if (r || !xcc_mask)
return -EINVAL;
xcc_id = ffs(xcc_mask) - 1;
if (!adev->gmc.is_app_apu)
return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
if (r)
return r;
r = -EINVAL;
for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
*mem_id = i;
r = 0;
break;
}
}
return r;
}
static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
enum AMDGPU_XCP_IP_BLOCK ip_id,
struct amdgpu_xcp_ip *ip)
{
if (!ip)
return -EINVAL;
return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
}
struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
.switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
.query_partition_mode = &aqua_vanjaram_query_partition_mode,
.get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
.get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
.select_scheds = &aqua_vanjaram_select_scheds,
.update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list
};
static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
{
int ret;
ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
&aqua_vanjaram_xcp_funcs);
if (ret)
return ret;
/* TODO: Default memory node affinity init */
return ret;
}
int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
{
u32 mask, inst_mask = adev->sdma.sdma_mask;
int ret, i;
/* generally 1 AID supports 4 instances */
adev->sdma.num_inst_per_aid = 4;
adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);
adev->aid_mask = i = 1;
inst_mask >>= adev->sdma.num_inst_per_aid;
for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
if ((inst_mask & mask) == mask)
adev->aid_mask |= (1 << i);
}
/* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
* addressed based on logical instance ids.
*/
adev->vcn.harvest_config = 0;
adev->vcn.num_inst_per_aid = 1;
adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
adev->jpeg.harvest_config = 0;
adev->jpeg.num_inst_per_aid = 1;
adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);
ret = aqua_vanjaram_xcp_mgr_init(adev);
if (ret)
return ret;
aqua_vanjaram_ip_map_init(adev);
return 0;
}


@@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev,
		mutex_lock(&adev->grbm_idx_mutex);
		if (se_num != 0xffffffff || sh_num != 0xffffffff)
-			amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);

		val = RREG32(reg_offset);

		if (se_num != 0xffffffff || sh_num != 0xffffffff)
-			amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
		mutex_unlock(&adev->grbm_idx_mutex);
		return val;
	} else {

Some files were not shown because too many files have changed in this diff.