drm/amdgpu: introduce a new parameter to configure how many KCQs we want (v5)
what: the MQD save and restore of KCQs (kernel compute queues) costs many
clocks during world switch, which badly impacts multi-VF performance

how: introduce a parameter to control the number of KCQs, so the
performance drop can be avoided when no kernel compute queue is needed

notes: this parameter only affects gfx 8/9/10

v2: refine namings
v3: choose queues for each ring so that they spread across pipes as
evenly as possible
v4: fix indentation, some cleanups in gfx_compute_queue_acquire()
v5: further indentation fixes, more cleanups in gfx_compute_queue_acquire()

TODO: in the future, let the hypervisor driver set this parameter
automatically, so the user does not need to configure it through
modprobe inside the virtual machine

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
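For reference, the knob added below is the num_kcq module parameter. In a multi-VF guest that needs no kernel compute queues it would be set at load time, e.g. "modprobe amdgpu num_kcq=0" (or "amdgpu.num_kcq=0" on the kernel command line when the driver is built in). This is a usage sketch based on the parameter added in this patch; valid values are 0 through 8, and anything outside that range falls back to 8.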
parent 9b856defbe
commit a300de40f6
7 changed files with 75 additions and 72 deletions
drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -202,6 +202,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq;
 
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
 	amdgpu_gmc_tmz_set(adev);
 
+	if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+		amdgpu_num_kcq = 8;
+		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
+	}
+
 	return 0;
 }
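Note that 0 is accepted as valid here: it disables kernel compute queues entirely, which is exactly the no-KCQ case the commit message describes, while out-of-range values fall back to 8.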
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -776,6 +777,9 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)");
 module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queues the user wants to set up (8 if set to greater than 8 or less than 0; only affects gfx8 and newer)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
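A side note on the 0444 permission: it exposes the value read-only under /sys/module/amdgpu/parameters/num_kcq, so the queue count can be inspected but not changed at runtime; it is consumed once during device initialization.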
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,29 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-	int i, queue, pipe, mec;
+	int i, queue, pipe;
 	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
-
-	/* policy for amdgpu compute queue ownership */
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		queue = i % adev->gfx.mec.num_queue_per_pipe;
-		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-			% adev->gfx.mec.num_pipe_per_mec;
-		mec = (i / adev->gfx.mec.num_queue_per_pipe)
-			/ adev->gfx.mec.num_pipe_per_mec;
-
-		/* we've run out of HW */
-		if (mec >= adev->gfx.mec.num_mec)
-			break;
+	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+				     adev->gfx.mec.num_queue_per_pipe,
+				     adev->gfx.num_compute_rings);
 
-		if (multipipe_policy) {
-			/* policy: amdgpu owns the first two queues of the first MEC */
-			if (mec == 0 && queue < 2)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
-		} else {
-			/* policy: amdgpu owns all queues in the first pipe */
-			if (mec == 0 && pipe == 0)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
+	if (multipipe_policy) {
+		/* policy: make queues evenly cross all pipes on MEC1 only */
+		for (i = 0; i < max_queues_per_mec; i++) {
+			pipe = i % adev->gfx.mec.num_pipe_per_mec;
+			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+				adev->gfx.mec.num_queue_per_pipe;
+
+			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+				adev->gfx.mec.queue_bitmap);
 		}
+	} else {
+		/* policy: amdgpu owns all queues in the given pipe */
+		for (i = 0; i < max_queues_per_mec; ++i)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
 	}
 
-	/* update the number of active compute rings */
-	adev->gfx.num_compute_rings =
-		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
-	/* If you hit this case and edited the policy, you probably just
-	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
-	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
-		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
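To make the new multipipe policy concrete, below is a minimal standalone C sketch of the pipe-first walk. The topology constants are hypothetical stand-ins for the adev->gfx.mec fields (a GFX9-like MEC with 4 pipes and 8 queues per pipe); this is an illustration, not driver code:

#include <stdio.h>

/* Hypothetical GFX9-like topology; the driver reads these from adev->gfx.mec. */
#define NUM_PIPE_PER_MEC        4
#define NUM_QUEUE_PER_PIPE      8

int main(void)
{
        int num_compute_rings = 4;      /* e.g. loaded with amdgpu.num_kcq=4 */
        int max_queues_per_mec = NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE;
        int i;

        if (num_compute_rings < max_queues_per_mec)
                max_queues_per_mec = num_compute_rings;

        /* pipe-first walk, mirroring the multipipe_policy branch above */
        for (i = 0; i < max_queues_per_mec; i++) {
                int pipe = i % NUM_PIPE_PER_MEC;
                int queue = (i / NUM_PIPE_PER_MEC) % NUM_QUEUE_PER_PIPE;

                printf("ring %d -> pipe %d queue %d (bitmap bit %d)\n",
                       i, pipe, queue, pipe * NUM_QUEUE_PER_PIPE + queue);
        }
        return 0;
}

With num_kcq=4 this claims one queue on each of the four pipes (bitmap bits 0, 8, 16, 24), whereas the old single-pipe policy would have packed all four queues onto pipe 0.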
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,22 +4022,24 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_gfx_compute_queue_acquire(adev);
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		gfx_v10_0_mec_fini(adev);
-		return r;
-	}
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_GTT,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v10_0_mec_fini(adev);
+			return r;
+		}
 
-	memset(hpd, 0, mec_hpd_size);
+		memset(hpd, 0, mec_hpd_size);
 
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
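The new if (mec_hpd_size) guard is what makes num_kcq=0 work: with zero compute rings, mec_hpd_size evaluates to 0 and the HPD EOP buffer object is never allocated or mapped. The gfx8 and gfx9 mec_init hunks below apply the same pattern.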
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
 		break;
 	}
 
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq;
 
 	gfx_v10_0_set_kiq_pm4_funcs(adev);
 	gfx_v10_0_set_ring_funcs(adev);
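This assignment can use amdgpu_num_kcq directly because amdgpu_device_check_arguments() runs earlier in device initialization than the IP blocks' early_init hooks, so the value has already been clamped to the 0-8 range; the same applies to the gfx8 and gfx9 early_init changes below.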
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,22 +1343,23 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_gfx_compute_queue_acquire(adev);
 
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_VRAM,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			return r;
+		}
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		return r;
+		memset(hpd, 0, mec_hpd_size);
+
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 	}
 
-	memset(hpd, 0, mec_hpd_size);
-
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
-
 	return 0;
 }
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,23 +1938,24 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	/* take ownership of the relevant compute queues */
 	amdgpu_gfx_compute_queue_acquire(adev);
 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_VRAM,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v9_0_mec_fini(adev);
+			return r;
+		}
 
-	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->gfx.mec.hpd_eop_obj,
-				      &adev->gfx.mec.hpd_eop_gpu_addr,
-				      (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-		gfx_v9_0_mec_fini(adev);
-		return r;
+		memset(hpd, 0, mec_hpd_size);
+
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 	}
 
-	memset(hpd, 0, mec_hpd_size);
-
-	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
-
 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 
 	fw_data = (const __le32 *)
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
 		adev->gfx.num_gfx_rings = 0;
 	else
 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = amdgpu_num_kcq;
 	gfx_v9_0_set_kiq_pm4_funcs(adev);
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);