summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorRodrigo Vivi <rodrigo.vivi@intel.com>2018-07-23 09:13:12 -0700
committerRodrigo Vivi <rodrigo.vivi@intel.com>2018-07-23 09:13:12 -0700
commitc74a7469f97c0f40b46e82ee979f9fb1bb6e847c (patch)
treef2690a1a916b73ef94657fbf0e0141ae57701825 /drivers/gpu/drm/amd/amdgpu
parent6f15a7de86c8cf2dc09fc9e6d07047efa40ef809 (diff)
parent500775074f88d9cf5416bed2ca19592812d62c41 (diff)
Merge drm/drm-next into drm-intel-next-queued
We need a backmerge to get DP_DPCD_REV_14 before we push other i915 changes to dinq that could break compilation. Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ObjectID.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h165
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c117
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c1043
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c586
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c255
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c93
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c93
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c195
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c296
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c283
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c202
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c298
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c516
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c465
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c557
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c271
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c243
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c625
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ci_dpm.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c120
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c399
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c461
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c161
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c142
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c1090
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c655
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c142
114 files changed, 9500 insertions, 3154 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2ca2b5154d52..bfd332c95b61 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
- ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
- amdgpu_amdkfd_gfx_v7.o
+ ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
- vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
+ vega20_reg_init.o
+
+# add DF block
+amdgpu-y += \
+ df_v1_7.o \
+ df_v3_6.o
# add GMC block
amdgpu-y += \
@@ -126,11 +131,20 @@ amdgpu-y += \
vcn_v1_0.o
# add amdkfd interfaces
+amdgpu-y += amdgpu_amdkfd.o
+
+ifneq ($(CONFIG_HSA_AMD),)
amdgpu-y += \
- amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
- amdgpu_amdkfd_gfx_v8.o
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o
+
+ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
+amdgpu-y += amdgpu_amdkfd_gfx_v7.o
+endif
+
+endif
# add cgs
amdgpu-y += amdgpu_cgs.o
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
index 06192698bd96..5b393622f592 100644
--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -136,6 +136,7 @@
#define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02
#define GENERIC_OBJECT_ID_MXM_OPM 0x03
#define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin
+#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05
/****************************************************/
/* Graphics Object ENUM ID Definition */
@@ -714,6 +715,13 @@
GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT)
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
/****************************************************/
/* Object Cap definition - Shared with BIOS */
/****************************************************/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8b605f3dc05..44f62fda4022 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -73,6 +73,7 @@
#include "amdgpu_virt.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
+#include "amdgpu_job.h"
/*
* Modules parameters.
@@ -105,11 +106,8 @@ extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
extern int amdgpu_dc;
-extern int amdgpu_dc_log;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
-extern int amdgpu_no_evict;
-extern int amdgpu_direct_gma_size;
extern uint amdgpu_pcie_gen_cap;
extern uint amdgpu_pcie_lane_cap;
extern uint amdgpu_cg_mask;
@@ -129,6 +127,7 @@ extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
+extern uint amdgpu_smu_memory_pool_size;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -137,6 +136,7 @@ extern int amdgpu_si_support;
extern int amdgpu_cik_support;
#endif
+#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -188,6 +188,7 @@ struct amdgpu_job;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
+struct amdgpu_atif;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -222,10 +223,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
};
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state);
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state);
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
@@ -597,17 +598,6 @@ struct amdgpu_ib {
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_job **job, struct amdgpu_vm *vm);
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- struct amdgpu_job **job);
-
-void amdgpu_job_free_resources(struct amdgpu_job *job);
-void amdgpu_job_free(struct amdgpu_job *job);
-int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
- struct drm_sched_entity *entity, void *owner,
- struct dma_fence **f);
-
/*
* Queue manager
*/
@@ -681,6 +671,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
@@ -727,6 +719,14 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
struct list_head *validated);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ unsigned num_entries,
+ struct amdgpu_bo_list **list);
/*
* GFX stuff
@@ -771,9 +771,18 @@ struct amdgpu_rlc {
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
+ u32 reg_list_format_direct_reg_list_length;
+ u32 save_restore_list_cntl_size_bytes;
+ u32 save_restore_list_gpm_size_bytes;
+ u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
+ u8 *save_restore_list_cntl;
+ u8 *save_restore_list_gpm;
+ u8 *save_restore_list_srm;
+
+ bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -867,6 +876,8 @@ struct amdgpu_gfx_config {
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
+ /* cached value of DB_DEBUG2 */
+ uint32_t db_debug2;
};
struct amdgpu_cu_info {
@@ -915,6 +926,11 @@ struct amdgpu_ngg {
bool init;
};
+struct sq_work {
+ struct work_struct work;
+ unsigned ih_data;
+};
+
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
@@ -938,6 +954,12 @@ struct amdgpu_gfx {
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
+ uint32_t rlc_srlc_fw_version;
+ uint32_t rlc_srlc_feature_version;
+ uint32_t rlc_srlg_fw_version;
+ uint32_t rlc_srlg_feature_version;
+ uint32_t rlc_srls_fw_version;
+ uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
@@ -947,6 +969,10 @@ struct amdgpu_gfx {
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src cp_ecc_error_irq;
+ struct amdgpu_irq_src sq_irq;
+ struct sq_work sq_work;
+
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
@@ -998,6 +1024,7 @@ struct amdgpu_cs_parser {
/* scheduler job object */
struct amdgpu_job *job;
+ struct amdgpu_ring *ring;
/* buffer objects */
struct ww_acquire_ctx ticket;
@@ -1019,40 +1046,6 @@ struct amdgpu_cs_parser {
struct drm_syncobj **post_dep_syncobjs;
};
-#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
-#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
-#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
-
-struct amdgpu_job {
- struct drm_sched_job base;
- struct amdgpu_device *adev;
- struct amdgpu_vm *vm;
- struct amdgpu_ring *ring;
- struct amdgpu_sync sync;
- struct amdgpu_sync sched_sync;
- struct amdgpu_ib *ibs;
- struct dma_fence *fence; /* the hw fence */
- uint32_t preamble_status;
- uint32_t num_ibs;
- void *owner;
- uint64_t fence_ctx; /* the fence_context this job uses */
- bool vm_needs_flush;
- uint64_t vm_pd_addr;
- unsigned vmid;
- unsigned pasid;
- uint32_t gds_base, gds_size;
- uint32_t gws_base, gws_size;
- uint32_t oa_base, oa_size;
- uint32_t vram_lost_counter;
-
- /* user fence handling */
- uint64_t uf_addr;
- uint64_t uf_sequence;
-
-};
-#define to_amdgpu_job(sched_job) \
- container_of((sched_job), struct amdgpu_job, base)
-
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
uint32_t ib_idx, int idx)
{
@@ -1204,6 +1197,8 @@ struct amdgpu_asic_funcs {
/* invalidate hdp read cache */
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+ /* check if the asic needs a full reset of if soft reset will work */
+ bool (*need_full_reset)(struct amdgpu_device *adev);
};
/*
@@ -1246,43 +1241,6 @@ struct amdgpu_vram_scratch {
/*
* ACPI
*/
-struct amdgpu_atif_notification_cfg {
- bool enabled;
- int command_code;
-};
-
-struct amdgpu_atif_notifications {
- bool display_switch;
- bool expansion_mode_change;
- bool thermal_state;
- bool forced_power_state;
- bool system_power_state;
- bool display_conf_change;
- bool px_gfx_switch;
- bool brightness_change;
- bool dgpu_display_event;
-};
-
-struct amdgpu_atif_functions {
- bool system_params;
- bool sbios_requests;
- bool select_active_disp;
- bool lid_state;
- bool get_tv_standard;
- bool set_tv_standard;
- bool get_panel_expansion_mode;
- bool set_panel_expansion_mode;
- bool temperature_change;
- bool graphics_device_types;
-};
-
-struct amdgpu_atif {
- struct amdgpu_atif_notifications notifications;
- struct amdgpu_atif_functions functions;
- struct amdgpu_atif_notification_cfg notification_cfg;
- struct amdgpu_encoder *encoder_for_bl;
-};
-
struct amdgpu_atcs_functions {
bool get_ext_state;
bool pcie_perf_req;
@@ -1368,7 +1326,19 @@ struct amdgpu_nbio_funcs {
void (*detect_hw_virt)(struct amdgpu_device *adev);
};
-
+struct amdgpu_df_funcs {
+ void (*init)(struct amdgpu_device *adev);
+ void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+ bool enable);
+ u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+ u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u32 *flags);
+ void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+ bool enable);
+};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
GC_HWIP = 1,
@@ -1390,6 +1360,7 @@ enum amd_hw_ip_block_type {
PWR_HWIP,
NBIF_HWIP,
THM_HWIP,
+ CLK_HWIP,
MAX_HWIP
};
@@ -1398,6 +1369,7 @@ enum amd_hw_ip_block_type {
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
+ uint32_t pp_feature;
};
#define AMDGPU_RESET_MAGIC_NUM 64
@@ -1430,7 +1402,7 @@ struct amdgpu_device {
#if defined(CONFIG_DEBUG_FS)
struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
#endif
- struct amdgpu_atif atif;
+ struct amdgpu_atif *atif;
struct amdgpu_atcs atcs;
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
@@ -1579,9 +1551,9 @@ struct amdgpu_device {
DECLARE_HASHTABLE(mn_hash, 7);
/* tracking pinned memory */
- u64 vram_pin_size;
- u64 invisible_pin_size;
- u64 gart_pin_size;
+ atomic64_t vram_pin_size;
+ atomic64_t visible_pin_size;
+ atomic64_t gart_pin_size;
/* amdkfd interface */
struct kfd_dev *kfd;
@@ -1590,6 +1562,7 @@ struct amdgpu_device {
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
const struct amdgpu_nbio_funcs *nbio_funcs;
+ const struct amdgpu_df_funcs *df_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
@@ -1764,6 +1737,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
@@ -1790,6 +1764,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
@@ -1855,6 +1830,12 @@ static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false;
static inline bool amdgpu_has_atpx(void) { return false; }
#endif
+#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void);
+#else
+static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
+#endif
+
/*
* KMS
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index a29362f9ef41..f4c474a95875 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -290,12 +290,11 @@ static int acp_hw_init(void *handle)
else if (r)
return r;
- r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
- 0x5289, 0, &acp_base);
- if (r == -ENODEV)
- return 0;
- else if (r)
- return r;
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ acp_base = adev->rmmio_base;
+
if (adev->asic_type != CHIP_STONEY) {
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
@@ -311,20 +310,20 @@ static int acp_hw_init(void *handle)
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
}
- adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS,
+ adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL);
if (adev->acp.acp_cell == NULL)
return -ENOMEM;
- adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL);
+ adev->acp.acp_res = kcalloc(4, sizeof(struct resource), GFP_KERNEL);
if (adev->acp.acp_res == NULL) {
kfree(adev->acp.acp_cell);
return -ENOMEM;
}
- i2s_pdata = kzalloc(sizeof(struct i2s_platform_data) * 2, GFP_KERNEL);
+ i2s_pdata = kcalloc(2, sizeof(struct i2s_platform_data), GFP_KERNEL);
if (i2s_pdata == NULL) {
kfree(adev->acp.acp_res);
kfree(adev->acp.acp_cell);
@@ -513,7 +512,7 @@ static int acp_hw_fini(void *handle)
if (adev->acp.acp_genpd) {
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
- ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
+ ret = pm_genpd_remove_device(dev);
/* If removal fails, dont giveup and try rest */
if (ret)
dev_err(dev, "remove dev from genpd failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 8fa850a070e0..0d8c3fc6eace 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -34,6 +34,45 @@
#include "amd_acpi.h"
#include "atom.h"
+struct amdgpu_atif_notification_cfg {
+ bool enabled;
+ int command_code;
+};
+
+struct amdgpu_atif_notifications {
+ bool display_switch;
+ bool expansion_mode_change;
+ bool thermal_state;
+ bool forced_power_state;
+ bool system_power_state;
+ bool display_conf_change;
+ bool px_gfx_switch;
+ bool brightness_change;
+ bool dgpu_display_event;
+};
+
+struct amdgpu_atif_functions {
+ bool system_params;
+ bool sbios_requests;
+ bool select_active_disp;
+ bool lid_state;
+ bool get_tv_standard;
+ bool set_tv_standard;
+ bool get_panel_expansion_mode;
+ bool set_panel_expansion_mode;
+ bool temperature_change;
+ bool graphics_device_types;
+};
+
+struct amdgpu_atif {
+ acpi_handle handle;
+
+ struct amdgpu_atif_notifications notifications;
+ struct amdgpu_atif_functions functions;
+ struct amdgpu_atif_notification_cfg notification_cfg;
+ struct amdgpu_encoder *encoder_for_bl;
+};
+
/* Call the ATIF method
*/
/**
@@ -46,8 +85,9 @@
* Executes the requested ATIF function (all asics).
* Returns a pointer to the acpi output buffer.
*/
-static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function,
- struct acpi_buffer *params)
+static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
+ int function,
+ struct acpi_buffer *params)
{
acpi_status status;
union acpi_object atif_arg_elements[2];
@@ -70,7 +110,8 @@ static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function,
atif_arg_elements[1].integer.value = 0;
}
- status = acpi_evaluate_object(handle, "ATIF", &atif_arg, &buffer);
+ status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
+ &buffer);
/* Fail only if calling the method fails and ATIF is supported */
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
@@ -141,15 +182,14 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
* (all asics).
* returns 0 on success, error on failure.
*/
-static int amdgpu_atif_verify_interface(acpi_handle handle,
- struct amdgpu_atif *atif)
+static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif)
{
union acpi_object *info;
struct atif_verify_interface output;
size_t size;
int err = 0;
- info = amdgpu_atif_call(handle, ATIF_FUNCTION_VERIFY_INTERFACE, NULL);
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_VERIFY_INTERFACE, NULL);
if (!info)
return -EIO;
@@ -176,6 +216,35 @@ out:
return err;
}
+static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle)
+{
+ acpi_handle handle = NULL;
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name };
+ acpi_status status;
+
+ /* For PX/HG systems, ATIF and ATPX are in the iGPU's namespace, on dGPU only
+ * systems, ATIF is in the dGPU's namespace.
+ */
+ status = acpi_get_handle(dhandle, "ATIF", &handle);
+ if (ACPI_SUCCESS(status))
+ goto out;
+
+ if (amdgpu_has_atpx()) {
+ status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATIF",
+ &handle);
+ if (ACPI_SUCCESS(status))
+ goto out;
+ }
+
+ DRM_DEBUG_DRIVER("No ATIF handle found\n");
+ return NULL;
+out:
+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
+ return handle;
+}
+
/**
* amdgpu_atif_get_notification_params - determine notify configuration
*
@@ -188,15 +257,16 @@ out:
* where n is specified in the result if a notifier is used.
* Returns 0 on success, error on failure.
*/
-static int amdgpu_atif_get_notification_params(acpi_handle handle,
- struct amdgpu_atif_notification_cfg *n)
+static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
{
union acpi_object *info;
+ struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg;
struct atif_system_params params;
size_t size;
int err = 0;
- info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS, NULL);
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS,
+ NULL);
if (!info) {
err = -EIO;
goto out;
@@ -250,14 +320,15 @@ out:
* (all asics).
* Returns 0 on success, error on failure.
*/
-static int amdgpu_atif_get_sbios_requests(acpi_handle handle,
- struct atif_sbios_requests *req)
+static int amdgpu_atif_get_sbios_requests(struct amdgpu_atif *atif,
+ struct atif_sbios_requests *req)
{
union acpi_object *info;
size_t size;
int count = 0;
- info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS, NULL);
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS,
+ NULL);
if (!info)
return -EIO;
@@ -290,11 +361,10 @@ out:
* Returns NOTIFY code
*/
static int amdgpu_atif_handler(struct amdgpu_device *adev,
- struct acpi_bus_event *event)
+ struct acpi_bus_event *event)
{
- struct amdgpu_atif *atif = &adev->atif;
+ struct amdgpu_atif *atif = adev->atif;
struct atif_sbios_requests req;
- acpi_handle handle;
int count;
DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
@@ -303,14 +373,14 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
return NOTIFY_DONE;
- if (!atif->notification_cfg.enabled ||
+ if (!atif ||
+ !atif->notification_cfg.enabled ||
event->type != atif->notification_cfg.command_code)
/* Not our event */
return NOTIFY_DONE;
/* Check pending SBIOS requests */
- handle = ACPI_HANDLE(&adev->pdev->dev);
- count = amdgpu_atif_get_sbios_requests(handle, &req);
+ count = amdgpu_atif_get_sbios_requests(atif, &req);
if (count <= 0)
return NOTIFY_DONE;
@@ -641,8 +711,8 @@ static int amdgpu_acpi_event(struct notifier_block *nb,
*/
int amdgpu_acpi_init(struct amdgpu_device *adev)
{
- acpi_handle handle;
- struct amdgpu_atif *atif = &adev->atif;
+ acpi_handle handle, atif_handle;
+ struct amdgpu_atif *atif;
struct amdgpu_atcs *atcs = &adev->atcs;
int ret;
@@ -658,12 +728,26 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
DRM_DEBUG_DRIVER("Call to ATCS verify_interface failed: %d\n", ret);
}
+ /* Probe for ATIF, and initialize it if found */
+ atif_handle = amdgpu_atif_probe_handle(handle);
+ if (!atif_handle)
+ goto out;
+
+ atif = kzalloc(sizeof(*atif), GFP_KERNEL);
+ if (!atif) {
+ DRM_WARN("Not enough memory to initialize ATIF\n");
+ goto out;
+ }
+ atif->handle = atif_handle;
+
/* Call the ATIF method */
- ret = amdgpu_atif_verify_interface(handle, atif);
+ ret = amdgpu_atif_verify_interface(atif);
if (ret) {
DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret);
+ kfree(atif);
goto out;
}
+ adev->atif = atif;
if (atif->notifications.brightness_change) {
struct drm_encoder *tmp;
@@ -693,8 +777,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
}
if (atif->functions.system_params) {
- ret = amdgpu_atif_get_notification_params(handle,
- &atif->notification_cfg);
+ ret = amdgpu_atif_get_notification_params(atif);
if (ret) {
DRM_DEBUG_DRIVER("Call to GET_SYSTEM_PARAMS failed: %d\n",
ret);
@@ -720,4 +803,6 @@ out:
void amdgpu_acpi_fini(struct amdgpu_device *adev)
{
unregister_acpi_notifier(&adev->acpi_nb);
+ if (adev->atif)
+ kfree(adev->atif);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4d36203ffb11..e3ed08dca7b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void)
kgd2kfd = NULL;
}
+
#elif defined(CONFIG_HSA_AMD)
+
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
kgd2kfd = NULL;
#else
+ kgd2kfd = NULL;
ret = -ENOENT;
#endif
+
+#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
amdgpu_amdkfd_gpuvm_init_mem_limits();
+#endif
return ret;
}
@@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
+ break;
default:
- dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
+ dev_info(adev->dev, "kfd not supported on this ASIC\n");
return;
}
@@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
+ if (adev->asic_type >= CHIP_VEGA10) {
+ /* On SOC15 the BIF is involved in routing
+ * doorbells using the low 12 bits of the
+ * address. Communicate the assignments to
+ * KFD. KFD uses two doorbell pages per
+ * process in case of 64-bit doorbells so we
+ * can use each doorbell assignment twice.
+ */
+ gpu_resources.sdma_doorbell[0][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0;
+ gpu_resources.sdma_doorbell[0][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
+ gpu_resources.sdma_doorbell[1][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1;
+ gpu_resources.sdma_doorbell[1][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
+ /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
+ * SDMA, IH and VCN. So don't use them for the CP.
+ */
+ gpu_resources.reserved_doorbell_mask = 0x1f0;
+ gpu_resources.reserved_doorbell_val = 0x0f0;
+ }
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
@@ -217,13 +249,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_param bp;
int r;
- uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
- NULL, &bo);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
@@ -237,13 +274,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
goto allocate_mem_reserve_bo_failed;
}
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
- &gpu_addr_tmp);
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", bo);
+ goto allocate_mem_kmap_bo_failed;
+ }
+
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
@@ -252,7 +294,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
}
*mem_obj = bo;
- *gpu_addr = gpu_addr_tmp;
+ *gpu_addr = amdgpu_bo_gpu_offset(bo);
*cpu_ptr = cpu_ptr_tmp;
amdgpu_bo_unreserve(bo);
@@ -304,15 +346,12 @@ void get_local_mem_info(struct kgd_dev *kgd,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
- if (amdgpu_emu_mode == 1) {
- mem_info->mem_clk_max = 100;
- return;
- }
-
if (amdgpu_sriov_vf(adev))
mem_info->mem_clk_max = adev->clock.default_mclk / 100;
- else
+ else if (adev->powerplay.pp_funcs)
mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
+ else
+ mem_info->mem_clk_max = 100;
}
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
@@ -329,13 +368,12 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
/* the sclk is in quantas of 10kHz */
- if (amdgpu_emu_mode == 1)
- return 100;
-
if (amdgpu_sriov_vf(adev))
return adev->clock.default_sclk / 100;
-
- return amdgpu_dpm_get_sclk(adev, false) / 100;
+ else if (adev->powerplay.pp_funcs)
+ return amdgpu_dpm_get_sclk(adev, false) / 100;
+ else
+ return 100;
}
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
@@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
+
+#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ return false;
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ return NULL;
+}
+
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+{
+ return 0;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
+{
+ return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
+{
+ return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+ return NULL;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea731e0..a8418a3f4e9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
+#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
uint32_t mapping_flags;
+ atomic_t invalid;
struct amdkfd_process_info *process_info;
+ struct page **user_pages;
struct amdgpu_sync sync;
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
+ /* List of userptr BOs that are valid or invalid */
+ struct list_head userptr_valid_list;
+ struct list_head userptr_inval_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
+
+ /* MMU-notifier related fields */
+ atomic_t evicted_bos;
+ struct delayed_work restore_userptr_work;
+ struct pid *pid;
};
int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- void **process_info,
- struct dma_fence **ef);
+ void **process_info,
+ struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp,
- void **vm, void **process_info,
- struct dma_fence **ef);
+ struct file *filp,
+ void **vm, void **process_info,
+ struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+ struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 2c14025e5e76..574c1181ae9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -173,7 +173,5 @@ static const struct dma_fence_ops amdkfd_fence_ops = {
.get_driver_name = amdkfd_fence_get_driver_name,
.get_timeline_name = amdkfd_fence_get_timeline_name,
.enable_signaling = amdkfd_fence_enable_signaling,
- .signaled = NULL,
- .wait = dma_fence_default_wait,
.release = amdkfd_fence_release,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea54e53172b9..ea79908dac4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -417,7 +407,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -514,7 +504,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 89264c9a5e9f..19dd665e7307 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -405,7 +395,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -501,7 +491,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 000000000000..1db60aa5b7f0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+/* HACK: MMHUB and GC both have VM-related register with the same
+ * names but different offsets. Define the MMHUB register we need here
+ * with a prefix. A proper solution would be to move the functions
+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
+ * respectively.
+ */
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
+
+#define V9_PIPE_PER_MEC (4)
+#define V9_QUEUES_PER_PIPE_MEC (8)
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
+ .get_local_mem_info = get_local_mem_info,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .alloc_pasid = amdgpu_pasid_alloc,
+ .free_pasid = amdgpu_pasid_free,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid =
+ get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid =
+ get_atc_vmid_pasid_mapping_valid,
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+ .get_cu_info = get_cu_info,
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .submit_ib = amdgpu_amdkfd_submit_ib,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id) & 31;
+
+ return ((uint32_t)1) << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ /*
+ * need to do this twice, once for gfx and once for mmhub
+ * for ATC add 16 to VMID for mmhub, for IH different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << (vmid + 16))))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << (vmid + 16));
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
+ };
+ uint32_t retval;
+
+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+ mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("sdma base address: 0x%x\n", retval);
+
+ return retval;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uintptr_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uintptr_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ data = RREG32(sdmax_gfx_context_cntl);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(sdmax_gfx_context_cntl, data);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v9_mqd *m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(kgd);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ uint32_t req = (1 << vmid) |
+ (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ /* Use legacy mode tlb invalidation.
+ *
+ * Currently on Raven the code below is broken for anything but
+ * legacy mode due to a MMHUB power gating problem. A workaround
+ * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+ * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+ * bit.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate both GC and
+ * MMHUB.
+ *
+ * TODO 2: support range-based invalidation, requires kfg2kgd
+ * interface change
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
+ req);
+
+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ mutex_unlock(&adev->srbm_mutex);
+
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ if (ring->ready)
+ return invalidate_tlbs_with_kiq(adev, pasid);
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+ if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+ == pasid) {
+ write_vmid_invalidate_request(kgd, vmid);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid %d\n", vmid);
+ return 0;
+ }
+
+ write_vmid_invalidate_request(kgd, vmid);
+ return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ /* No longer needed on GFXv9. The scratch base address is
+ * passed to the shader by the CP. It's the user mode driver's
+ * responsibility.
+ */
+}
+
+/* FIXME: Does this need to be ASIC-specific code? */
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ const union amdgpu_firmware_header *hdr;
+
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
+ break;
+
+ case KGD_ENGINE_ME:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
+ break;
+
+ case KGD_ENGINE_CE:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC1:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC2:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
+ break;
+
+ case KGD_ENGINE_RLC:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA1:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA2:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (hdr == NULL)
+ return 0;
+
+ /* Only 12 bit in use*/
+ return hdr->common.ucode_version;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
+ AMDGPU_PTE_VALID;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* TODO: take advantage of per-process address space size. For
+ * now, all processes share the same address space size, like
+ * on GFX8 and older.
+ */
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1d6e1479da38..079af8ac2636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,8 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/list.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -33,10 +35,20 @@
*/
#define VI_BO_SIZE_ALIGN (0x8000)
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
+ uint64_t max_userptr_mem_limit;
int64_t system_mem_used;
+ int64_t userptr_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = {
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
/* Set memory usage limits. Current, limits are
* System (kernel) memory - 3/8th System RAM
+ * Userptr memory - 3/4th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
- pr_debug("Kernel memory limit %lluM\n",
- (kfd_mem_limit.max_system_mem_limit >> 20));
+ kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+ pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20),
+ (kfd_mem_limit.max_userptr_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ if ((kfd_mem_limit.system_mem_used + acc_size >
+ kfd_mem_limit.max_system_mem_limit) ||
+ (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+ kfd_mem_limit.max_userptr_mem_limit)) {
+ ret = -ENOMEM;
+ goto err_no_mem;
+ }
+ kfd_mem_limit.system_mem_used += acc_size;
+ kfd_mem_limit.userptr_mem_used += size;
}
err_no_mem:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (domain == AMDGPU_GEM_DOMAIN_GTT)
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ kfd_mem_limit.system_mem_used -= acc_size;
+ kfd_mem_limit.userptr_mem_used -= size;
+ }
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+ kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+ kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+ } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
- struct amdkfd_process_info *process_info)
+ struct amdkfd_process_info *process_info,
+ bool userptr)
{
struct ttm_validate_buffer *entry = &mem->validate_list;
struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
entry->shared = true;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
- list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ if (userptr)
+ list_add_tail(&entry->head, &process_info->userptr_valid_list);
+ else
+ list_add_tail(&entry->head, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+ uint64_t user_addr)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ struct amdgpu_bo *bo = mem->bo;
+ struct ttm_operation_ctx ctx = { true, false };
+ int ret = 0;
+
+ mutex_lock(&process_info->lock);
+
+ ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+ if (ret) {
+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = amdgpu_mn_register(bo, user_addr);
+ if (ret) {
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ /* If no restore worker is running concurrently, user_pages
+ * should not be allocated
+ */
+ WARN(mem->user_pages, "Leaking user_pages array");
+
+ mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n", __func__);
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ if (ret) {
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ goto free_out;
+ }
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("%s: Failed to reserve BO\n", __func__);
+ goto release_out;
+ }
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ pr_err("%s: failed to validate BO\n", __func__);
+ amdgpu_bo_unreserve(bo);
+
+release_out:
+ if (ret)
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+unregister_out:
+ if (ret)
+ amdgpu_mn_unregister(bo);
+out:
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
/* Reserving a BO and its page table BOs must happen atomically to
* avoid deadlocks. Some operations update multiple VMs at once. Track
* all the reservation info in a context structure. Optionally a sync
@@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+ bool no_update_pte)
{
int ret;
@@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return ret;
}
+ if (no_update_pte)
+ return 0;
+
ret = update_gpuvm_pte(adev, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
mutex_init(&info->lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
+ INIT_LIST_HEAD(&info->userptr_valid_list);
+ INIT_LIST_HEAD(&info->userptr_inval_list);
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
goto create_evict_fence_fail;
}
+ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ atomic_set(&info->evicted_bos, 0);
+ INIT_DELAYED_WORK(&info->restore_userptr_work,
+ amdgpu_amdkfd_restore_userptr_worker);
+
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -872,6 +1006,7 @@ reserve_pd_fail:
dma_fence_put(*ef);
*ef = NULL;
*process_info = NULL;
+ put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
kfree(info);
@@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
/* Release per-process resources when last compute VM is destroyed */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
dma_fence_put(&process_info->eviction_fence->base);
+ cancel_delayed_work_sync(&process_info->restore_userptr_work);
+ put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
kfree(process_info);
}
@@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ uint64_t user_addr = 0;
struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
int byte_align;
- u32 alloc_domain;
+ u32 domain, alloc_domain;
u64 alloc_flags;
uint32_t mapping_flags;
int ret;
@@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* Check on which domain to allocate BO
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
- alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
} else if (flags & ALLOC_MEM_FLAGS_GTT) {
- alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = *offset;
} else {
return -EINVAL;
}
@@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
- ret = amdgpu_bo_create(adev, size, byte_align,
- alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = byte_align;
+ bp.domain = alloc_domain;
+ bp.flags = alloc_flags;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
@@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
+ if (user_addr)
+ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
(*mem)->va = va;
- (*mem)->domain = alloc_domain;
+ (*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
- add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+ if (user_addr) {
+ ret = init_user_pages(*mem, current->mm, user_addr);
+ if (ret) {
+ mutex_lock(&avm->process_info->lock);
+ list_del(&(*mem)->validate_list.head);
+ mutex_unlock(&avm->process_info->lock);
+ goto allocate_init_user_pages_failed;
+ }
+ }
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
return 0;
+allocate_init_user_pages_failed:
+ amdgpu_bo_unref(&bo);
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_system_mem;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
@@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* be freed anyway
*/
+ /* No more MMU notifiers */
+ amdgpu_mn_unregister(mem->bo);
+
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
+ /* Free user pages if necessary */
+ if (mem->user_pages) {
+ pr_debug("%s: Freeing user_pages array\n", __func__);
+ if (mem->user_pages[0])
+ release_pages(mem->user_pages,
+ mem->bo->tbo.ttm->num_pages);
+ kvfree(mem->user_pages);
+ }
+
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kfd_bo_va_list *bo_va_entry = NULL;
struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
-
- /* Make sure restore is not running concurrently.
- */
- mutex_lock(&mem->process_info->lock);
-
- mutex_lock(&mem->lock);
+ bool is_invalid_userptr = false;
bo = mem->bo;
-
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
+ /* Make sure restore is not running concurrently. Since we
+ * don't map invalid userptr BOs, we rely on the next restore
+ * worker to do the mapping
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ /* Lock mmap-sem. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ down_write(&current->mm->mmap_sem);
+ is_invalid_userptr = atomic_read(&mem->invalid);
+ up_write(&current->mm->mmap_sem);
+ }
+
+ mutex_lock(&mem->lock);
+
domain = mem->domain;
bo_size = bo->tbo.mem.size;
@@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out;
+ /* Userptr can be marked as "not invalid", but not actually be
+ * validated yet (still in the system domain). In that case
+ * the queues are still stopped and we can leave mapping for
+ * the next restore worker
+ */
+ if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ is_invalid_userptr = true;
+
if (check_if_add_bo_to_vm(avm, mem)) {
ret = add_bo_to_vm(adev, mem, avm, false,
&bo_va_entry);
@@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto add_bo_to_vm_failed;
}
- if (mem->mapped_to_gpu_memory == 0) {
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
/* Validate BO only once. The eviction fence gets added to BO
* the first time it is mapped. Validate will wait for all
* background evictions to complete.
@@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size,
entry);
- ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+ is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
@@ -1384,7 +1587,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
goto bo_reserve_failed;
}
- ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret);
goto pin_failed;
@@ -1418,6 +1621,337 @@ bo_reserve_failed:
return ret;
}
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+ struct mm_struct *mm)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ int invalid, evicted_bos;
+ int r = 0;
+
+ invalid = atomic_inc_return(&mem->invalid);
+ evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+ if (evicted_bos == 1) {
+ /* First eviction, stop the queues */
+ r = kgd2kfd->quiesce_mm(mm);
+ if (r)
+ pr_err("Failed to quiesce KFD\n");
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+ }
+
+ return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+ struct mm_struct *mm)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int invalid, ret;
+
+ /* Move all invalidated BOs to the userptr_inval_list and
+ * release their user pages by migration to the CPU domain
+ */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_valid_list,
+ validate_list.head) {
+ if (!atomic_read(&mem->invalid))
+ continue; /* BO is still valid */
+
+ bo = mem->bo;
+
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_inval_list);
+ }
+
+ if (list_empty(&process_info->userptr_inval_list))
+ return 0; /* All evicted userptr BOs were freed */
+
+ /* Go through userptr_inval_list and update any invalid user_pages */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ invalid = atomic_read(&mem->invalid);
+ if (!invalid)
+ /* BO hasn't been invalidated since the last
+ * revalidation attempt. Keep its BO list.
+ */
+ continue;
+
+ bo = mem->bo;
+
+ if (!mem->user_pages) {
+ mem->user_pages =
+ kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n",
+ __func__);
+ return -ENOMEM;
+ }
+ } else if (mem->user_pages[0]) {
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+ }
+
+ /* Get updated user pages */
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ if (ret) {
+ mem->user_pages[0] = NULL;
+ pr_info("%s: Failed to get user pages: %d\n",
+ __func__, ret);
+ /* Pretend it succeeded. It will fail later
+ * with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with
+ * stalled user mode queues.
+ */
+ }
+
+ /* Mark the BO as valid unless it was invalidated
+ * again concurrently
+ */
+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct amdgpu_bo_list_entry *pd_bo_list_entries;
+ struct list_head resv_list, duplicates;
+ struct ww_acquire_ctx ticket;
+ struct amdgpu_sync sync;
+
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int i, ret;
+
+ pd_bo_list_entries = kcalloc(process_info->n_vms,
+ sizeof(struct amdgpu_bo_list_entry),
+ GFP_KERNEL);
+ if (!pd_bo_list_entries) {
+ pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&resv_list);
+ INIT_LIST_HEAD(&duplicates);
+
+ /* Get all the page directory BOs that need to be reserved */
+ i = 0;
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+ &pd_bo_list_entries[i++]);
+ /* Add the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ list_add_tail(&mem->resv_list.head, &resv_list);
+ mem->resv_list.bo = mem->validate_list.bo;
+ mem->resv_list.shared = mem->validate_list.shared;
+ }
+
+ /* Reserve all BOs and page tables for validation */
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+ WARN(!list_empty(&duplicates), "Duplicates should be empty");
+ if (ret)
+ goto out;
+
+ amdgpu_sync_create(&sync);
+
+ /* Avoid triggering eviction fences when unmapping invalid
+ * userptr BOs (waits for all fences, doesn't use
+ * FENCE_OWNER_VM)
+ */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+ process_info->eviction_fence,
+ NULL, NULL);
+
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto unreserve_out;
+
+ /* Validate BOs and update GPUVM page tables */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ struct kfd_bo_va_list *bo_va_entry;
+
+ bo = mem->bo;
+
+ /* Copy pages array and validate the BO if we got user pages */
+ if (mem->user_pages[0]) {
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret) {
+ pr_err("%s: failed to validate BO\n", __func__);
+ goto unreserve_out;
+ }
+ }
+
+ /* Validate succeeded, now the BO owns the pages, free
+ * our copy of the pointer array. Put this BO back on
+ * the userptr_valid_list. If we need to revalidate
+ * it, we need to start from scratch.
+ */
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_valid_list);
+
+ /* Update mapping. If the BO was not validated
+ * (because we couldn't get user pages), this will
+ * clear the page table entries, which will result in
+ * VM faults if the GPU tries to access the invalid
+ * memory.
+ */
+ list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+ if (!bo_va_entry->is_mapped)
+ continue;
+
+ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+ bo_va_entry, &sync);
+ if (ret) {
+ pr_err("%s: update PTE failed\n", __func__);
+ /* make sure this gets validated again */
+ atomic_inc(&mem->invalid);
+ goto unreserve_out;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_bo_fence(peer_vm->root.base.bo,
+ &process_info->eviction_fence->base, true);
+ ttm_eu_backoff_reservation(&ticket, &resv_list);
+ amdgpu_sync_wait(&sync, false);
+ amdgpu_sync_free(&sync);
+out:
+ kfree(pd_bo_list_entries);
+
+ return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info =
+ container_of(dwork, struct amdkfd_process_info,
+ restore_userptr_work);
+ struct task_struct *usertask;
+ struct mm_struct *mm;
+ int evicted_bos;
+
+ evicted_bos = atomic_read(&process_info->evicted_bos);
+ if (!evicted_bos)
+ return;
+
+ /* Reference task and mm in case of concurrent process termination */
+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+ if (!usertask)
+ return;
+ mm = get_task_mm(usertask);
+ if (!mm) {
+ put_task_struct(usertask);
+ return;
+ }
+
+ mutex_lock(&process_info->lock);
+
+ if (update_invalid_user_pages(process_info, mm))
+ goto unlock_out;
+ /* userptr_inval_list can be empty if all evicted userptr BOs
+ * have been freed. In that case there is nothing to validate
+ * and we can just restart the queues.
+ */
+ if (!list_empty(&process_info->userptr_inval_list)) {
+ if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+ goto unlock_out; /* Concurrent eviction, try again */
+
+ if (validate_invalid_user_pages(process_info))
+ goto unlock_out;
+ }
+ /* Final check for concurrent evicton and atomic update. If
+ * another eviction happens after successful update, it will
+ * be a first eviction that calls quiesce_mm. The eviction
+ * reference counting inside KFD will handle this case.
+ */
+ if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+ evicted_bos)
+ goto unlock_out;
+ evicted_bos = 0;
+ if (kgd2kfd->resume_mm(mm)) {
+ pr_err("%s: Failed to resume KFD\n", __func__);
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ }
+unlock_out:
+ mutex_unlock(&process_info->lock);
+ mmput(mm);
+ put_task_struct(usertask);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_bos)
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index a0f48cb9b8f0..236915849cfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
return ret;
}
+
+union gfx_info {
+ struct atom_gfx_info_v2_4 v24;
+};
+
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ }
+ return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 7689c961c4ef..20f158fd3b76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 1ae5ae8c45a4..b33f1680c9a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -32,7 +32,7 @@ struct amdgpu_atpx_functions {
bool switch_start;
bool switch_end;
bool disp_connectors_mapping;
- bool disp_detetion_ports;
+ bool disp_detection_ports;
};
struct amdgpu_atpx {
@@ -90,6 +90,12 @@ bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
}
+#if defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void) {
+ return amdgpu_atpx_priv.dhandle;
+}
+#endif
+
/**
* amdgpu_atpx_call - call an ATPX method
*
@@ -156,7 +162,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;
f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;
f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED;
- f->disp_detetion_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
+ f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
}
/**
@@ -550,7 +556,7 @@ static int amdgpu_atpx_init(void)
* look up whether we are the integrated or discrete GPU (all asics).
* Returns the client id.
*/
-static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
{
if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
return VGA_SWITCHEROO_IGD;
@@ -569,7 +575,6 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
- { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02b849be083b..3079ea8523c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -75,37 +75,56 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
{
struct amdgpu_bo *dobj = NULL;
struct amdgpu_bo *sobj = NULL;
+ struct amdgpu_bo_param bp;
uint64_t saddr, daddr;
int r, n;
int time;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = sdomain;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
n = AMDGPU_BENCHMARK_ITERATIONS;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
- ttm_bo_type_kernel, NULL, &sobj);
+ r = amdgpu_bo_create(adev, &bp, &sobj);
if (r) {
goto out_cleanup;
}
r = amdgpu_bo_reserve(sobj, false);
if (unlikely(r != 0))
goto out_cleanup;
- r = amdgpu_bo_pin(sobj, sdomain, &saddr);
+ r = amdgpu_bo_pin(sobj, sdomain);
+ if (r) {
+ amdgpu_bo_unreserve(sobj);
+ goto out_cleanup;
+ }
+ r = amdgpu_ttm_alloc_gart(&sobj->tbo);
amdgpu_bo_unreserve(sobj);
if (r) {
goto out_cleanup;
}
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
- ttm_bo_type_kernel, NULL, &dobj);
+ saddr = amdgpu_bo_gpu_offset(sobj);
+ bp.domain = ddomain;
+ r = amdgpu_bo_create(adev, &bp, &dobj);
if (r) {
goto out_cleanup;
}
r = amdgpu_bo_reserve(dobj, false);
if (unlikely(r != 0))
goto out_cleanup;
- r = amdgpu_bo_pin(dobj, ddomain, &daddr);
+ r = amdgpu_bo_pin(dobj, ddomain);
+ if (r) {
+ amdgpu_bo_unreserve(sobj);
+ goto out_cleanup;
+ }
+ r = amdgpu_ttm_alloc_gart(&dobj->tbo);
amdgpu_bo_unreserve(dobj);
if (r) {
goto out_cleanup;
}
+ daddr = amdgpu_bo_gpu_offset(dobj);
if (adev->mman.buffer_funcs) {
time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 92be7f6de197..7679c068c89a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -55,15 +55,15 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
kfree_rcu(list, rhead);
}
-static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries,
- int *id)
+ struct amdgpu_bo_list **list_out)
{
- int r;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo_list *list;
+ int r;
+
list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
if (!list)
@@ -78,16 +78,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
return r;
}
- /* idr alloc should be called only after initialization of bo list. */
- mutex_lock(&fpriv->bo_list_lock);
- r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
- mutex_unlock(&fpriv->bo_list_lock);
- if (r < 0) {
- amdgpu_bo_list_free(list);
- return r;
- }
- *id = r;
-
+ *list_out = list;
return 0;
}
@@ -263,55 +254,79 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
kfree(list);
}
-int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp)
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param)
{
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-
- struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- union drm_amdgpu_bo_list *args = data;
- uint32_t handle = args->in.list_handle;
- const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
-
struct drm_amdgpu_bo_list_entry *info;
- struct amdgpu_bo_list *list;
-
int r;
- info = kvmalloc_array(args->in.bo_number,
- sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
+ info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
if (!info)
return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
r = -EFAULT;
- if (likely(info_size == args->in.bo_info_size)) {
- unsigned long bytes = args->in.bo_number *
- args->in.bo_info_size;
+ if (likely(info_size == in->bo_info_size)) {
+ unsigned long bytes = in->bo_number *
+ in->bo_info_size;
if (copy_from_user(info, uptr, bytes))
goto error_free;
} else {
- unsigned long bytes = min(args->in.bo_info_size, info_size);
+ unsigned long bytes = min(in->bo_info_size, info_size);
unsigned i;
- memset(info, 0, args->in.bo_number * info_size);
- for (i = 0; i < args->in.bo_number; ++i) {
+ memset(info, 0, in->bo_number * info_size);
+ for (i = 0; i < in->bo_number; ++i) {
if (copy_from_user(&info[i], uptr, bytes))
goto error_free;
- uptr += args->in.bo_info_size;
+ uptr += in->bo_info_size;
}
}
+ *info_param = info;
+ return 0;
+
+error_free:
+ kvfree(info);
+ return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ union drm_amdgpu_bo_list *args = data;
+ uint32_t handle = args->in.list_handle;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+ struct amdgpu_bo_list *list;
+ int r;
+
+ r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+ if (r)
+ goto error_free;
+
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
- &handle);
+ &list);
if (r)
goto error_free;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (r < 0) {
+ amdgpu_bo_list_free(list);
+ return r;
+ }
+
+ handle = r;
break;
case AMDGPU_BO_LIST_OP_DESTROY:
@@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
error_free:
- kvfree(info);
+ if (info)
+ kvfree(info);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 71a57b2f7f04..693ec5ea4950 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -23,7 +23,6 @@
*/
#include <linux/list.h>
#include <linux/slab.h>
-#include <linux/pci.h>
#include <drm/drmP.h>
#include <linux/firmware.h>
#include <drm/amdgpu_drm.h>
@@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
WARN(1, "Invalid indirect register space");
}
-static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
- enum cgs_resource_type resource_type,
- uint64_t size,
- uint64_t offset,
- uint64_t *resource_base)
-{
- CGS_FUNC_ADEV;
-
- if (resource_base == NULL)
- return -EINVAL;
-
- switch (resource_type) {
- case CGS_RESOURCE_TYPE_MMIO:
- if (adev->rmmio_size == 0)
- return -ENOENT;
- if ((offset + size) > adev->rmmio_size)
- return -EINVAL;
- *resource_base = adev->rmmio_base;
- return 0;
- case CGS_RESOURCE_TYPE_DOORBELL:
- if (adev->doorbell.size == 0)
- return -ENOENT;
- if ((offset + size) > adev->doorbell.size)
- return -EINVAL;
- *resource_base = adev->doorbell.base;
- return 0;
- case CGS_RESOURCE_TYPE_FB:
- case CGS_RESOURCE_TYPE_IO:
- case CGS_RESOURCE_TYPE_ROM:
- default:
- return -EINVAL;
- }
-}
-
-static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
- unsigned table, uint16_t *size,
- uint8_t *frev, uint8_t *crev)
-{
- CGS_FUNC_ADEV;
- uint16_t data_start;
-
- if (amdgpu_atom_parse_data_header(
- adev->mode_info.atom_context, table, size,
- frev, crev, &data_start))
- return (uint8_t*)adev->mode_info.atom_context->bios +
- data_start;
-
- return NULL;
-}
-
-static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
- uint8_t *frev, uint8_t *crev)
-{
- CGS_FUNC_ADEV;
-
- if (amdgpu_atom_parse_cmd_header(
- adev->mode_info.atom_context, table,
- frev, crev))
- return 0;
-
- return -EINVAL;
-}
-
-static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
- void *args)
-{
- CGS_FUNC_ADEV;
-
- return amdgpu_atom_execute_table(
- adev->mode_info.atom_context, table, args);
-}
-
-static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
- enum amd_ip_block_type block_type,
- enum amd_clockgating_state state)
-{
- CGS_FUNC_ADEV;
- int i, r = -1;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
-
- if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev,
- state);
- break;
- }
- }
- return r;
-}
-
-static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
- enum amd_ip_block_type block_type,
- enum amd_powergating_state state)
-{
- CGS_FUNC_ADEV;
- int i, r = -1;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
-
- if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev,
- state);
- break;
- }
- }
- return r;
-}
-
-
static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
{
CGS_FUNC_ADEV;
@@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
return result;
}
-static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
-{
- CGS_FUNC_ADEV;
- if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
- release_firmware(adev->pm.fw);
- adev->pm.fw = NULL;
- return 0;
- }
- /* cannot release other firmware because they are not created by cgs */
- return -EINVAL;
-}
-
static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
enum cgs_ucode_id type)
{
@@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
return fw_version;
}
-static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
- bool en)
-{
- CGS_FUNC_ADEV;
-
- if (adev->gfx.rlc.funcs->enter_safe_mode == NULL ||
- adev->gfx.rlc.funcs->exit_safe_mode == NULL)
- return 0;
-
- if (en)
- adev->gfx.rlc.funcs->enter_safe_mode(adev);
- else
- adev->gfx.rlc.funcs->exit_safe_mode(adev);
-
- return 0;
-}
-
-static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
- bool lock)
-{
- CGS_FUNC_ADEV;
-
- if (lock)
- mutex_lock(&adev->grbm_idx_mutex);
- else
- mutex_unlock(&adev->grbm_idx_mutex);
-}
-
static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
enum cgs_ucode_id type,
struct cgs_firmware_info *info)
@@ -470,17 +314,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
- strcpy(fw_name, "radeon/bonaire_k_smc.bin");
+ strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
- strcpy(fw_name, "radeon/bonaire_smc.bin");
+ strcpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
- strcpy(fw_name, "radeon/hawaii_k_smc.bin");
+ strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
- strcpy(fw_name, "radeon/hawaii_smc.bin");
+ strcpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:
@@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_POLARIS12:
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
break;
+ case CHIP_VEGAM:
+ strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
@@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_VEGA12:
strcpy(fw_name, "amdgpu/vega12_smc.bin");
break;
+ case CHIP_VEGA20:
+ strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
@@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
return 0;
}
-static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
-{
- CGS_FUNC_ADEV;
- return amdgpu_sriov_vf(adev);
-}
-
-static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
- struct cgs_display_info *info)
-{
- CGS_FUNC_ADEV;
- struct cgs_mode_info *mode_info;
-
- if (info == NULL)
- return -EINVAL;
-
- mode_info = info->mode_info;
- if (mode_info)
- /* if the displays are off, vblank time is max */
- mode_info->vblank_time_us = 0xffffffff;
-
- if (!amdgpu_device_has_dc_support(adev)) {
- struct amdgpu_crtc *amdgpu_crtc;
- struct drm_device *ddev = adev->ddev;
- struct drm_crtc *crtc;
- uint32_t line_time_us, vblank_lines;
-
- if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc,
- &ddev->mode_config.crtc_list, head) {
- amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (crtc->enabled) {
- info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
- info->display_count++;
- }
- if (mode_info != NULL &&
- crtc->enabled && amdgpu_crtc->enabled &&
- amdgpu_crtc->hw_mode.clock) {
- line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
- amdgpu_crtc->hw_mode.clock;
- vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
- amdgpu_crtc->hw_mode.crtc_vdisplay +
- (amdgpu_crtc->v_border * 2);
- mode_info->vblank_time_us = vblank_lines * line_time_us;
- mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
- /* we have issues with mclk switching with refresh rates
- * over 120 hz on the non-DC code.
- */
- if (mode_info->refresh_rate > 120)
- mode_info->vblank_time_us = 0;
- mode_info = NULL;
- }
- }
- }
- } else {
- info->display_count = adev->pm.pm_display_cfg.num_display;
- if (mode_info != NULL) {
- mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time;
- mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh;
- }
- }
- return 0;
-}
-
-
-static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
-{
- CGS_FUNC_ADEV;
-
- adev->pm.dpm_enabled = enabled;
-
- return 0;
-}
-
static const struct cgs_ops amdgpu_cgs_ops = {
.read_register = amdgpu_cgs_read_register,
.write_register = amdgpu_cgs_write_register,
.read_ind_register = amdgpu_cgs_read_ind_register,
.write_ind_register = amdgpu_cgs_write_ind_register,
- .get_pci_resource = amdgpu_cgs_get_pci_resource,
- .atom_get_data_table = amdgpu_cgs_atom_get_data_table,
- .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
- .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
.get_firmware_info = amdgpu_cgs_get_firmware_info,
- .rel_firmware = amdgpu_cgs_rel_firmware,
- .set_powergating_state = amdgpu_cgs_set_powergating_state,
- .set_clockgating_state = amdgpu_cgs_set_clockgating_state,
- .get_active_displays_info = amdgpu_cgs_get_active_displays_info,
- .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
- .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
- .enter_safe_mode = amdgpu_cgs_enter_safe_mode,
- .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
};
struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 96501ff0e55b..c770d73352a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -212,30 +212,21 @@ static void
amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
enum drm_connector_status status)
{
- struct drm_encoder *best_encoder = NULL;
- struct drm_encoder *encoder = NULL;
+ struct drm_encoder *best_encoder;
+ struct drm_encoder *encoder;
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
bool connected;
int i;
best_encoder = connector_funcs->best_encoder(connector);
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if ((encoder == best_encoder) && (status == connector_status_connected))
connected = true;
else
connected = false;
amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected);
-
}
}
@@ -246,17 +237,11 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
struct drm_encoder *encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type == encoder_type)
return encoder;
}
+
return NULL;
}
@@ -349,22 +334,24 @@ static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
int ret;
if (amdgpu_connector->edid) {
- drm_mode_connector_update_edid_property(connector, amdgpu_connector->edid);
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
ret = drm_add_edid_modes(connector, amdgpu_connector->edid);
return ret;
}
- drm_mode_connector_update_edid_property(connector, NULL);
+ drm_connector_update_edid_property(connector, NULL);
return 0;
}
static struct drm_encoder *
amdgpu_connector_best_single_encoder(struct drm_connector *connector)
{
- int enc_id = connector->encoder_ids[0];
+ struct drm_encoder *encoder;
+ int i;
+
+ /* pick the first one */
+ drm_connector_for_each_possible_encoder(connector, encoder, i)
+ return encoder;
- /* pick the encoder ids */
- if (enc_id)
- return drm_encoder_find(connector->dev, NULL, enc_id);
return NULL;
}
@@ -691,7 +678,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
return ret;
}
-static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -843,7 +830,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
return ret;
}
-static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
@@ -985,9 +972,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct drm_encoder *encoder = NULL;
const struct drm_encoder_helper_funcs *encoder_funcs;
- int i, r;
+ int r;
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
@@ -1077,14 +1063,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* find analog encoder */
if (amdgpu_connector->dac_load_detect) {
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
- if (!encoder)
- continue;
+ struct drm_encoder *encoder;
+ int i;
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
continue;
@@ -1132,18 +1114,11 @@ exit:
static struct drm_encoder *
amdgpu_connector_dvi_encoder(struct drm_connector *connector)
{
- int enc_id = connector->encoder_ids[0];
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct drm_encoder *encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
- if (!encoder)
- continue;
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (amdgpu_connector->use_digital == true) {
if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
return encoder;
@@ -1158,8 +1133,9 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
/* then check use digitial */
/* pick the first one */
- if (enc_id)
- return drm_encoder_find(connector->dev, NULL, enc_id);
+ drm_connector_for_each_possible_encoder(connector, encoder, i)
+ return encoder;
+
return NULL;
}
@@ -1172,7 +1148,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector)
amdgpu_connector->use_digital = true;
}
-static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
@@ -1296,15 +1272,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
struct amdgpu_encoder *amdgpu_encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
switch (amdgpu_encoder->encoder_id) {
@@ -1326,14 +1294,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
int i;
bool found = false;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
found = true;
@@ -1448,7 +1409,7 @@ out:
return ret;
}
-static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50e6b29..7c5cc33d0cda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,6 +31,7 @@
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
@@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
return 0;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
+{
+ int r;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+
+ r = amdgpu_bo_create_list_entry_array(data, &info);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+ &p->bo_list);
+ if (r)
+ goto error_free;
+
+ kvfree(info);
+ return 0;
+
+error_free:
+ if (info)
+ kvfree(info);
+
+ return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
@@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
+ case AMDGPU_CHUNK_ID_BO_HANDLES:
+ size = sizeof(struct drm_amdgpu_bo_list_in);
+ if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+ ret = -EINVAL;
+ goto free_partial_kdata;
+ }
+
+ ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+
+ break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ amdgpu_vm_set_task_info(vm);
+
return 0;
free_all_kdata:
@@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
return;
}
- total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+ total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
@@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
/* Do the same for visible VRAM if half of it is free */
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
* to move it. Don't move anything if the threshold is zero.
*/
if (p->bytes_moved < p->bytes_moved_threshold) {
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
* visible VRAM if we've depleted our allowance to do
@@ -381,9 +423,8 @@ retry:
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -411,7 +452,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- u64 initial_bytes_moved, bytes_moved;
bool update_bytes_moved_vis;
uint32_t other;
@@ -435,18 +475,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
continue;
/* Good we can try to move this BO somewhere else */
- amdgpu_ttm_placement_from_domain(bo, other);
update_bytes_moved_vis =
- adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
- initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+ !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_bo_in_cpu_visible_vram(bo);
+ amdgpu_ttm_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- bytes_moved = atomic64_read(&adev->num_bytes_moved) -
- initial_bytes_moved;
- p->bytes_moved += bytes_moved;
+ p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
- p->bytes_moved_vis += bytes_moved;
+ p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r))
break;
@@ -528,15 +564,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
unsigned i, tries = 10;
+ struct amdgpu_bo *gds;
+ struct amdgpu_bo *gws;
+ struct amdgpu_bo *oa;
int r;
INIT_LIST_HEAD(&p->validated);
- p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+ /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+ if (!p->bo_list)
+ p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+ else
+ mutex_lock(&p->bo_list->lock);
+
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev);
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
}
INIT_LIST_HEAD(&duplicates);
@@ -658,31 +702,36 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
+
if (p->bo_list) {
- struct amdgpu_bo *gds = p->bo_list->gds_obj;
- struct amdgpu_bo *gws = p->bo_list->gws_obj;
- struct amdgpu_bo *oa = p->bo_list->oa_obj;
struct amdgpu_vm *vm = &fpriv->vm;
unsigned i;
+ gds = p->bo_list->gds_obj;
+ gws = p->bo_list->gws_obj;
+ oa = p->bo_list->oa_obj;
for (i = 0; i < p->bo_list->num_entries; i++) {
struct amdgpu_bo *bo = p->bo_list->array[i].robj;
p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
}
+ } else {
+ gds = p->adev->gds.gds_gfx_bo;
+ gws = p->adev->gds.gws_gfx_bo;
+ oa = p->adev->gds.oa_gfx_bo;
+ }
- if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds);
- p->job->gds_size = amdgpu_bo_size(gds);
- }
- if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws);
- p->job->gws_size = amdgpu_bo_size(gws);
- }
- if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa);
- p->job->oa_size = amdgpu_bo_size(oa);
- }
+ if (gds) {
+ p->job->gds_base = amdgpu_bo_gpu_offset(gds);
+ p->job->gds_size = amdgpu_bo_size(gds);
+ }
+ if (gws) {
+ p->job->gws_base = amdgpu_bo_gpu_offset(gws);
+ p->job->gws_size = amdgpu_bo_size(gws);
+ }
+ if (oa) {
+ p->job->oa_base = amdgpu_bo_gpu_offset(oa);
+ p->job->oa_size = amdgpu_bo_size(oa);
}
if (!r && p->uf_entry.robj) {
@@ -863,11 +912,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_ring *ring = p->job->ring;
+ struct amdgpu_ring *ring = p->ring;
int r;
/* Only for UVD/VCE VM emulation */
- if (p->job->ring->funcs->parse_cs) {
+ if (p->ring->funcs->parse_cs) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -925,6 +974,10 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
r = amdgpu_bo_vm_update_pte(p);
if (r)
return r;
+
+ r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+ if (r)
+ return r;
}
return amdgpu_cs_sync_rings(p);
@@ -977,10 +1030,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
}
- if (parser->job->ring && parser->job->ring != ring)
+ if (parser->ring && parser->ring != ring)
return -EINVAL;
- parser->job->ring = ring;
+ parser->ring = ring;
r = amdgpu_ib_get(adev, vm,
ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
@@ -999,11 +1052,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
/* UVD & VCE fw doesn't support user fences */
if (parser->job->uf_addr && (
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+ parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+ return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1154,8 +1207,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
- struct amdgpu_ring *ring = p->job->ring;
+ struct amdgpu_ring *ring = p->ring;
struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+ enum drm_sched_priority priority;
struct amdgpu_job *job;
unsigned i;
uint64_t seq;
@@ -1186,7 +1240,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
job->owner = p->filp;
- job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
@@ -1204,11 +1257,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->uf_sequence = seq;
amdgpu_job_free_resources(job);
- amdgpu_ring_priority_get(job->ring, job->base.s_priority);
trace_amdgpu_cs_ioctl(job);
+ priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+ ring = to_amdgpu_ring(entity->sched);
+ amdgpu_ring_priority_get(ring, priority);
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 09d35051fdd6..83e3b320a793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -90,8 +90,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
if (ring == &adev->gfx.kiq.ring)
continue;
- r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
- rq, amdgpu_sched_jobs, &ctx->guilty);
+ r = drm_sched_entity_init(&ctx->rings[i].entity,
+ &rq, 1, &ctx->guilty);
if (r)
goto failed;
}
@@ -104,15 +104,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
failed:
for (j = 0; j < i; j++)
- drm_sched_entity_fini(&adev->rings[j]->sched,
+ drm_sched_entity_destroy(&adev->rings[j]->sched,
&ctx->rings[j].entity);
kfree(ctx->fences);
ctx->fences = NULL;
return r;
}
-static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct kref *ref)
{
+ struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
@@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
kfree(ctx->fences);
ctx->fences = NULL;
- for (i = 0; i < adev->num_rings; i++)
- drm_sched_entity_fini(&adev->rings[i]->sched,
- &ctx->rings[i].entity);
-
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
mutex_destroy(&ctx->lock);
+
+ kfree(ctx);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
+ u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
- amdgpu_ctx_fini(ctx);
+ for (i = 0; i < ctx->adev->num_rings; i++) {
- kfree(ctx);
+ if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+ continue;
+
+ drm_sched_entity_destroy(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity);
+ }
+
+ amdgpu_ctx_fini(ref);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
@@ -419,9 +426,11 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
if (other) {
signed long r;
- r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+ r = dma_fence_wait(other, true);
if (r < 0) {
- DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
return r;
}
}
@@ -435,16 +444,74 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
idr_init(&mgr->ctx_handles);
}
+void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i;
+ long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+
+ idp = &mgr->ctx_handles;
+
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(idp, ctx, id) {
+
+ if (!ctx->adev) {
+ mutex_unlock(&mgr->lock);
+ return;
+ }
+
+ for (i = 0; i < ctx->adev->num_rings; i++) {
+
+ if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+ continue;
+
+ max_wait = drm_sched_entity_flush(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity, max_wait);
+ }
+ }
+ mutex_unlock(&mgr->lock);
+}
+
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i;
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id) {
+
+ if (!ctx->adev)
+ return;
+
+ for (i = 0; i < ctx->adev->num_rings; i++) {
+
+ if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+ continue;
+
+ if (kref_read(&ctx->refcount) == 1)
+ drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity);
+ else
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ }
+ }
+}
+
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id;
+ amdgpu_ctx_mgr_entity_fini(mgr);
+
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
- if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
+ if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
DRM_ERROR("ctx %p is still alive\n", ctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 448d69fe3756..f5fb93795a69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -28,8 +28,13 @@
#include <linux/debugfs.h>
#include "amdgpu.h"
-/*
- * Debugfs
+/**
+ * amdgpu_debugfs_add_files - Add simple debugfs entries
+ *
+ * @adev: Device to attach debugfs entries to
+ * @files: Array of function callbacks that respond to reads
+ * @nfiles: Number of callbacks to register
+ *
*/
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
const struct drm_info_list *files,
@@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
#if defined(CONFIG_DEBUG_FS)
-
+/**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+ *
+ * @read: True if reading
+ * @f: open file handle
+ * @buf: User buffer to write/read to
+ * @size: Number of bytes to write/read
+ * @pos: Offset to seek to
+ *
+ * This debugfs entry has special meaning on the offset being sought.
+ * Various bits have different meanings:
+ *
+ * Bit 62: Indicates a GRBM bank switch is needed
+ * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
+ * zero)
+ * Bits 24..33: The SE or ME selector if needed
+ * Bits 34..43: The SH (or SA) or PIPE selector if needed
+ * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
+ *
+ * Bit 23: Indicates that the PM power gating lock should be held
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transistion.
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
char __user *buf, size_t size, loff_t *pos)
{
@@ -164,19 +195,37 @@ end:
return result;
}
-
+/**
+ * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
+ */
static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
}
+/**
+ * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
+ */
static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}
+
+/**
+ * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return result;
}
+/**
+ * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
return result;
}
+/**
+ * amdgpu_debugfs_regs_smc_read - Read from a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_regs_smc_write - Write to a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
return result;
}
+/**
+ * amdgpu_debugfs_gca_config_read - Read from gfx config data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * This file is used to access configuration data in a somewhat
+ * stable fashion. The format is a series of DWORDs with the first
+ * indicating which revision it is. New content is appended to the
+ * end so that older software can still read the data.
+ */
+
static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset is treated as the BYTE address of one of the sensors
+ * enumerated in amd/include/kgd_pp_interface.h under the
+ * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK
+ * you would use the offset 3 * 4 = 12.
+ */
static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
if (size & 3 || *pos & 0x3)
return -EINVAL;
- if (amdgpu_dpm == 0)
+ if (!adev->pm.dpm_enabled)
return -EINVAL;
/* convert offset to sensor number */
@@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
return !r ? outsize : r;
}
+/** amdgpu_debugfs_wave_read - Read WAVE STATUS data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..6: Byte offset into data
+ * Bits 7..14: SE selector
+ * Bits 15..22: SH/SA selector
+ * Bits 23..30: CU/{WGP+SIMD} selector
+ * Bits 31..36: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ *
+ * The returned data begins with one DWORD of version information
+ * Followed by WAVE STATUS registers relevant to the GFX IP version
+ * being used. See gfx_v8_0_read_wave_data() for an example output.
+ */
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
return result;
}
+/** amdgpu_debugfs_gpr_read - Read wave gprs
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..11: Byte offset into data
+ * Bits 12..19: SE selector
+ * Bits 20..27: SH/SA selector
+ * Bits 28..35: CU/{WGP+SIMD} selector
+ * Bits 36..43: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ * Bits 52..59: Thread selector
+ * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
+ *
+ * The return data comes from the SGPR or VGPR register bank for
+ * the selected operational unit.
+ */
static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = {
"amdgpu_gpr",
};
+/**
+ * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
+ * register access.
+ *
+ * @adev: The device to attach the debugfs entries to
+ */
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
struct drm_minor *minor = adev->ddev->primary;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 34af664b9f93..386a7b34d2f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,7 @@
* Alex Deucher
* Jerome Glisse
*/
+#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
@@ -83,8 +84,10 @@ static const char *amdgpu_asic_name[] = {
"POLARIS10",
"POLARIS11",
"POLARIS12",
+ "VEGAM",
"VEGA10",
"VEGA12",
+ "VEGA20",
"RAVEN",
"LAST",
};
@@ -673,34 +676,34 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_gart_location - try to find GTT location
+ * amdgpu_device_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
*
- * Function will place try to place GTT before or after VRAM.
+ * Function will place try to place GART before or after VRAM.
*
- * If GTT size is bigger than space left then we ajust GTT size.
+ * If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
- *
- * FIXME: when reducing GTT size align new size on power of 2.
*/
void amdgpu_device_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 size_af, size_bf;
+ mc->gart_size += adev->pm.smu_prv_buffer_size;
+
size_af = adev->gmc.mc_mask - mc->vram_end;
size_bf = mc->vram_start;
if (size_bf > size_af) {
if (mc->gart_size > size_bf) {
- dev_warn(adev->dev, "limiting GTT\n");
+ dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_bf;
}
mc->gart_start = 0;
} else {
if (mc->gart_size > size_af) {
- dev_warn(adev->dev, "limiting GTT\n");
+ dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_af;
}
/* VCE doesn't like it when BOs cross a 4GB segment, so align
@@ -709,7 +712,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
}
mc->gart_end = mc->gart_start + mc->gart_size - 1;
- dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
@@ -907,6 +910,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
}
}
+static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
+{
+ struct sysinfo si;
+ bool is_os_64 = (sizeof(void *) == 8) ? true : false;
+ uint64_t total_memory;
+ uint64_t dram_size_seven_GB = 0x1B8000000;
+ uint64_t dram_size_three_GB = 0xB8000000;
+
+ if (amdgpu_smu_memory_pool_size == 0)
+ return;
+
+ if (!is_os_64) {
+ DRM_WARN("Not 64-bit OS, feature not supported\n");
+ goto def_value;
+ }
+ si_meminfo(&si);
+ total_memory = (uint64_t)si.totalram * si.mem_unit;
+
+ if ((amdgpu_smu_memory_pool_size == 1) ||
+ (amdgpu_smu_memory_pool_size == 2)) {
+ if (total_memory < dram_size_three_GB)
+ goto def_value1;
+ } else if ((amdgpu_smu_memory_pool_size == 4) ||
+ (amdgpu_smu_memory_pool_size == 8)) {
+ if (total_memory < dram_size_seven_GB)
+ goto def_value1;
+ } else {
+ DRM_WARN("Smu memory pool size not supported\n");
+ goto def_value;
+ }
+ adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
+
+ return;
+
+def_value1:
+ DRM_WARN("No enough system memory\n");
+def_value:
+ adev->pm.smu_prv_buffer_size = 0;
+}
+
/**
* amdgpu_device_check_arguments - validate module params
*
@@ -948,6 +991,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vm_fragment_size = -1;
}
+ amdgpu_device_check_smu_prv_buffer_size(adev);
+
amdgpu_device_check_vm_size(adev);
amdgpu_device_check_block_size(adev);
@@ -1031,7 +1076,7 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
/**
* amdgpu_device_ip_set_clockgating_state - set the CG state
*
- * @adev: amdgpu_device pointer
+ * @dev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @state: clockgating state (gate or ungate)
*
@@ -1039,10 +1084,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
* the hardware IP specified.
* Returns the error code from the last instance.
*/
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = dev;
int i, r = 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1064,7 +1110,7 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
/**
* amdgpu_device_ip_set_powergating_state - set the PG state
*
- * @adev: amdgpu_device pointer
+ * @dev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @state: powergating state (gate or ungate)
*
@@ -1072,10 +1118,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
* the hardware IP specified.
* Returns the error code from the last instance.
*/
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state)
{
+ struct amdgpu_device *adev = dev;
int i, r = 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1174,7 +1221,7 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
* amdgpu_device_ip_get_ip_block - get a hw IP pointer
*
* @adev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
* Returns a pointer to the hardware IP block structure
* if it exists for the asic, otherwise NULL.
@@ -1320,9 +1367,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -1339,6 +1387,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
+ case CHIP_VEGA20:
default:
return 0;
case CHIP_VEGA10:
@@ -1428,9 +1477,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
@@ -1472,6 +1522,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
#endif
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
if (adev->asic_type == CHIP_RAVEN)
adev->family = AMDGPU_FAMILY_RV;
@@ -1499,6 +1550,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return -EAGAIN;
}
+ adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1660,6 +1713,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1671,6 +1725,35 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
}
}
}
+
+ return 0;
+}
+
+static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
+{
+ int i = 0, r;
+
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ /* skip CG for VCE/UVD, it's handled specially */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->funcs->set_powergating_state) {
+ /* enable powergating to save power */
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ AMD_PG_STATE_GATE);
+ if (r) {
+ DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+ }
return 0;
}
@@ -1704,8 +1787,11 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
}
}
- mod_delayed_work(system_wq, &adev->late_init_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
+ amdgpu_device_ip_late_set_cg_state(adev);
+ amdgpu_device_ip_late_set_pg_state(adev);
+
+ queue_delayed_work(system_wq, &adev->late_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
amdgpu_device_fill_reset_magic(adev);
@@ -1742,6 +1828,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@@ -1759,6 +1847,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1829,7 +1918,11 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, late_init_work.work);
- amdgpu_device_ip_late_set_cg_state(adev);
+ int r;
+
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
}
/**
@@ -1857,6 +1950,10 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
}
+ /* call smu to disable gfx off feature first when suspend */
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
+
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -2080,23 +2177,30 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
switch (asic_type) {
#if defined(CONFIG_DRM_AMD_DC)
case CHIP_BONAIRE:
- case CHIP_HAWAII:
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
+ /*
+ * We have systems in the wild with these ASICs that require
+ * LVDS and VGA support which is not supported with DC.
+ *
+ * Fallback to the non-DC driver here by default so as not to
+ * cause regressions.
+ */
+ return amdgpu_dc > 0;
+ case CHIP_HAWAII:
case CHIP_CARRIZO:
case CHIP_STONEY:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_TONGA:
case CHIP_FIJI:
-#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
- return amdgpu_dc != 0;
-#endif
case CHIP_VEGA10:
case CHIP_VEGA12:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+ case CHIP_VEGA20:
+#ifdef CONFIG_X86
case CHIP_RAVEN:
#endif
return amdgpu_dc != 0;
@@ -2125,7 +2229,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
* amdgpu_device_init - initialize the driver
*
* @adev: amdgpu_device pointer
- * @pdev: drm dev pointer
+ * @ddev: drm dev pointer
* @pdev: pci dev pointer
* @flags: driver flags
*
@@ -2216,6 +2320,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
+ adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
+
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
@@ -2375,10 +2481,6 @@ fence_driver_init:
goto failed;
}
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
-
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
@@ -2500,8 +2602,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/**
* amdgpu_device_suspend - initiate device suspend
*
- * @pdev: drm dev pointer
- * @state: suspend state
+ * @dev: drm dev pointer
+ * @suspend: suspend state
+ * @fbcon : notify the fbdev of suspend
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
@@ -2539,7 +2642,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
+ struct drm_framebuffer *fb = crtc->primary->fb;
struct amdgpu_bo *robj;
if (amdgpu_crtc->cursor_bo) {
@@ -2551,10 +2654,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
}
}
- if (rfb == NULL || rfb->obj == NULL) {
+ if (fb == NULL || fb->obj[0] == NULL) {
continue;
}
- robj = gem_to_amdgpu_bo(rfb->obj);
+ robj = gem_to_amdgpu_bo(fb->obj[0]);
/* don't unpin kernel fb objects */
if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
@@ -2599,7 +2702,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
/**
* amdgpu_device_resume - initiate device resume
*
- * @pdev: drm dev pointer
+ * @dev: drm dev pointer
+ * @resume: resume state
+ * @fbcon : notify the fbdev of resume
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
@@ -2640,11 +2745,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
}
amdgpu_fence_driver_resume(adev);
- if (resume) {
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
- }
r = amdgpu_device_ip_late_init(adev);
if (r)
@@ -2658,11 +2758,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
- r = amdgpu_bo_pin(aobj,
- AMDGPU_GEM_DOMAIN_VRAM,
- &amdgpu_crtc->cursor_addr);
+ r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
if (r != 0)
DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
amdgpu_bo_unreserve(aobj);
}
}
@@ -2736,6 +2835,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return true;
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -2792,6 +2894,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
{
int i;
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -3061,6 +3166,7 @@ out:
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu device pointer
+ * @from_hypervisor: request from hypervisor
*
* do VF FLR and reinitialize Asic
* return 0 means successed otherwise failed
@@ -3087,20 +3193,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
- amdgpu_virt_release_full_gpu(adev, true);
if (r)
goto error;
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
+error:
+ amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
atomic_inc(&adev->vram_lost_counter);
r = amdgpu_device_handle_vram_lost(adev);
}
-error:
-
return r;
}
@@ -3109,7 +3214,7 @@ error:
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
- * @force forces reset regardless of amdgpu_gpu_recovery
+ * @force: forces reset regardless of amdgpu_gpu_recovery
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
@@ -3117,7 +3222,6 @@ error:
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force)
{
- struct drm_atomic_state *state = NULL;
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -3140,10 +3244,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
- /* store modesetting */
- if (amdgpu_device_has_dc_support(adev))
- state = drm_atomic_helper_suspend(adev->ddev);
-
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -3153,7 +3253,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
kthread_park(ring->sched.thread);
- if (job && job->ring->idx != i)
+ if (job && job->base.sched == &ring->sched)
continue;
drm_sched_hw_job_reset(&ring->sched, &job->base);
@@ -3177,16 +3277,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* or all rings (in the case @job is NULL)
* after above amdgpu_reset accomplished
*/
- if ((!job || job->ring->idx == i) && !r)
+ if ((!job || job->base.sched == &ring->sched) && !r)
drm_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
}
- if (amdgpu_device_has_dc_support(adev)) {
- if (drm_atomic_helper_resume(adev->ddev, state))
- dev_info(adev->dev, "drm resume failed:%d\n", r);
- } else {
+ if (!amdgpu_device_has_dc_support(adev)) {
drm_helper_resume_force_mode(adev->ddev);
}
@@ -3217,8 +3314,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
- u32 mask;
- int ret;
+ struct pci_dev *pdev;
+ enum pci_bus_speed speed_cap;
+ enum pcie_link_width link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -3236,27 +3334,61 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
if (adev->pm.pcie_gen_mask == 0) {
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (!ret) {
- adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ /* asic caps */
+ pdev = adev->pdev;
+ speed_cap = pcie_get_speed_cap(pdev);
+ if (speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
-
- if (mask & DRM_PCIE_SPEED_25)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
- if (mask & DRM_PCIE_SPEED_50)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
- if (mask & DRM_PCIE_SPEED_80)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
} else {
- adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+ if (speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
+ }
+ /* platform caps */
+ pdev = adev->ddev->pdev->bus->self;
+ speed_cap = pcie_get_speed_cap(pdev);
+ if (speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ } else {
+ if (speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+
}
}
if (adev->pm.pcie_mlw_mask == 0) {
- ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
- if (!ret) {
- switch (mask) {
- case 32:
+ pdev = adev->ddev->pdev->bus->self;
+ link_width = pcie_get_width_cap(pdev);
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
@@ -3265,7 +3397,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 16:
+ case PCIE_LNK_X16:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
@@ -3273,36 +3405,34 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 12:
+ case PCIE_LNK_X12:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 8:
+ case PCIE_LNK_X8:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 4:
+ case PCIE_LNK_X4:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 2:
+ case PCIE_LNK_X2:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 1:
+ case PCIE_LNK_X1:
adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
}
- } else {
- adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
}
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 93f700ab1bfb..6748cd7fc129 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
static void amdgpu_display_flip_callback(struct dma_fence *f,
@@ -151,14 +152,11 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_framebuffer *old_amdgpu_fb;
- struct amdgpu_framebuffer *new_amdgpu_fb;
struct drm_gem_object *obj;
struct amdgpu_flip_work *work;
struct amdgpu_bo *new_abo;
unsigned long flags;
u64 tiling_flags;
- u64 base;
int i, r;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -174,15 +172,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
/* schedule unpin of the old buffer */
- old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- obj = old_amdgpu_fb->obj;
+ obj = crtc->primary->fb->obj[0];
/* take a reference to the old object */
work->old_abo = gem_to_amdgpu_bo(obj);
amdgpu_bo_ref(work->old_abo);
- new_amdgpu_fb = to_amdgpu_framebuffer(fb);
- obj = new_amdgpu_fb->obj;
+ obj = fb->obj[0];
new_abo = gem_to_amdgpu_bo(obj);
/* pin the new buffer */
@@ -192,12 +188,18 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto cleanup;
}
- r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
+ r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
}
+ r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", new_abo);
+ goto unpin;
+ }
+
r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
&work->shared_count,
&work->shared);
@@ -209,7 +211,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
amdgpu_bo_unreserve(new_abo);
- work->base = base;
+ work->base = amdgpu_bo_gpu_offset(new_abo);
work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
@@ -482,31 +484,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
return true;
}
-static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
-{
- struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
- drm_gem_object_put_unlocked(amdgpu_fb->obj);
- drm_framebuffer_cleanup(fb);
- kfree(amdgpu_fb);
-}
-
-static int amdgpu_display_user_framebuffer_create_handle(
- struct drm_framebuffer *fb,
- struct drm_file *file_priv,
- unsigned int *handle)
-{
- struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
- return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle);
-}
-
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
- .destroy = amdgpu_display_user_framebuffer_destroy,
- .create_handle = amdgpu_display_user_framebuffer_create_handle,
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
};
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
{
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -526,11 +509,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
struct drm_gem_object *obj)
{
int ret;
- rfb->obj = obj;
+ rfb->base.obj[0] = obj;
drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
if (ret) {
- rfb->obj = NULL;
+ rfb->base.obj[0] = NULL;
return ret;
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 2b11d808f297..f66e3e3fef0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,7 +23,7 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index e997ebbe43ea..1c4595562f8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -28,6 +28,7 @@
#include "amdgpu_i2c.h"
#include "amdgpu_dpm.h"
#include "atom.h"
+#include "amd_pcie.h"
void amdgpu_dpm_print_class_info(u32 class, u32 class2)
{
@@ -115,6 +116,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
pr_cont("\n");
}
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev->ddev;
+ struct drm_crtc *crtc;
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ adev->pm.dpm.new_active_crtcs = 0;
+ adev->pm.dpm.new_active_crtc_count = 0;
+ if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
+ list_for_each_entry(crtc,
+ &ddev->mode_config.crtc_list, head) {
+ amdgpu_crtc = to_amdgpu_crtc(crtc);
+ if (amdgpu_crtc->enabled) {
+ adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
+ adev->pm.dpm.new_active_crtc_count++;
+ }
+ }
+ }
+}
+
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
{
@@ -432,7 +453,7 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
ATOM_PPLIB_PhaseSheddingLimits_Record *entry;
adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries =
- kzalloc(psl->ucNumEntries *
+ kcalloc(psl->ucNumEntries,
sizeof(struct amdgpu_phase_shedding_limits_entry),
GFP_KERNEL);
if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) {
@@ -916,9 +937,11 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
case AMDGPU_PCIE_GEN3:
return AMDGPU_PCIE_GEN3;
default:
- if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
+ if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
+ (default_gen == AMDGPU_PCIE_GEN3))
return AMDGPU_PCIE_GEN3;
- else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
+ else if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) &&
+ (default_gen == AMDGPU_PCIE_GEN2))
return AMDGPU_PCIE_GEN2;
else
return AMDGPU_PCIE_GEN1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 643d008410c6..ff24e1cc5b65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src {
AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
};
-#define SCLK_DEEP_SLEEP_MASK 0x8
-
struct amdgpu_ps {
u32 caps; /* vbios flags */
u32 class; /* vbios flags */
@@ -289,12 +287,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_force_performance_level(adev, l) \
((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)))
-#define amdgpu_dpm_powergate_uvd(adev, g) \
- ((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)))
-
-#define amdgpu_dpm_powergate_vce(adev, g) \
- ((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)))
-
#define amdgpu_dpm_get_current_power_state(adev) \
((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
@@ -349,11 +341,9 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
-#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \
- virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \
- ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \
- (adev)->powerplay.pp_handle, virtual_addr_low, \
- virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
+#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \
+ ((adev)->powerplay.pp_funcs->set_powergating_by_smu(\
+ (adev)->powerplay.pp_handle, block_type, gate))
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
@@ -367,10 +357,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
(adev)->powerplay.pp_handle, type, parameter, size))
-#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
- ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
- (adev)->powerplay.pp_handle))
-
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
@@ -410,7 +396,6 @@ struct amdgpu_dpm {
u32 tdp_adjustment;
u16 load_line_slope;
bool power_control;
- bool ac_power;
/* special states active */
bool thermal_active;
bool uvd_active;
@@ -445,6 +430,9 @@ struct amdgpu_pm {
uint32_t pcie_gen_mask;
uint32_t pcie_mlw_mask;
struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
+ uint32_t smu_prv_buffer_size;
+ struct amdgpu_bo *smu_prv_buffer;
+ bool ac_power;
};
#define R600_SSTU_DFLT 0
@@ -482,6 +470,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
struct amdgpu_ps *rps);
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
bool amdgpu_is_uvd_state(u32 class, u32 class2);
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0b19482b36b8..8843a06360fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1,10 +1,3 @@
-/**
- * \file amdgpu_drv.c
- * AMD Amdgpu driver
- *
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
/*
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
* All Rights Reserved.
@@ -75,9 +68,11 @@
* - 3.23.0 - Add query for VRAM lost counter
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 25
+#define KMS_DRIVER_MINOR 27
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -109,11 +104,8 @@ int amdgpu_vram_page_split = 512;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_dc = -1;
-int amdgpu_dc_log = 0;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
-int amdgpu_no_evict = 0;
-int amdgpu_direct_gma_size = 0;
uint amdgpu_pcie_gen_cap = 0;
uint amdgpu_pcie_lane_cap = 0;
uint amdgpu_cg_mask = 0xffffffff;
@@ -121,7 +113,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
+uint amdgpu_pp_feature_mask = 0xfffd3fff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@@ -132,164 +125,370 @@ int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode = 0;
+uint amdgpu_smu_memory_pool_size = 0;
+/**
+ * DOC: vramlimit (int)
+ * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM).
+ */
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
+/**
+ * DOC: vis_vramlimit (int)
+ * Restrict the amount of CPU visible VRAM in MiB for testing. The default is 0 (Use full CPU visible VRAM).
+ */
MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
+/**
+ * DOC: gartsize (uint)
+ * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
+ */
MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
+/**
+ * DOC: gttsize (int)
+ * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM,
+ * otherwise 3/4 RAM size).
+ */
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
+/**
+ * DOC: moverate (int)
+ * Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s).
+ */
MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
module_param_named(moverate, amdgpu_moverate, int, 0600);
+/**
+ * DOC: benchmark (int)
+ * Run benchmarks. The default is 0 (Skip benchmarks).
+ */
MODULE_PARM_DESC(benchmark, "Run benchmark");
module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
+/**
+ * DOC: test (int)
+ * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (Skip test, only set 1 to run test).
+ */
MODULE_PARM_DESC(test, "Run tests");
module_param_named(test, amdgpu_testing, int, 0444);
+/**
+ * DOC: audio (int)
+ * Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it.
+ */
MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(audio, amdgpu_audio, int, 0444);
+/**
+ * DOC: disp_priority (int)
+ * Set display Priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto).
+ */
MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)");
module_param_named(disp_priority, amdgpu_disp_priority, int, 0444);
+/**
+ * DOC: hw_i2c (int)
+ * To enable hw i2c engine. Only affects non-DC display handling. The default is 0 (Disabled).
+ */
MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");
module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444);
+/**
+ * DOC: pcie_gen2 (int)
+ * To disable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
+/**
+ * DOC: msi (int)
+ * To disable Message Signaled Interrupts (MSI) functionality (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
+/**
+ * DOC: lockup_timeout (int)
+ * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
+ * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
+ */
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
+/**
+ * DOC: dpm (int)
+ * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto).
+ */
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
+/**
+ * DOC: fw_load_type (int)
+ * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto).
+ */
MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
+/**
+ * DOC: aspm (int)
+ * To disable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(aspm, amdgpu_aspm, int, 0444);
+/**
+ * DOC: runpm (int)
+ * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
+ * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
+ */
MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
+/**
+ * DOC: ip_block_mask (uint)
+ * Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.).
+ * Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have
+ * some IPs or may include multiple instances of an IP so the ordering various from asic to asic. See the driver output in
+ * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
+ */
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+/**
+ * DOC: bapm (int)
+ * Bidirectional Application Power Management (BAPM) used to dynamically share TDP between CPU and GPU. Set value 0 to disable it.
+ * The default -1 (auto, enabled)
+ */
MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(bapm, amdgpu_bapm, int, 0444);
+/**
+ * DOC: deep_color (int)
+ * Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled).
+ */
MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");
module_param_named(deep_color, amdgpu_deep_color, int, 0444);
+/**
+ * DOC: vm_size (int)
+ * Override the size of the GPU's per client virtual address space in GiB. The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
module_param_named(vm_size, amdgpu_vm_size, int, 0444);
+/**
+ * DOC: vm_fragment_size (int)
+ * Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
+/**
+ * DOC: vm_block_size (int)
+ * Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
+/**
+ * DOC: vm_fault_stop (int)
+ * Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop).
+ */
MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
+/**
+ * DOC: vm_debug (int)
+ * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
+ */
MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
+/**
+ * DOC: vm_update_mode (int)
+ * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
+ * is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never).
+ */
MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+/**
+ * DOC: vram_page_split (int)
+ * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512.
+ */
MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
+/**
+ * DOC: exp_hw_support (int)
+ * Enable experimental hw support (1 = enable). The default is 0 (disabled).
+ */
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+/**
+ * DOC: dc (int)
+ * Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(dc, amdgpu_dc, int, 0444);
-MODULE_PARM_DESC(dc_log, "Display Core Log Level (0 = minimal (default), 1 = chatty");
-module_param_named(dc_log, amdgpu_dc_log, int, 0444);
-
+/**
+ * DOC: sched_jobs (int)
+ * Override the max number of jobs supported in the sw queue. The default is 32.
+ */
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
+/**
+ * DOC: sched_hw_submission (int)
+ * Override the max number of HW submissions. The default is 2.
+ */
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
+/**
+ * DOC: ppfeaturemask (uint)
+ * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable power features.
+ */
MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
-MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))");
-module_param_named(no_evict, amdgpu_no_evict, int, 0444);
-
-MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)");
-module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444);
-
+/**
+ * DOC: pcie_gen_cap (uint)
+ * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+/**
+ * DOC: pcie_lane_cap (uint)
+ * Override PCIE lanes capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+/**
+ * DOC: cg_mask (uint)
+ * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ */
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+/**
+ * DOC: pg_mask (uint)
+ * Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ */
MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+/**
+ * DOC: sdma_phase_quantum (uint)
+ * Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32.
+ */
MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+/**
+ * DOC: disable_cu (charp)
+ * Set to disable CUs (It's set like se.sh.cu,...). The default is NULL.
+ */
MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+/**
+ * DOC: virtual_display (charp)
+ * Set to enable virtual display feature. This feature provides a virtual display hardware on headless boards
+ * or in virtualized environments. It will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x. It's the pci address of
+ * the device, plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the pci
+ * device at 26:00.0. The default is NULL.
+ */
MODULE_PARM_DESC(virtual_display,
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
+/**
+ * DOC: ngg (int)
+ * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
+ */
MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
module_param_named(ngg, amdgpu_ngg, int, 0444);
+/**
+ * DOC: prim_buf_per_se (int)
+ * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
+/**
+ * DOC: pos_buf_per_se (int)
+ * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
+/**
+ * DOC: cntl_sb_buf_per_se (int)
+ * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
+/**
+ * DOC: param_buf_per_se (int)
+ * Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
+/**
+ * DOC: job_hang_limit (int)
+ * Set how much time allow a job hang and not drop it. The default is 0.
+ */
MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
+/**
+ * DOC: lbpw (int)
+ * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(lbpw, amdgpu_lbpw, int, 0444);
MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+/**
+ * DOC: gpu_recovery (int)
+ * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
+ */
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+/**
+ * DOC: emu_mode (int)
+ * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
+ */
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+/**
+ * DOC: si_support (int)
+ * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,
+ * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
+ * otherwise using amdgpu driver.
+ */
#ifdef CONFIG_DRM_AMDGPU_SI
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -303,6 +502,12 @@ MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)")
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
+/**
+ * DOC: cik_support (int)
+ * Set CIK support driver. This parameter works after set config CONFIG_DRM_AMDGPU_CIK. For CIK asic, when radeon driver is enabled,
+ * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
+ * otherwise using amdgpu driver.
+ */
#ifdef CONFIG_DRM_AMDGPU_CIK
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -316,6 +521,16 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
+/**
+ * DOC: smu_memory_pool_size (uint)
+ * It is used to reserve gtt for smu debug usage, setting value 0 to disable it. The actual size is value * 256MiB.
+ * E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(smu_memory_pool_size,
+ "reserve gtt for smu debug usage, 0 = disable,"
+ "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -534,6 +749,9 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ /* VEGAM */
+ {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
/* Vega 10 */
{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@@ -550,6 +768,13 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ /* Vega 20 */
+ {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
@@ -647,7 +872,7 @@ retry_init:
err_pci:
pci_disable_device(pdev);
err_free:
- drm_dev_unref(dev);
+ drm_dev_put(dev);
return ret;
}
@@ -657,7 +882,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
drm_dev_unregister(dev);
- drm_dev_unref(dev);
+ drm_dev_put(dev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@@ -838,9 +1063,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
.runtime_idle = amdgpu_pmops_runtime_idle,
};
+static int amdgpu_flush(struct file *f, fl_owner_t id)
+{
+ struct drm_file *file_priv = f->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+
+ amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr);
+
+ return 0;
+}
+
+
static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
+ .flush = amdgpu_flush,
.release = drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = amdgpu_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index 94138abe093b..ae8fac34f7a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -46,7 +46,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->devices & amdgpu_connector->devices) {
- drm_mode_connector_attach_encoder(connector, encoder);
+ drm_connector_attach_encoder(connector, encoder);
if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector);
adev->mode_info.bl_encoder = amdgpu_encoder;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 12063019751b..d44b76455e89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
fb_tiled);
- domain = amdgpu_display_framebuffer_domains(adev);
+ domain = amdgpu_display_supported_domains(adev);
height = ALIGN(mode_cmd->height, 8);
size = mode_cmd->pitches[0] * height;
@@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
- true, NULL, &gobj);
+ ttm_bo_type_kernel, NULL, &gobj);
if (ret) {
pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
return -ENOMEM;
@@ -168,11 +168,19 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
}
- ret = amdgpu_bo_pin(abo, domain, NULL);
+ ret = amdgpu_bo_pin(abo, domain);
if (ret) {
amdgpu_bo_unreserve(abo);
goto out_unref;
}
+
+ ret = amdgpu_ttm_alloc_gart(&abo->tbo);
+ if (ret) {
+ amdgpu_bo_unreserve(abo);
+ dev_err(adev->dev, "%p bind failed\n", abo);
+ goto out_unref;
+ }
+
ret = amdgpu_bo_kmap(abo, NULL);
amdgpu_bo_unreserve(abo);
if (ret) {
@@ -292,9 +300,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb
drm_fb_helper_unregister_fbi(&rfbdev->helper);
- if (rfb->obj) {
- amdgpufb_destroy_pinned_object(rfb->obj);
- rfb->obj = NULL;
+ if (rfb->base.obj[0]) {
+ amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
+ rfb->base.obj[0] = NULL;
drm_framebuffer_unregister_private(&rfb->base);
drm_framebuffer_cleanup(&rfb->base);
}
@@ -377,7 +385,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
if (!adev->mode_info.rfbdev)
return 0;
- robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj);
+ robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
size += amdgpu_bo_size(robj);
return size;
}
@@ -386,7 +394,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
{
if (!adev->mode_info.rfbdev)
return false;
- if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj))
+ if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
return true;
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97449e06a242..7056925eb386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+ unsigned flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
adev->fence_context + ring->idx,
seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
- seq, AMDGPU_FENCE_FLAG_INT);
+ seq, flags | AMDGPU_FENCE_FLAG_INT);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
/* This function can't be called concurrently anyway, otherwise
@@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
uint64_t index;
- if (ring != &adev->uvd.ring) {
+ if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);
- ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
- ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
+ ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+ ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
@@ -645,7 +646,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
- .wait = dma_fence_default_wait,
.release = amdgpu_fence_release,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index cf0f186c6092..a54d5655a191 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
int r;
if (adev->gart.robj == NULL) {
- r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- ttm_bo_type_kernel, NULL,
- &adev->gart.robj);
+ struct amdgpu_bo_param bp;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
if (r) {
return r;
}
@@ -138,14 +143,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
- uint64_t gpu_addr;
int r;
r = amdgpu_bo_reserve(adev->gart.robj, false);
if (unlikely(r != 0))
return r;
- r = amdgpu_bo_pin(adev->gart.robj,
- AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr);
+ r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
amdgpu_bo_unreserve(adev->gart.robj);
return r;
@@ -154,7 +157,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
if (r)
amdgpu_bo_unpin(adev->gart.robj);
amdgpu_bo_unreserve(adev->gart.robj);
- adev->gart.table_addr = gpu_addr;
+ adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
return r;
}
@@ -229,7 +232,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) {
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
adev->gart.pages[p] = NULL;
@@ -238,7 +241,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ptr)
continue;
- for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
@@ -277,7 +280,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < pages; i++) {
page_base = dma_addr[i];
- for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
}
@@ -314,7 +317,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++)
adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
#endif
@@ -364,7 +367,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
/* Allocate pages table */
- adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages);
+ adev->gart.pages = vzalloc(array_size(sizeof(void *),
+ adev->gart.num_cpu_pages));
if (adev->gart.pages == NULL)
return -ENOMEM;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 456295c00291..9f9e9dc87da1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -37,6 +37,8 @@ struct amdgpu_bo;
#define AMDGPU_GPU_PAGE_SHIFT 12
#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
+#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
+
struct amdgpu_gart {
u64 table_addr;
struct amdgpu_bo *robj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 46b9ea4e6103..bcbdcf997d20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -30,6 +30,7 @@
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#include "amdgpu_display.h"
void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
@@ -48,17 +49,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
struct drm_gem_object **obj)
{
struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
int r;
+ memset(&bp, 0, sizeof(bp));
*obj = NULL;
/* At least align on page size */
if (alignment < PAGE_SIZE) {
alignment = PAGE_SIZE;
}
+ bp.size = size;
+ bp.byte_align = alignment;
+ bp.type = type;
+ bp.resv = resv;
+ bp.preferred_domain = initial_domain;
retry:
- r = amdgpu_bo_create(adev, size, alignment, initial_domain,
- flags, type, resv, &bo);
+ bp.flags = flags;
+ bp.domain = initial_domain;
+ r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
if (r != -ERESTARTSYS) {
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -221,17 +230,19 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
/* reject invalid gem domains */
- if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
- AMDGPU_GEM_DOMAIN_GTT |
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GDS |
- AMDGPU_GEM_DOMAIN_GWS |
- AMDGPU_GEM_DOMAIN_OA))
+ if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
return -EINVAL;
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ /* if gds bo is created from user space, it must be
+ * passed to bo list
+ */
+ DRM_ERROR("GDS bo cannot be per-vm-bo\n");
+ return -EINVAL;
+ }
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
size = size << AMDGPU_GDS_SHIFT;
@@ -254,7 +265,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
(u32)(0xffffffff & args->in.domains),
- flags, false, resv, &gobj);
+ flags, ttm_bo_type_device, resv, &gobj);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
if (!r) {
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
@@ -306,7 +317,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
- 0, 0, NULL, &gobj);
+ 0, ttm_bo_type_device, NULL, &gobj);
if (r)
return r;
@@ -499,7 +510,6 @@ out:
* @adev: amdgpu_device pointer
* @vm: vm to update
* @bo_va: bo_va to update
- * @list: validation list
* @operation: map, unmap or clear
*
* Update the bo_va directly after setting its address. Errors are not
@@ -508,7 +518,6 @@ out:
static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo_va *bo_va,
- struct list_head *list,
uint32_t operation)
{
int r;
@@ -601,7 +610,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
tv.bo = &abo->tbo;
- tv.shared = false;
+ tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
list_add(&tv.head, &list);
} else {
gobj = NULL;
@@ -662,7 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
- amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list,
+ amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
error_backoff:
@@ -746,17 +755,18 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct amdgpu_device *adev = dev->dev_private;
struct drm_gem_object *gobj;
uint32_t handle;
+ u32 domain;
int r;
args->pitch = amdgpu_align_pitch(adev, args->width,
DIV_ROUND_UP(args->bpp, 8), 0);
args->size = (u64)args->pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE);
-
- r = amdgpu_gem_object_create(adev, args->size, 0,
- AMDGPU_GEM_DOMAIN_VRAM,
+ domain = amdgpu_bo_get_preferred_pin_domain(adev,
+ amdgpu_display_supported_domains(adev));
+ r = amdgpu_gem_object_create(adev, args->size, 0, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- false, NULL, &gobj);
+ ttm_bo_type_device, NULL, &gobj);
if (r)
return -ENOMEM;
@@ -771,16 +781,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
}
#if defined(CONFIG_DEBUG_FS)
+
+#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \
+ if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
+ seq_printf((m), " " #flag); \
+ }
+
static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
{
struct drm_gem_object *gobj = ptr;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct seq_file *m = data;
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *dma_buf;
unsigned domain;
const char *placement;
unsigned pin_count;
- uint64_t offset;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
switch (domain) {
@@ -798,13 +815,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
seq_printf(m, "\t0x%08x: %12ld byte %s",
id, amdgpu_bo_size(bo), placement);
- offset = READ_ONCE(bo->tbo.mem.start);
- if (offset != AMDGPU_BO_INVALID_OFFSET)
- seq_printf(m, " @ 0x%010Lx", offset);
-
pin_count = READ_ONCE(bo->pin_count);
if (pin_count)
seq_printf(m, " pin count %d", pin_count);
+
+ dma_buf = READ_ONCE(bo->gem_base.dma_buf);
+ attachment = READ_ONCE(bo->gem_base.import_attach);
+
+ if (attachment)
+ seq_printf(m, " imported from %p", dma_buf);
+ else if (dma_buf)
+ seq_printf(m, " exported as %p", dma_buf);
+
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
+
seq_printf(m, "\n");
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 893c2490b783..6cb4948233cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -109,4 +109,19 @@ struct amdgpu_gmc {
const struct amdgpu_gmc_funcs *gmc_funcs;
};
+/**
+ * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * True if full VRAM is visible through the BAR
+ */
+static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
+{
+ WARN_ON(gmc->real_vram_size < gmc->visible_vram_size);
+
+ return (gmc->real_vram_size == gmc->visible_vram_size);
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 311589e02d17..5518e623fed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
+ unsigned fence_flags = 0;
unsigned i;
int r = 0;
@@ -138,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* ring tests don't use a job */
if (job) {
vm = job->vm;
- fence_ctx = job->fence_ctx;
+ fence_ctx = job->base.s_fence->scheduled.context;
} else {
vm = NULL;
fence_ctx = 0;
@@ -227,7 +228,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
#endif
amdgpu_asic_invalidate_hdp(adev, ring);
- r = amdgpu_fence_emit(ring, f);
+ if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+ fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* wrap the last IB with fence */
+ if (job && job->uf_addr) {
+ amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
+ fence_flags | AMDGPU_FENCE_FLAG_64BIT);
+ }
+
+ r = amdgpu_fence_emit(ring, f, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
@@ -239,12 +249,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
- /* wrap the last IB with fence */
- if (job && job->uf_addr) {
- amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
- AMDGPU_FENCE_FLAG_64BIT);
- }
-
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
@@ -349,7 +353,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
tmo = tmo_mm;
else
tmo = tmo_gfx;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index a1c78f90eadf..3a072a7a39f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -578,11 +578,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
-
- adev->vm_manager.fence_context =
- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 3a5ca462abf0..1abf5b5bac9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -25,6 +25,23 @@
* Alex Deucher
* Jerome Glisse
*/
+
+/**
+ * DOC: Interrupt Handling
+ *
+ * Interrupts generated within GPU hardware raise interrupt requests that are
+ * passed to amdgpu IRQ handler which is responsible for detecting source and
+ * type of the interrupt and dispatching matching handlers. If handling an
+ * interrupt requires calling kernel functions that may sleep processing is
+ * dispatched to work handlers.
+ *
+ * If MSI functionality is not disabled by module parameter then MSI
+ * support will be enabled.
+ *
+ * For GPU interrupt sources that may be driven by another driver, IRQ domain
+ * support is used (with mapping between virtual and hardware IRQs).
+ */
+
#include <linux/irq.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
@@ -43,19 +60,21 @@
#define AMDGPU_WAIT_IDLE_TIMEOUT 200
-/*
- * Handle hotplug events outside the interrupt handler proper.
- */
/**
- * amdgpu_hotplug_work_func - display hotplug work handler
+ * amdgpu_hotplug_work_func - work handler for display hotplug event
*
- * @work: work struct
+ * @work: work struct pointer
*
- * This is the hot plug event work handler (all asics).
- * The work gets scheduled from the irq handler if there
- * was a hot plug interrupt. It walks the connector table
- * and calls the hotplug handler for each one, then sends
- * a drm hotplug event to alert userspace.
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt. It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because hotplug handler has to use
+ * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
+ * sleep).
*/
static void amdgpu_hotplug_work_func(struct work_struct *work)
{
@@ -74,13 +93,12 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
}
/**
- * amdgpu_irq_reset_work_func - execute gpu reset
+ * amdgpu_irq_reset_work_func - execute GPU reset
*
- * @work: work struct
+ * @work: work struct pointer
*
- * Execute scheduled gpu reset (cayman+).
- * This function is called when the irq handler
- * thinks we need a gpu reset.
+ * Execute scheduled GPU reset (Cayman+).
+ * This function is called when the IRQ handler thinks we need a GPU reset.
*/
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
@@ -91,7 +109,13 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
amdgpu_device_gpu_recover(adev, NULL, false);
}
-/* Disable *all* interrupts */
+/**
+ * amdgpu_irq_disable_all - disable *all* interrupts
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Disable all types of interrupts from all sources.
+ */
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
@@ -123,11 +147,15 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_handler - irq handler
+ * amdgpu_irq_handler - IRQ handler
+ *
+ * @irq: IRQ number (unused)
+ * @arg: pointer to DRM device
*
- * @int irq, void *arg: args
+ * IRQ handler for amdgpu driver (all ASICs).
*
- * This is the irq handler for the amdgpu driver (all asics).
+ * Returns:
+ * result of handling the IRQ, as defined by &irqreturn_t
*/
irqreturn_t amdgpu_irq_handler(int irq, void *arg)
{
@@ -142,18 +170,18 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
}
/**
- * amdgpu_msi_ok - asic specific msi checks
+ * amdgpu_msi_ok - check whether MSI functionality is enabled
*
- * @adev: amdgpu device pointer
+ * @adev: amdgpu device pointer (unused)
+ *
+ * Checks whether MSI functionality has been disabled via module parameter
+ * (all ASICs).
*
- * Handles asic specific MSI checks to determine if
- * MSIs should be enabled on a particular chip (all asics).
- * Returns true if MSIs should be enabled, false if MSIs
- * should not be enabled.
+ * Returns:
+ * *true* if MSIs are allowed to be enabled or *false* otherwise
*/
static bool amdgpu_msi_ok(struct amdgpu_device *adev)
{
- /* force MSI on */
if (amdgpu_msi == 1)
return true;
else if (amdgpu_msi == 0)
@@ -163,12 +191,15 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_init - init driver interrupt info
+ * amdgpu_irq_init - initialize interrupt handling
*
* @adev: amdgpu device pointer
*
- * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics).
- * Returns 0 for success, error for failure.
+ * Sets up work functions for hotplug and reset interrupts, enables MSI
+ * functionality, initializes vblank, hotplug and reset interrupt handling.
+ *
+ * Returns:
+ * 0 on success or error code on failure
*/
int amdgpu_irq_init(struct amdgpu_device *adev)
{
@@ -176,7 +207,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
spin_lock_init(&adev->irq.lock);
- /* enable msi */
+ /* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
if (amdgpu_msi_ok(adev)) {
@@ -189,7 +220,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (!amdgpu_device_has_dc_support(adev)) {
if (!adev->enable_virtual_display)
- /* Disable vblank irqs aggressively for power-saving */
+ /* Disable vblank IRQs aggressively for power-saving */
/* XXX: can this be enabled for DC? */
adev->ddev->vblank_disable_immediate = true;
@@ -197,7 +228,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (r)
return r;
- /* pre DCE11 */
+ /* Pre-DCE11 */
INIT_WORK(&adev->hotplug_work,
amdgpu_hotplug_work_func);
}
@@ -220,11 +251,13 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_fini - tear down driver interrupt info
+ * amdgpu_irq_fini - shut down interrupt handling
*
* @adev: amdgpu device pointer
*
- * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics).
+ * Tears down work functions for hotplug and reset interrupts, disables MSI
+ * functionality, shuts down vblank, hotplug and reset interrupt handling,
+ * turns off interrupts from all sources (all ASICs).
*/
void amdgpu_irq_fini(struct amdgpu_device *adev)
{
@@ -264,12 +297,17 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_add_id - register irq source
+ * amdgpu_irq_add_id - register IRQ source
*
* @adev: amdgpu device pointer
- * @src_id: source id for this source
- * @source: irq source
+ * @client_id: client id
+ * @src_id: source id
+ * @source: IRQ source pointer
+ *
+ * Registers IRQ source on a client.
*
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
@@ -312,12 +350,12 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
}
/**
- * amdgpu_irq_dispatch - dispatch irq to IP blocks
+ * amdgpu_irq_dispatch - dispatch IRQ to IP blocks
*
* @adev: amdgpu device pointer
- * @entry: interrupt vector
+ * @entry: interrupt vector pointer
*
- * Dispatches the irq to the different IP blocks
+ * Dispatches IRQ to IP blocks.
*/
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
@@ -361,13 +399,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
}
/**
- * amdgpu_irq_update - update hw interrupt state
+ * amdgpu_irq_update - update hardware interrupt state
*
* @adev: amdgpu device pointer
- * @src: interrupt src you want to enable
- * @type: type of interrupt you want to update
+ * @src: interrupt source pointer
+ * @type: type of interrupt
*
- * Updates the interrupt state for a specific src (all asics).
+ * Updates interrupt state for the specific source (all ASICs).
*/
int amdgpu_irq_update(struct amdgpu_device *adev,
struct amdgpu_irq_src *src, unsigned type)
@@ -378,7 +416,7 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags);
- /* we need to determine after taking the lock, otherwise
+ /* We need to determine after taking the lock, otherwise
we might disable just enabled interrupts again */
if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE;
@@ -390,6 +428,14 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
return r;
}
+/**
+ * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Updates state of all types of interrupts on all sources on resume after
+ * reset.
+ */
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j, k;
@@ -413,10 +459,13 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* amdgpu_irq_get - enable interrupt
*
* @adev: amdgpu device pointer
- * @src: interrupt src you want to enable
- * @type: type of interrupt you want to enable
+ * @src: interrupt source pointer
+ * @type: type of interrupt
*
- * Enables the interrupt type for a specific src (all asics).
+ * Enables specified type of interrupt on the specified source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@@ -440,10 +489,13 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* amdgpu_irq_put - disable interrupt
*
* @adev: amdgpu device pointer
- * @src: interrupt src you want to disable
- * @type: type of interrupt you want to disable
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Enables specified type of interrupt on the specified source (all ASICs).
*
- * Disables the interrupt type for a specific src (all asics).
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@@ -464,12 +516,17 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
}
/**
- * amdgpu_irq_enabled - test if irq is enabled or not
+ * amdgpu_irq_enabled - check whether interrupt is enabled or not
*
* @adev: amdgpu device pointer
- * @idx: interrupt src you want to test
+ * @src: interrupt source pointer
+ * @type: type of interrupt
*
- * Tests if the given interrupt source is enabled or not
+ * Checks whether the given type of interrupt is enabled on the given source.
+ *
+ * Returns:
+ * *true* if interrupt is enabled, *false* if interrupt is disabled or on
+ * invalid parameters
*/
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@@ -486,7 +543,7 @@ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
return !!atomic_read(&src->enabled_types[type]);
}
-/* gen irq */
+/* XXX: Generic IRQ handling */
static void amdgpu_irq_mask(struct irq_data *irqd)
{
/* XXX */
@@ -497,12 +554,26 @@ static void amdgpu_irq_unmask(struct irq_data *irqd)
/* XXX */
}
+/* amdgpu hardware interrupt chip descriptor */
static struct irq_chip amdgpu_irq_chip = {
.name = "amdgpu-ih",
.irq_mask = amdgpu_irq_mask,
.irq_unmask = amdgpu_irq_unmask,
};
+/**
+ * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers
+ *
+ * @d: amdgpu IRQ domain pointer (unused)
+ * @irq: virtual IRQ number
+ * @hwirq: hardware irq number
+ *
+ * Current implementation assigns simple interrupt handler to the given virtual
+ * IRQ.
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
static int amdgpu_irqdomain_map(struct irq_domain *d,
unsigned int irq, irq_hw_number_t hwirq)
{
@@ -514,17 +585,21 @@ static int amdgpu_irqdomain_map(struct irq_domain *d,
return 0;
}
+/* Implementation of methods for amdgpu IRQ domain */
static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
.map = amdgpu_irqdomain_map,
};
/**
- * amdgpu_irq_add_domain - create a linear irq domain
+ * amdgpu_irq_add_domain - create a linear IRQ domain
*
* @adev: amdgpu device pointer
*
- * Create an irq domain for GPU interrupt sources
+ * Creates an IRQ domain for GPU interrupt sources
* that may be driven by another driver (e.g., ACP).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
@@ -539,11 +614,11 @@ int amdgpu_irq_add_domain(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_remove_domain - remove the irq domain
+ * amdgpu_irq_remove_domain - remove the IRQ domain
*
* @adev: amdgpu device pointer
*
- * Remove the irq domain for GPU interrupt sources
+ * Removes the IRQ domain for GPU interrupt sources
* that may be driven by another driver (e.g., ACP).
*/
void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
@@ -555,16 +630,17 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_create_mapping - create a mapping between a domain irq and a
- * Linux irq
+ * amdgpu_irq_create_mapping - create mapping between domain Linux IRQs
*
* @adev: amdgpu device pointer
* @src_id: IH source id
*
- * Create a mapping between a domain irq (GPU IH src id) and a Linux irq
+ * Creates mapping between a domain IRQ (GPU IH src id) and a Linux IRQ
* Use this for components that generate a GPU interrupt, but are driven
* by a different driver (e.g., ACP).
- * Returns the Linux irq.
+ *
+ * Returns:
+ * Linux IRQ
*/
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 2bd56760c744..5a2c26a85984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -30,14 +30,14 @@
static void amdgpu_job_timedout(struct drm_sched_job *s_job)
{
- struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
- DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
- job->base.sched->name,
- atomic_read(&job->ring->fence_drv.last_seq),
- job->ring->fence_drv.sync_seq);
+ DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
- amdgpu_device_gpu_recover(job->adev, job, false);
+ amdgpu_device_gpu_recover(ring->adev, job, false);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -54,7 +54,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
if (!*job)
return -ENOMEM;
- (*job)->adev = adev;
+ /*
+ * Initialize the scheduler to at least some ring so that we always
+ * have a pointer to adev.
+ */
+ (*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
@@ -86,6 +90,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
@@ -93,14 +98,15 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(job->adev, &job->ibs[i], f);
+ amdgpu_ib_free(ring->adev, &job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
{
- struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
- amdgpu_ring_priority_put(job->ring, s_job->s_priority);
+ amdgpu_ring_priority_put(ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
@@ -117,50 +123,68 @@ void amdgpu_job_free(struct amdgpu_job *job)
kfree(job);
}
-int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
- struct drm_sched_entity *entity, void *owner,
- struct dma_fence **f)
+int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
+ void *owner, struct dma_fence **f)
{
+ enum drm_sched_priority priority;
+ struct amdgpu_ring *ring;
int r;
- job->ring = ring;
if (!f)
return -EINVAL;
- r = drm_sched_job_init(&job->base, &ring->sched, entity, owner);
+ r = drm_sched_job_init(&job->base, entity->sched, entity, owner);
if (r)
return r;
job->owner = owner;
- job->fence_ctx = entity->fence_context;
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
- amdgpu_ring_priority_get(job->ring, job->base.s_priority);
+ priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+ ring = to_amdgpu_ring(entity->sched);
+ amdgpu_ring_priority_get(ring, priority);
+
+ return 0;
+}
+
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence)
+{
+ int r;
+
+ job->base.sched = &ring->sched;
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
+ job->fence = dma_fence_get(*fence);
+ if (r)
+ return r;
+
+ amdgpu_job_free(job);
return 0;
}
static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
struct drm_sched_entity *s_entity)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
+ struct dma_fence *fence;
bool explicit = false;
int r;
- struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit);
+ fence = amdgpu_sync_get_fence(&job->sync, &explicit);
if (fence && explicit) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false);
+ r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
+ fence, false);
if (r)
- DRM_ERROR("Error adding fence to sync (%d)\n", r);
+ DRM_ERROR("Error adding fence (%d)\n", r);
}
}
while (fence == NULL && vm && !job->vmid) {
- struct amdgpu_ring *ring = job->ring;
-
r = amdgpu_vmid_grab(vm, ring, &job->sync,
&job->base.s_fence->finished,
job);
@@ -175,30 +199,25 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
- struct amdgpu_device *adev;
struct amdgpu_job *job;
int r;
- if (!sched_job) {
- DRM_ERROR("job is null\n");
- return NULL;
- }
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
- adev = job->adev;
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
trace_amdgpu_sched_run_job(job);
- if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+ if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
if (finished->error < 0) {
DRM_INFO("Skip scheduling IBs!\n");
} else {
- r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job,
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
new file mode 100644
index 000000000000..57cfe78a262b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_JOB_H__
+#define __AMDGPU_JOB_H__
+
+/* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
+/* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
+/* bit set means context switch occured */
+#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
+
+#define to_amdgpu_job(sched_job) \
+ container_of((sched_job), struct amdgpu_job, base)
+
+struct amdgpu_fence;
+
+struct amdgpu_job {
+ struct drm_sched_job base;
+ struct amdgpu_vm *vm;
+ struct amdgpu_sync sync;
+ struct amdgpu_sync sched_sync;
+ struct amdgpu_ib *ibs;
+ struct dma_fence *fence; /* the hw fence */
+ uint32_t preamble_status;
+ uint32_t num_ibs;
+ void *owner;
+ bool vm_needs_flush;
+ uint64_t vm_pd_addr;
+ unsigned vmid;
+ unsigned pasid;
+ uint32_t gds_base, gds_size;
+ uint32_t gws_base, gws_size;
+ uint32_t oa_base, oa_size;
+ uint32_t vram_lost_counter;
+
+ /* user fence handling */
+ uint64_t uf_addr;
+ uint64_t uf_sequence;
+
+};
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+ struct amdgpu_job **job, struct amdgpu_vm *vm);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+ struct amdgpu_job **job);
+
+void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_free(struct amdgpu_job *job);
+int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
+ void *owner, struct dma_fence **f);
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4b7824d30e73..207f238649b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -31,6 +31,7 @@
#include "amdgpu_sched.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
+#include "atom.h"
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
@@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.rlc_fw_version;
fw_info->feature = adev->gfx.rlc_feature_version;
break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
+ fw_info->ver = adev->gfx.rlc_srlc_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlc_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
+ fw_info->ver = adev->gfx.rlc_srlg_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlg_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
+ fw_info->ver = adev->gfx.rlc_srls_fw_version;
+ fw_info->feature = adev->gfx.rlc_srls_feature_version;
+ break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
@@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
- int i, found;
+ int i, j, found;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
return -EINVAL;
+ /* Ensure IB tests are run on ring */
+ flush_delayed_work(&adev->late_init_work);
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
@@ -313,56 +329,66 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
ring_mask |= ((adev->gfx.gfx_ring[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 8;
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
ring_mask |= ((adev->gfx.compute_ring[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 8;
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
- ring_mask = adev->uvd.ring.ready ? 1 : 0;
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 16;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+ ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
+ ib_start_alignment = 64;
+ ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+ ib_start_alignment = 4;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
- for (i = 0; i < adev->uvd.num_enc_rings; i++)
- ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 1;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+ for (j = 0; j < adev->uvd.num_enc_rings; j++)
+ ring_mask |=
+ ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
+ (j + i * adev->uvd.num_enc_rings));
+ ib_start_alignment = 64;
+ ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_dec.ready ? 1 : 0;
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+ ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+ ib_start_alignment = 64;
ib_size_alignment = 1;
break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ ring_mask = adev->vcn.ring_jpeg.ready ? 1 : 0;
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
default:
return -EINVAL;
}
@@ -407,6 +433,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
+ case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
break;
default:
@@ -474,13 +501,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_INFO_VRAM_GTT: {
struct drm_amdgpu_info_vram_gtt vram_gtt;
- vram_gtt.vram_size = adev->gmc.real_vram_size;
- vram_gtt.vram_size -= adev->vram_pin_size;
- vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
- vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
+ vram_gtt.vram_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size);
+ vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size);
vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
vram_gtt.gtt_size *= PAGE_SIZE;
- vram_gtt.gtt_size -= adev->gart_pin_size;
+ vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
@@ -489,17 +516,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
- mem.vram.usable_heap_size =
- adev->gmc.real_vram_size - adev->vram_pin_size;
+ mem.vram.usable_heap_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size);
mem.vram.heap_usage =
amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
adev->gmc.visible_vram_size;
- mem.cpu_accessible_vram.usable_heap_size =
- adev->gmc.visible_vram_size -
- (adev->vram_pin_size - adev->invisible_pin_size);
+ mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size);
mem.cpu_accessible_vram.heap_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.cpu_accessible_vram.max_allocation =
@@ -507,8 +533,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size;
mem.gtt.total_heap_size *= PAGE_SIZE;
- mem.gtt.usable_heap_size = mem.gtt.total_heap_size
- - adev->gart_pin_size;
+ mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
+ atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage =
amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
@@ -701,10 +727,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
}
}
case AMDGPU_INFO_SENSOR: {
- struct pp_gpu_power query = {0};
- int query_size = sizeof(query);
-
- if (amdgpu_dpm == 0)
+ if (!adev->pm.dpm_enabled)
return -ENOENT;
switch (info->sensor_info.type) {
@@ -746,10 +769,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_POWER,
- (void *)&query, &query_size)) {
+ (void *)&ui32, &ui32_size)) {
return -EINVAL;
}
- ui32 = query.average_gpu_power >> 8;
+ ui32 >>= 8;
break;
case AMDGPU_INFO_SENSOR_VDDNB:
/* get VDDNB in millivolts */
@@ -914,8 +937,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pm_runtime_get_sync(dev->dev);
- amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
-
if (adev->asic_type != CHIP_RAVEN) {
amdgpu_uvd_free_handles(adev, file_priv);
amdgpu_vce_free_handles(adev, file_priv);
@@ -935,6 +956,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
amdgpu_vm_fini(adev, &fpriv->vm);
+ amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+
if (pasid)
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
amdgpu_bo_unref(&pd);
@@ -1088,6 +1111,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
+ struct atom_context *ctx = adev->mode_info.atom_context;
int ret, i;
/* VCE */
@@ -1146,6 +1170,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* RLC SAVE RESTORE LIST CNTL */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST GPM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST SRM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
@@ -1210,6 +1258,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+
+ seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4cb8e6c..a365ea2383d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -28,6 +28,21 @@
* Christian König <christian.koenig@amd.com>
*/
+/**
+ * DOC: MMU Notifier
+ *
+ * For coherent userptr handling registers an MMU notifier to inform the driver
+ * about updates on the page tables of a process.
+ *
+ * When somebody tries to invalidate the page tables we block the update until
+ * all operations on the pages in question are completed, then those pages are
+ * marked as accessed and also dirty if it wasn't a read only access.
+ *
+ * New command submissions using the userptrs in question are delayed until all
+ * page table invalidation are completed and we once more see a coherent process
+ * address space.
+ */
+
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/mmu_notifier.h>
@@ -36,12 +51,30 @@
#include <drm/drm.h>
#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+/**
+ * struct amdgpu_mn
+ *
+ * @adev: amdgpu device pointer
+ * @mm: process address space
+ * @mn: MMU notifier structure
+ * @type: type of MMU notifier
+ * @work: destruction work item
+ * @node: hash table node to find structure by adev and mn
+ * @lock: rw semaphore protecting the notifier nodes
+ * @objects: interval tree containing amdgpu_mn_nodes
+ * @read_lock: mutex for recursive locking of @lock
+ * @recursion: depth of recursion
+ *
+ * Data for each amdgpu device and process address space.
+ */
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
+ enum amdgpu_mn_type type;
/* only used on destruction */
struct work_struct work;
@@ -56,13 +89,21 @@ struct amdgpu_mn {
atomic_t recursion;
};
+/**
+ * struct amdgpu_mn_node
+ *
+ * @it: interval node defining start-last of the affected address range
+ * @bos: list of all BOs in the affected address range
+ *
+ * Manages all BOs which are affected of a certain range of address space.
+ */
struct amdgpu_mn_node {
struct interval_tree_node it;
struct list_head bos;
};
/**
- * amdgpu_mn_destroy - destroy the rmn
+ * amdgpu_mn_destroy - destroy the MMU notifier
*
* @work: previously sheduled work item
*
@@ -70,47 +111,50 @@ struct amdgpu_mn_node {
*/
static void amdgpu_mn_destroy(struct work_struct *work)
{
- struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = rmn->adev;
+ struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
+ struct amdgpu_device *adev = amn->adev;
struct amdgpu_mn_node *node, *next_node;
struct amdgpu_bo *bo, *next_bo;
mutex_lock(&adev->mn_lock);
- down_write(&rmn->lock);
- hash_del(&rmn->node);
+ down_write(&amn->lock);
+ hash_del(&amn->node);
rbtree_postorder_for_each_entry_safe(node, next_node,
- &rmn->objects.rb_root, it.rb) {
+ &amn->objects.rb_root, it.rb) {
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
}
kfree(node);
}
- up_write(&rmn->lock);
+ up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
- kfree(rmn);
+ mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+ kfree(amn);
}
/**
* amdgpu_mn_release - callback to notify about mm destruction
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
*
* Shedule a work item to lazy destroy our notifier.
*/
static void amdgpu_mn_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
- INIT_WORK(&rmn->work, amdgpu_mn_destroy);
- schedule_work(&rmn->work);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+
+ INIT_WORK(&amn->work, amdgpu_mn_destroy);
+ schedule_work(&amn->work);
}
/**
- * amdgpu_mn_lock - take the write side lock for this mn
+ * amdgpu_mn_lock - take the write side lock for this notifier
+ *
+ * @mn: our notifier
*/
void amdgpu_mn_lock(struct amdgpu_mn *mn)
{
@@ -119,7 +163,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn)
}
/**
- * amdgpu_mn_unlock - drop the write side lock for this mn
+ * amdgpu_mn_unlock - drop the write side lock for this notifier
+ *
+ * @mn: our notifier
*/
void amdgpu_mn_unlock(struct amdgpu_mn *mn)
{
@@ -128,40 +174,38 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
}
/**
- * amdgpu_mn_read_lock - take the rmn read lock
- *
- * @rmn: our notifier
+ * amdgpu_mn_read_lock - take the read side lock for this notifier
*
- * Take the rmn read side lock.
+ * @amn: our notifier
*/
-static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn)
+static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
{
- mutex_lock(&rmn->read_lock);
- if (atomic_inc_return(&rmn->recursion) == 1)
- down_read_non_owner(&rmn->lock);
- mutex_unlock(&rmn->read_lock);
+ mutex_lock(&amn->read_lock);
+ if (atomic_inc_return(&amn->recursion) == 1)
+ down_read_non_owner(&amn->lock);
+ mutex_unlock(&amn->read_lock);
}
/**
- * amdgpu_mn_read_unlock - drop the rmn read lock
+ * amdgpu_mn_read_unlock - drop the read side lock for this notifier
*
- * @rmn: our notifier
- *
- * Drop the rmn read side lock.
+ * @amn: our notifier
*/
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn)
+static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
- if (atomic_dec_return(&rmn->recursion) == 0)
- up_read_non_owner(&rmn->lock);
+ if (atomic_dec_return(&amn->recursion) == 0)
+ up_read_non_owner(&amn->lock);
}
/**
* amdgpu_mn_invalidate_node - unmap all BOs of a node
*
* @node: the node with the BOs to unmap
+ * @start: start of address range affected
+ * @end: end of address range affected
*
- * We block for all BOs and unmap them by move them
- * into system domain again.
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
*/
static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
unsigned long start,
@@ -185,30 +229,30 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
- * We block for all BOs between start and end to be idle and
- * unmap them by move them into system domain again.
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
*/
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(rmn);
+ amdgpu_mn_read_lock(amn);
- it = interval_tree_iter_first(&rmn->objects, start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
@@ -220,10 +264,53 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
}
/**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restorted in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct interval_tree_node *it;
+
+ /* notification is exclusive, but interval is inclusive */
+ end -= 1;
+
+ amdgpu_mn_read_lock(amn);
+
+ it = interval_tree_iter_first(&amn->objects, start, end);
+ while (it) {
+ struct amdgpu_mn_node *node;
+ struct amdgpu_bo *bo;
+
+ node = container_of(it, struct amdgpu_mn_node, it);
+ it = interval_tree_iter_next(it, start, end);
+
+ list_for_each_entry(bo, &node->bos, mn_list) {
+ struct kgd_mem *mem = bo->kfd_bo;
+
+ if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, mm);
+ }
+ }
+}
+
+/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
@@ -234,28 +321,44 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
- amdgpu_mn_read_unlock(rmn);
+ amdgpu_mn_read_unlock(amn);
}
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+ [AMDGPU_MN_TYPE_GFX] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
+ [AMDGPU_MN_TYPE_HSA] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
};
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
/**
* amdgpu_mn_get - create notifier context
*
* @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
- struct amdgpu_mn *rmn;
+ struct amdgpu_mn *amn;
+ unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
mutex_lock(&adev->mn_lock);
@@ -264,40 +367,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
return ERR_PTR(-EINTR);
}
- hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
- if (rmn->mm == mm)
+ hash_for_each_possible(adev->mn_hash, amn, node, key)
+ if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
goto release_locks;
- rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
- if (!rmn) {
- rmn = ERR_PTR(-ENOMEM);
+ amn = kzalloc(sizeof(*amn), GFP_KERNEL);
+ if (!amn) {
+ amn = ERR_PTR(-ENOMEM);
goto release_locks;
}
- rmn->adev = adev;
- rmn->mm = mm;
- rmn->mn.ops = &amdgpu_mn_ops;
- init_rwsem(&rmn->lock);
- rmn->objects = RB_ROOT_CACHED;
- mutex_init(&rmn->read_lock);
- atomic_set(&rmn->recursion, 0);
+ amn->adev = adev;
+ amn->mm = mm;
+ init_rwsem(&amn->lock);
+ amn->type = type;
+ amn->mn.ops = &amdgpu_mn_ops[type];
+ amn->objects = RB_ROOT_CACHED;
+ mutex_init(&amn->read_lock);
+ atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&rmn->mn, mm);
+ r = __mmu_notifier_register(&amn->mn, mm);
if (r)
- goto free_rmn;
+ goto free_amn;
- hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+ hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
- return rmn;
+ return amn;
-free_rmn:
+free_amn:
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
- kfree(rmn);
+ kfree(amn);
return ERR_PTR(r);
}
@@ -315,37 +419,40 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *rmn;
- struct amdgpu_mn_node *node = NULL;
+ enum amdgpu_mn_type type =
+ bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
+ struct amdgpu_mn *amn;
+ struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
- rmn = amdgpu_mn_get(adev);
- if (IS_ERR(rmn))
- return PTR_ERR(rmn);
+ amn = amdgpu_mn_get(adev, type);
+ if (IS_ERR(amn))
+ return PTR_ERR(amn);
+
+ new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
INIT_LIST_HEAD(&bos);
- down_write(&rmn->lock);
+ down_write(&amn->lock);
- while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
+ while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
kfree(node);
node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &rmn->objects);
+ interval_tree_remove(&node->it, &amn->objects);
addr = min(it->start, addr);
end = max(it->last, end);
list_splice(&node->bos, &bos);
}
- if (!node) {
- node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
- if (!node) {
- up_write(&rmn->lock);
- return -ENOMEM;
- }
- }
+ if (!node)
+ node = new_node;
+ else
+ kfree(new_node);
- bo->mn = rmn;
+ bo->mn = amn;
node->it.start = addr;
node->it.last = end;
@@ -353,9 +460,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&bos, &node->bos);
list_add(&bo->mn_list, &node->bos);
- interval_tree_insert(&node->it, &rmn->objects);
+ interval_tree_insert(&node->it, &amn->objects);
- up_write(&rmn->lock);
+ up_write(&amn->lock);
return 0;
}
@@ -370,18 +477,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *rmn;
+ struct amdgpu_mn *amn;
struct list_head *head;
mutex_lock(&adev->mn_lock);
- rmn = bo->mn;
- if (rmn == NULL) {
+ amn = bo->mn;
+ if (amn == NULL) {
mutex_unlock(&adev->mn_lock);
return;
}
- down_write(&rmn->lock);
+ down_write(&amn->lock);
/* save the next list entry for later */
head = bo->mn_list.next;
@@ -391,12 +498,13 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
if (list_empty(head)) {
struct amdgpu_mn_node *node;
+
node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &rmn->objects);
+ interval_tree_remove(&node->it, &amn->objects);
kfree(node);
}
- up_write(&rmn->lock);
+ up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3793b8..eb0f432f78fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
*/
struct amdgpu_mn;
+enum amdgpu_mn_type {
+ AMDGPU_MN_TYPE_GFX,
+ AMDGPU_MN_TYPE_HSA,
+};
+
#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index d6416ee52e32..b9e9e8b02fb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -308,7 +308,6 @@ struct amdgpu_display_funcs {
struct amdgpu_framebuffer {
struct drm_framebuffer base;
- struct drm_gem_object *obj;
/* caching for later use */
uint64_t address;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6d08cde8443c..b12526ce1a9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -38,6 +38,19 @@
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+/**
+ * DOC: amdgpu_object
+ *
+ * This defines the interfaces to operate on an &amdgpu_bo buffer object which
+ * represents memory used by driver (VRAM, system memory, etc.). The driver
+ * provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces
+ * to create/destroy/set buffer object which are then managed by the kernel TTM
+ * memory manager.
+ * The interfaces are also used internally by kernel clients, including gfx,
+ * uvd, etc. for kernel managed allocations used by the GPU.
+ *
+ */
+
static bool amdgpu_need_backup(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
@@ -50,11 +63,35 @@ static bool amdgpu_need_backup(struct amdgpu_device *adev)
return true;
}
+/**
+ * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
+ *
+ * @bo: &amdgpu_bo buffer object
+ *
+ * This function is called when a BO stops being pinned, and updates the
+ * &amdgpu_device pin_size values accordingly.
+ */
+static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ }
+}
+
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ if (WARN_ON_ONCE(bo->pin_count > 0))
+ amdgpu_bo_subtract_pin_size(bo);
+
if (bo->kfd_bo)
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
@@ -73,6 +110,16 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
kfree(bo);
}
+/**
+ * amdgpu_ttm_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
+ * @bo: buffer object to be checked
+ *
+ * Uses destroy function associated with the object to determine if this is
+ * an &amdgpu_bo.
+ *
+ * Returns:
+ * true if the object belongs to &amdgpu_bo, false if not.
+ */
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
if (bo->destroy == &amdgpu_ttm_bo_destroy)
@@ -80,6 +127,14 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
return false;
}
+/**
+ * amdgpu_ttm_placement_from_domain - set buffer's placement
+ * @abo: &amdgpu_bo buffer object whose placement is to be set
+ * @domain: requested domain
+ *
+ * Sets buffer's placement according to requested domain and the buffer's
+ * flags.
+ */
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
@@ -184,21 +239,29 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
*
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
- * Returns 0 on success, negative error code otherwise.
+ * Returns:
+ * 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr)
{
+ struct amdgpu_bo_param bp;
bool free = false;
int r;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = align;
+ bp.domain = domain;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+
if (!*bo_ptr) {
- r = amdgpu_bo_create(adev, size, align, domain,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- ttm_bo_type_kernel, NULL, bo_ptr);
+ r = amdgpu_bo_create(adev, &bp, bo_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
r);
@@ -213,22 +276,33 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
goto error_free;
}
- r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
+ r = amdgpu_bo_pin(*bo_ptr, domain);
if (r) {
dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
goto error_unreserve;
}
+ r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo_ptr);
+ goto error_unpin;
+ }
+
+ if (gpu_addr)
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
+
if (cpu_addr) {
r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
- goto error_unreserve;
+ goto error_unpin;
}
}
return 0;
+error_unpin:
+ amdgpu_bo_unpin(*bo_ptr);
error_unreserve:
amdgpu_bo_unreserve(*bo_ptr);
@@ -254,7 +328,8 @@ error_free:
*
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
- * Returns 0 on success, negative error code otherwise.
+ * Returns:
+ * 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
@@ -278,6 +353,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
+ * @gpu_addr: pointer to where the BO's GPU memory space address was stored
+ * @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpin a BO for kernel internal use.
*/
@@ -341,27 +418,25 @@ fail:
return false;
}
-static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
- int byte_align, u32 domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
struct ttm_operation_ctx ctx = {
- .interruptible = (type != ttm_bo_type_kernel),
+ .interruptible = (bp->type != ttm_bo_type_kernel),
.no_wait_gpu = false,
- .resv = resv,
+ .resv = bp->resv,
.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
};
struct amdgpu_bo *bo;
- unsigned long page_align;
+ unsigned long page_align, size = bp->size;
size_t acc_size;
int r;
- page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+ page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
- if (!amdgpu_bo_validate_size(adev, size, domain))
+ if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
*bo_ptr = NULL;
@@ -375,18 +450,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
- bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT |
- AMDGPU_GEM_DOMAIN_CPU |
- AMDGPU_GEM_DOMAIN_GDS |
- AMDGPU_GEM_DOMAIN_GWS |
- AMDGPU_GEM_DOMAIN_OA);
+ bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
+ bp->domain;
bo->allowed_domains = bo->preferred_domains;
- if (type != ttm_bo_type_kernel &&
+ if (bp->type != ttm_bo_type_kernel &&
bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
- bo->flags = flags;
+ bo->flags = bp->flags;
#ifdef CONFIG_X86_32
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
@@ -417,15 +488,17 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
#endif
bo->tbo.bdev = &adev->mman.bdev;
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_ttm_placement_from_domain(bo, bp->domain);
+ if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 1;
- r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
&bo->placement, page_align, &ctx, acc_size,
- NULL, resv, &amdgpu_ttm_bo_destroy);
+ NULL, bp->resv, &amdgpu_ttm_bo_destroy);
if (unlikely(r != 0))
return r;
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
@@ -433,10 +506,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
else
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
- if (type == ttm_bo_type_kernel)
- bo->tbo.priority = 1;
-
- if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+ if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
@@ -449,20 +519,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
bo->tbo.moving = dma_fence_get(fence);
dma_fence_put(fence);
}
- if (!resv)
+ if (!bp->resv)
amdgpu_bo_unreserve(bo);
*bo_ptr = bo;
trace_amdgpu_bo_create(bo);
/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
- if (type == ttm_bo_type_device)
+ if (bp->type == ttm_bo_type_device)
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
return 0;
fail_unreserve:
- if (!resv)
+ if (!bp->resv)
ww_mutex_unlock(&bo->tbo.resv->lock);
amdgpu_bo_unref(&bo);
return r;
@@ -472,16 +542,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size, int byte_align,
struct amdgpu_bo *bo)
{
+ struct amdgpu_bo_param bp;
int r;
if (bo->shadow)
return 0;
- r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_SHADOW,
- ttm_bo_type_kernel,
- bo->tbo.resv, &bo->shadow);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = byte_align;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_SHADOW;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = bo->tbo.resv;
+
+ r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
@@ -492,28 +568,40 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
- int byte_align, u32 domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object; and if requested, also creates a
+ * shadow object.
+ * Shadow object is used to backup the original buffer object, and is always
+ * in GTT.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
- uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
+ u64 flags = bp->flags;
int r;
- r = amdgpu_bo_do_create(adev, size, byte_align, domain,
- parent_flags, type, resv, bo_ptr);
+ bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
+ r = amdgpu_bo_do_create(adev, bp, bo_ptr);
if (r)
return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
- if (!resv)
+ if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
- r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
+ r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
- if (!resv)
+ if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv);
if (r)
@@ -523,6 +611,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
return r;
}
+/**
+ * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @ring: amdgpu_ring for the engine handling the buffer operations
+ * @bo: &amdgpu_bo buffer to be backed up
+ * @resv: reservation object with embedded fence
+ * @fence: dma_fence associated with the operation
+ * @direct: whether to submit the job directly
+ *
+ * Copies an &amdgpu_bo buffer object to its shadow object.
+ * Not used for now.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
@@ -555,6 +658,18 @@ err:
return r;
}
+/**
+ * amdgpu_bo_validate - validate an &amdgpu_bo buffer object
+ * @bo: pointer to the buffer object
+ *
+ * Sets placement according to domain; and changes placement and caching
+ * policy of the buffer object according to the placement.
+ * This is used for validating shadow bos. It calls ttm_bo_validate() to
+ * make sure the buffer is resident where it needs to be.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_validate(struct amdgpu_bo *bo)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -577,6 +692,22 @@ retry:
return r;
}
+/**
+ * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @ring: amdgpu_ring for the engine handling the buffer operations
+ * @bo: &amdgpu_bo buffer to be restored
+ * @resv: reservation object with embedded fence
+ * @fence: dma_fence associated with the operation
+ * @direct: whether to submit the job directly
+ *
+ * Copies a buffer object's shadow content back to the object.
+ * This is used for recovering a buffer from its shadow in case of a gpu
+ * reset where vram context may be lost.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
@@ -609,6 +740,17 @@ err:
return r;
}
+/**
+ * amdgpu_bo_kmap - map an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be mapped
+ * @ptr: kernel virtual address to be returned
+ *
+ * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls
+ * amdgpu_bo_kptr() to get the kernel virtual address.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
void *kptr;
@@ -639,6 +781,15 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
+/**
+ * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Calls ttm_kmap_obj_virtual() to get the kernel virtual address
+ *
+ * Returns:
+ * the virtual address of a buffer object area.
+ */
void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
{
bool is_iomem;
@@ -646,12 +797,27 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
}
+/**
+ * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unmapped
+ *
+ * Unmaps a kernel map set up by amdgpu_bo_kmap().
+ */
void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
if (bo->kmap.bo)
ttm_bo_kunmap(&bo->kmap);
}
+/**
+ * amdgpu_bo_ref - reference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * References the contained &ttm_buffer_object.
+ *
+ * Returns:
+ * a refcounted pointer to the &amdgpu_bo buffer object.
+ */
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
if (bo == NULL)
@@ -661,6 +827,12 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
return bo;
}
+/**
+ * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Unreferences the contained &ttm_buffer_object and clear the pointer
+ */
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
struct ttm_buffer_object *tbo;
@@ -674,9 +846,30 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
*bo = NULL;
}
+/**
+ * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ * @min_offset: the start of requested address range
+ * @max_offset: the end of requested address range
+ *
+ * Pins the buffer object according to requested domain and address range. If
+ * the memory is unbound gart memory, binds the pages into gart table. Adjusts
+ * pin_count and pin_size accordingly.
+ *
+ * Pinning means to lock pages in memory along with keeping them at a fixed
+ * offset. It is required when a buffer can not be moved, for example, when
+ * a display buffer is being scanned out.
+ *
+ * Compared with amdgpu_bo_pin(), this function gives more flexibility on
+ * where to pin a buffer if there are specific restrictions on where a buffer
+ * must be located.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset,
- u64 *gpu_addr)
+ u64 min_offset, u64 max_offset)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@@ -689,8 +882,17 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
/* A shared bo cannot be migrated to VRAM */
- if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
- return -EINVAL;
+ if (bo->prime_shared_count) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ else
+ return -EINVAL;
+ }
+
+ /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+ * See function amdgpu_display_supported_domains()
+ */
+ domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
if (bo->pin_count) {
uint32_t mem_type = bo->tbo.mem.mem_type;
@@ -699,8 +901,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
bo->pin_count++;
- if (gpu_addr)
- *gpu_addr = amdgpu_bo_gpu_offset(bo);
if (max_offset != 0) {
u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
@@ -736,34 +936,48 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
goto error;
}
- r = amdgpu_ttm_alloc_gart(&bo->tbo);
- if (unlikely(r)) {
- dev_err(adev->dev, "%p bind failed\n", bo);
- goto error;
- }
-
bo->pin_count = 1;
- if (gpu_addr != NULL)
- *gpu_addr = amdgpu_bo_gpu_offset(bo);
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
- adev->vram_pin_size += amdgpu_bo_size(bo);
- if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
- adev->invisible_pin_size += amdgpu_bo_size(bo);
+ atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- adev->gart_pin_size += amdgpu_bo_size(bo);
+ atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
error:
return r;
}
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
+/**
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ *
+ * A simple wrapper to amdgpu_bo_pin_restricted().
+ * Provides a simpler API for buffers that do not have any strict restrictions
+ * on where a buffer must be located.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
- return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
+ return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
}
+/**
+ * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unpinned
+ *
+ * Decreases the pin_count, and clears the flags if pin_count reaches 0.
+ * Changes placement and pin size accordingly.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -777,28 +991,30 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
bo->pin_count--;
if (bo->pin_count)
return 0;
+
+ amdgpu_bo_subtract_pin_size(bo);
+
for (i = 0; i < bo->placement.num_placement; i++) {
bo->placements[i].lpfn = 0;
bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (unlikely(r)) {
+ if (unlikely(r))
dev_err(adev->dev, "%p validate failed for unpin\n", bo);
- goto error;
- }
-
- if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
- adev->vram_pin_size -= amdgpu_bo_size(bo);
- if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
- adev->invisible_pin_size -= amdgpu_bo_size(bo);
- } else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
- adev->gart_pin_size -= amdgpu_bo_size(bo);
- }
-error:
return r;
}
+/**
+ * amdgpu_bo_evict_vram - evict VRAM buffers
+ * @adev: amdgpu device object
+ *
+ * Evicts all VRAM buffers on the lru list of the memory type.
+ * Mainly used for evicting vram at suspend time.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
@@ -821,6 +1037,15 @@ static const char *amdgpu_vram_names[] = {
"DDR4",
};
+/**
+ * amdgpu_bo_init - initialize memory manager
+ * @adev: amdgpu device object
+ *
+ * Calls amdgpu_ttm_init() to initialize amdgpu memory manager.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* reserve PAT memory space to WC for VRAM */
@@ -838,6 +1063,29 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
return amdgpu_ttm_init(adev);
}
+/**
+ * amdgpu_bo_late_init - late init
+ * @adev: amdgpu device object
+ *
+ * Calls amdgpu_ttm_late_init() to free resources used earlier during
+ * initialization.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_late_init(struct amdgpu_device *adev)
+{
+ amdgpu_ttm_late_init(adev);
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_fini - tear down memory manager
+ * @adev: amdgpu device object
+ *
+ * Reverses amdgpu_bo_init() to tear down memory manager.
+ */
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);
@@ -845,12 +1093,33 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
+/**
+ * amdgpu_bo_fbdev_mmap - mmap fbdev memory
+ * @bo: &amdgpu_bo buffer object
+ * @vma: vma as input from the fbdev mmap method
+ *
+ * Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma)
{
return ttm_fbdev_mmap(vma, &bo->tbo);
}
+/**
+ * amdgpu_bo_set_tiling_flags - set tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: new flags
+ *
+ * Sets buffer object's tiling flags with the new one. Used by GEM ioctl or
+ * kernel driver to set the tiling flags on a buffer.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -863,6 +1132,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
return 0;
}
+/**
+ * amdgpu_bo_get_tiling_flags - get tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: returned flags
+ *
+ * Gets buffer object's tiling flags. Used by GEM ioctl or kernel driver to
+ * set the tiling flags on a buffer.
+ */
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
lockdep_assert_held(&bo->tbo.resv->lock.base);
@@ -871,6 +1148,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
*tiling_flags = bo->tiling_flags;
}
+/**
+ * amdgpu_bo_set_metadata - set metadata
+ * @bo: &amdgpu_bo buffer object
+ * @metadata: new metadata
+ * @metadata_size: size of the new metadata
+ * @flags: flags of the new metadata
+ *
+ * Sets buffer object's metadata, its size and flags.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
uint32_t metadata_size, uint64_t flags)
{
@@ -900,6 +1190,21 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
return 0;
}
+/**
+ * amdgpu_bo_get_metadata - get metadata
+ * @bo: &amdgpu_bo buffer object
+ * @buffer: returned metadata
+ * @buffer_size: size of the buffer
+ * @metadata_size: size of the returned metadata
+ * @flags: flags of the returned metadata
+ *
+ * Gets buffer object's metadata, its size and flags. buffer_size shall not be
+ * less than metadata_size.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
size_t buffer_size, uint32_t *metadata_size,
uint64_t *flags)
@@ -923,6 +1228,16 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
return 0;
}
+/**
+ * amdgpu_bo_move_notify - notification about a memory move
+ * @bo: pointer to a buffer object
+ * @evict: if this move is evicting the buffer from the graphics address space
+ * @new_mem: new information of the bufer object
+ *
+ * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
+ * bookkeeping.
+ * TTM driver callback which is called when ttm moves a buffer.
+ */
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_mem_reg *new_mem)
@@ -951,6 +1266,17 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}
+/**
+ * amdgpu_bo_fault_reserve_notify - notification about a memory fault
+ * @bo: pointer to a buffer object
+ *
+ * Notifies the driver we are taking a fault on this BO and have reserved it,
+ * also performs bookkeeping.
+ * TTM driver callback for dealing with vm faults.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -1024,10 +1350,11 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
* amdgpu_bo_gpu_offset - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
*
- * Returns current GPU offset of the object.
- *
* Note: object should either be pinned or reserved when calling this
* function, it might be useful to add check for this for debugging.
+ *
+ * Returns:
+ * current GPU offset of the object.
*/
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
@@ -1042,3 +1369,22 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
return bo->tbo.offset;
}
+
+/**
+ * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout
+ * @adev: amdgpu device object
+ * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
+ *
+ * Returns:
+ * Which of the allowed domains is preferred for pinning the BO for scanout.
+ */
+uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
+ uint32_t domain)
+{
+ if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+ if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ }
+ return domain;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 546f77cb7882..9c3e29a04eb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,6 +33,16 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+struct amdgpu_bo_param {
+ unsigned long size;
+ int byte_align;
+ u32 domain;
+ u32 preferred_domain;
+ u64 flags;
+ enum ttm_bo_type type;
+ struct reservation_object *resv;
+};
+
/* bo virtual addresses in a vm */
struct amdgpu_bo_va_mapping {
struct amdgpu_bo_va *bo_va;
@@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
}
/**
+ * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
+ */
+static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ struct drm_mm_node *node = bo->tbo.mem.mm_node;
+ unsigned long pages_left;
+
+ if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
+ return false;
+
+ for (pages_left = bo->tbo.mem.num_pages; pages_left;
+ pages_left -= node->size, node++)
+ if (node->start < fpfn)
+ return true;
+
+ return false;
+}
+
+/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
- int byte_align, u32 domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
@@ -223,13 +252,13 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr);
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset,
- u64 *gpu_addr);
+ u64 min_offset, u64 max_offset);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
+int amdgpu_bo_late_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma);
@@ -259,7 +288,8 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct);
-
+uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
+ uint32_t domain);
/*
* sub allocation
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 361975cf45a9..15a1192c1ec5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -68,15 +68,49 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled) {
mutex_lock(&adev->pm.mutex);
if (power_supply_is_system_supplied() > 0)
- adev->pm.dpm.ac_power = true;
+ adev->pm.ac_power = true;
else
- adev->pm.dpm.ac_power = false;
+ adev->pm.ac_power = false;
if (adev->powerplay.pp_funcs->enable_bapm)
- amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power);
+ amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power);
mutex_unlock(&adev->pm.mutex);
}
}
+/**
+ * DOC: power_dpm_state
+ *
+ * The power_dpm_state file is a legacy interface and is only provided for
+ * backwards compatibility. The amdgpu driver provides a sysfs API for adjusting
+ * certain power related parameters. The file power_dpm_state is used for this.
+ * It accepts the following arguments:
+ *
+ * - battery
+ *
+ * - balanced
+ *
+ * - performance
+ *
+ * battery
+ *
+ * On older GPUs, the vbios provided a special power state for battery
+ * operation. Selecting battery switched to this state. This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * balanced
+ *
+ * On older GPUs, the vbios provided a special power state for balanced
+ * operation. Selecting balanced switched to this state. This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * performance
+ *
+ * On older GPUs, the vbios provided a special power state for performance
+ * operation. Selecting performance switched to this state. This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ */
+
static ssize_t amdgpu_get_dpm_state(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -131,6 +165,66 @@ fail:
return count;
}
+
+/**
+ * DOC: power_dpm_force_performance_level
+ *
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters. The file power_dpm_force_performance_level is
+ * used for this. It accepts the following arguments:
+ *
+ * - auto
+ *
+ * - low
+ *
+ * - high
+ *
+ * - manual
+ *
+ * - profile_standard
+ *
+ * - profile_min_sclk
+ *
+ * - profile_min_mclk
+ *
+ * - profile_peak
+ *
+ * auto
+ *
+ * When auto is selected, the driver will attempt to dynamically select
+ * the optimal power profile for current conditions in the driver.
+ *
+ * low
+ *
+ * When low is selected, the clocks are forced to the lowest power state.
+ *
+ * high
+ *
+ * When high is selected, the clocks are forced to the highest power state.
+ *
+ * manual
+ *
+ * When manual is selected, the user can manually adjust which power states
+ * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk,
+ * and pp_dpm_pcie files and adjust the power state transition heuristics
+ * via the pp_power_profile_mode sysfs file.
+ *
+ * profile_standard
+ * profile_min_sclk
+ * profile_min_mclk
+ * profile_peak
+ *
+ * When the profiling modes are selected, clock and power gating are
+ * disabled and the clocks are set for different profiling cases. This
+ * mode is recommended for profiling specific work loads where you do
+ * not want clock or power gating for clock fluctuation to interfere
+ * with your results. profile_standard sets the clocks to a fixed clock
+ * level which varies from asic to asic. profile_min_sclk forces the sclk
+ * to the lowest level. profile_min_mclk forces the mclk to the lowest level.
+ * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
+ *
+ */
+
static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -324,6 +418,17 @@ fail:
return count;
}
+/**
+ * DOC: pp_table
+ *
+ * The amdgpu driver provides a sysfs API for uploading new powerplay
+ * tables. The file pp_table is used for this. Reading the file
+ * will dump the current power play table. Writing to the file
+ * will attempt to upload a new powerplay table and re-initialize
+ * powerplay using that new table.
+ *
+ */
+
static ssize_t amdgpu_get_pp_table(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -360,6 +465,32 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
return count;
}
+/**
+ * DOC: pp_od_clk_voltage
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages
+ * in each power level within a power state. The pp_od_clk_voltage is used for
+ * this.
+ *
+ * Reading the file will display:
+ *
+ * - a list of engine clock levels and voltages labeled OD_SCLK
+ *
+ * - a list of memory clock levels and voltages labeled OD_MCLK
+ *
+ * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
+ *
+ * To manually adjust these settings, first select manual using
+ * power_dpm_force_performance_level. Enter a new value for each
+ * level by writing a string that contains "s/m level clock voltage" to
+ * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz
+ * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at
+ * 810 mV. When you have edited all of the states as needed, write
+ * "c" (commit) to the file to commit your changes. If you want to reset to the
+ * default power levels, write "r" (reset) to the file to reset them.
+ *
+ */
+
static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
struct device_attribute *attr,
const char *buf,
@@ -437,6 +568,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size;
} else {
return snprintf(buf, PAGE_SIZE, "\n");
@@ -444,6 +576,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
}
+/**
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+ *
+ * The amdgpu driver provides a sysfs API for adjusting what power levels
+ * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
+ * and pp_dpm_pcie are used for this.
+ *
+ * Reading back the files will show you the available power levels within
+ * the power state and the clock information for those levels.
+ *
+ * To manually adjust these states, first select manual using
+ * power_dpm_force_performance_level.
+ * Secondly,Enter a new value for each level by inputing a string that
+ * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
+ * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
+ */
+
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -457,6 +606,42 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
return snprintf(buf, PAGE_SIZE, "\n");
}
+/*
+ * Worst case: 32 bits individually specified, in octal at 12 characters
+ * per line (+1 for \n).
+ */
+#define AMDGPU_MASK_BUF_MAX (32 * 13)
+
+static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask)
+{
+ int ret;
+ long level;
+ char *sub_str = NULL;
+ char *tmp;
+ char buf_cpy[AMDGPU_MASK_BUF_MAX + 1];
+ const char delimiter[3] = {' ', '\n', '\0'};
+ size_t bytes;
+
+ *mask = 0;
+
+ bytes = min(count, sizeof(buf_cpy) - 1);
+ memcpy(buf_cpy, buf, bytes);
+ buf_cpy[bytes] = '\0';
+ tmp = buf_cpy;
+ while (tmp[0]) {
+ sub_str = strsep(&tmp, delimiter);
+ if (strlen(sub_str)) {
+ ret = kstrtol(sub_str, 0, &level);
+ if (ret)
+ return -EINVAL;
+ *mask |= 1 << level;
+ } else
+ break;
+ }
+
+ return 0;
+}
+
static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
struct device_attribute *attr,
const char *buf,
@@ -465,28 +650,15 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
- long level;
- uint32_t i, mask = 0;
- char sub_str[2];
-
- for (i = 0; i < strlen(buf); i++) {
- if (*(buf + i) == '\n')
- continue;
- sub_str[0] = *(buf + i);
- sub_str[1] = '\0';
- ret = kstrtol(sub_str, 0, &level);
+ uint32_t mask = 0;
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
- }
+ ret = amdgpu_read_mask(buf, count, &mask);
+ if (ret)
+ return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
-fail:
return count;
}
@@ -511,27 +683,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
- long level;
- uint32_t i, mask = 0;
- char sub_str[2];
+ uint32_t mask = 0;
- for (i = 0; i < strlen(buf); i++) {
- if (*(buf + i) == '\n')
- continue;
- sub_str[0] = *(buf + i);
- sub_str[1] = '\0';
- ret = kstrtol(sub_str, 0, &level);
+ ret = amdgpu_read_mask(buf, count, &mask);
+ if (ret)
+ return ret;
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
- }
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
-fail:
return count;
}
@@ -556,27 +716,15 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
- long level;
- uint32_t i, mask = 0;
- char sub_str[2];
+ uint32_t mask = 0;
- for (i = 0; i < strlen(buf); i++) {
- if (*(buf + i) == '\n')
- continue;
- sub_str[0] = *(buf + i);
- sub_str[1] = '\0';
- ret = kstrtol(sub_str, 0, &level);
+ ret = amdgpu_read_mask(buf, count, &mask);
+ if (ret)
+ return ret;
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
- }
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
-fail:
return count;
}
@@ -668,6 +816,26 @@ fail:
return count;
}
+/**
+ * DOC: pp_power_profile_mode
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the heuristics
+ * related to switching between power levels in a power state. The file
+ * pp_power_profile_mode is used for this.
+ *
+ * Reading this file outputs a list of all of the predefined power profiles
+ * and the relevant heuristics settings for that profile.
+ *
+ * To select a profile or create a custom profile, first select manual using
+ * power_dpm_force_performance_level. Writing the number of a predefined
+ * profile to pp_power_profile_mode will enable those heuristics. To
+ * create a custom set of heuristics, write a string of numbers to the file
+ * starting with the number of the custom profile along with a setting
+ * for each heuristic parameter. Due to differences across asic families
+ * the heuristic parameters vary from family to family.
+ *
+ */
+
static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -734,6 +902,36 @@ fail:
return -EINVAL;
}
+/**
+ * DOC: busy_percent
+ *
+ * The amdgpu driver provides a sysfs API for reading how busy the GPU
+ * is as a percentage. The file gpu_busy_percent is used for this.
+ * The SMU firmware computes a percentage of load based on the
+ * aggregate activity level in the IP cores.
+ */
+static ssize_t amdgpu_get_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int r, value, size = sizeof(value);
+
+ /* sanity check PP is enabled */
+ if (!(adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor))
+ return -EINVAL;
+
+ /* read the IP busy sensor */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
+ (void *)&value, &size);
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -767,6 +965,8 @@ static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_get_pp_od_clk_voltage,
amdgpu_set_pp_od_clk_voltage);
+static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
+ amdgpu_get_busy_percent, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -1020,8 +1220,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
- struct pp_gpu_power query = {0};
- int r, size = sizeof(query);
+ u32 query = 0;
+ int r, size = sizeof(u32);
unsigned uw;
/* Can't get power when the card is off */
@@ -1041,7 +1241,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
return r;
/* convert to microwatts */
- uw = (query.average_gpu_power >> 8) * 1000000;
+ uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;
return snprintf(buf, PAGE_SIZE, "%u\n", uw);
}
@@ -1109,6 +1309,62 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
return count;
}
+
+/**
+ * DOC: hwmon
+ *
+ * The amdgpu driver exposes the following sensor interfaces:
+ *
+ * - GPU temperature (via the on-die sensor)
+ *
+ * - GPU voltage
+ *
+ * - Northbridge voltage (APUs only)
+ *
+ * - GPU power
+ *
+ * - GPU fan
+ *
+ * hwmon interfaces for GPU temperature:
+ *
+ * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ *
+ * - temp1_crit: temperature critical max value in millidegrees Celsius
+ *
+ * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ *
+ * hwmon interfaces for GPU voltage:
+ *
+ * - in0_input: the voltage on the GPU in millivolts
+ *
+ * - in1_input: the voltage on the Northbridge in millivolts
+ *
+ * hwmon interfaces for GPU power:
+ *
+ * - power1_average: average power used by the GPU in microWatts
+ *
+ * - power1_cap_min: minimum cap supported in microWatts
+ *
+ * - power1_cap_max: maximum cap supported in microWatts
+ *
+ * - power1_cap: selected power cap in microWatts
+ *
+ * hwmon interfaces for GPU fan:
+ *
+ * - pwm1: pulse width modulation fan level (0-255)
+ *
+ * - pwm1_enable: pulse width modulation fan control method (0: no fan speed control, 1: manual fan speed control using pwm interface, 2: automatic fan speed control)
+ *
+ * - pwm1_min: pulse width modulation fan control minimum level (0)
+ *
+ * - pwm1_max: pulse width modulation fan control maximum level (255)
+ *
+ * - fan1_input: fan speed in RPM
+ *
+ * You can use hwmon tools like sensors to view this information on your system.
+ *
+ */
+
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -1153,19 +1409,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
- /* handle non-powerplay limitations */
- if (!adev->powerplay.pp_handle) {
- /* Skip fan attributes if fan is not present */
- if (adev->pm.no_fan &&
- (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
- return 0;
- /* requires powerplay */
- if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
- return 0;
- }
+
+ /* Skip fan attributes if fan is not present */
+ if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
+ return 0;
/* Skip limit attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
@@ -1462,10 +1713,10 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
- if (adev->powerplay.pp_funcs->powergate_uvd) {
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
/* enable/disable UVD */
mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_powergate_uvd(adev, !enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
@@ -1484,10 +1735,10 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
- if (adev->powerplay.pp_funcs->powergate_vce) {
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
/* enable/disable VCE */
mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_powergate_vce(adev, !enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
@@ -1619,6 +1870,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"pp_od_clk_voltage\n");
return ret;
}
+ ret = device_create_file(adev->dev,
+ &dev_attr_gpu_busy_percent);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "gpu_busy_level\n");
+ return ret;
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1654,13 +1912,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
&dev_attr_pp_power_profile_mode);
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
+ device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
{
- struct drm_device *ddev = adev->ddev;
- struct drm_crtc *crtc;
- struct amdgpu_crtc *amdgpu_crtc;
int i = 0;
if (!adev->pm.dpm_enabled)
@@ -1675,30 +1931,35 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
amdgpu_fence_wait_empty(ring);
}
+ mutex_lock(&adev->pm.mutex);
+ /* update battery/ac status */
+ if (power_supply_is_system_supplied() > 0)
+ adev->pm.ac_power = true;
+ else
+ adev->pm.ac_power = false;
+ mutex_unlock(&adev->pm.mutex);
+
if (adev->powerplay.pp_funcs->dispatch_tasks) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ mutex_lock(&adev->pm.mutex);
+ amdgpu_dpm_get_active_displays(adev);
+ adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count;
+ adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
+ adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
+ /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */
+ if (adev->pm.pm_display_cfg.vrefresh > 120)
+ adev->pm.pm_display_cfg.min_vblank_time = 0;
+ if (adev->powerplay.pp_funcs->display_configuration_change)
+ adev->powerplay.pp_funcs->display_configuration_change(
+ adev->powerplay.pp_handle,
+ &adev->pm.pm_display_cfg);
+ mutex_unlock(&adev->pm.mutex);
+ }
amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
} else {
mutex_lock(&adev->pm.mutex);
- adev->pm.dpm.new_active_crtcs = 0;
- adev->pm.dpm.new_active_crtc_count = 0;
- if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc,
- &ddev->mode_config.crtc_list, head) {
- amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (amdgpu_crtc->enabled) {
- adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
- adev->pm.dpm.new_active_crtc_count++;
- }
- }
- }
- /* update battery/ac status */
- if (power_supply_is_system_supplied() > 0)
- adev->pm.dpm.ac_power = true;
- else
- adev->pm.dpm.ac_power = false;
-
+ amdgpu_dpm_get_active_displays(adev);
amdgpu_dpm_change_power_state_locked(adev);
-
mutex_unlock(&adev->pm.mutex);
}
}
@@ -1711,7 +1972,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
uint32_t value;
- struct pp_gpu_power query = {0};
+ uint32_t query = 0;
int size;
/* sanity check PP is enabled */
@@ -1734,17 +1995,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
seq_printf(m, "\t%u mV (VDDGFX)\n", value);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDNB)\n", value);
- size = sizeof(query);
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
- seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
- query.vddc_power & 0xff);
- seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
- query.vddci_power & 0xff);
- seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
- query.max_gpu_power & 0xff);
- seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
- query.average_gpu_power & 0xff);
- }
+ size = sizeof(uint32_t);
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size))
+ seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
size = sizeof(value);
seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 4b584cb75bf4..3ed02f472003 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -23,6 +23,14 @@
*
* Authors: Alex Deucher
*/
+
+/**
+ * DOC: PRIME Buffer Sharing
+ *
+ * The following callback implementations are used for :ref:`sharing GEM buffer
+ * objects between different devices via PRIME <prime_buffer_sharing>`.
+ */
+
#include <drm/drmP.h>
#include "amdgpu.h"
@@ -32,6 +40,14 @@
static const struct dma_buf_ops amdgpu_dmabuf_ops;
+/**
+ * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
+ * implementation
+ * @obj: GEM buffer object
+ *
+ * Returns:
+ * A scatter/gather table for the pinned pages of the buffer object's memory.
+ */
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -40,6 +56,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
}
+/**
+ * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
+ * @obj: GEM buffer object
+ *
+ * Sets up an in-kernel virtual mapping of the buffer object's memory.
+ *
+ * Returns:
+ * The virtual address of the mapping or an error pointer.
+ */
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -53,6 +78,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
return bo->dma_buf_vmap.virtual;
}
+/**
+ * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
+ * @obj: GEM buffer object
+ * @vaddr: virtual address (unused)
+ *
+ * Tears down the in-kernel virtual mapping of the buffer object's memory.
+ */
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -60,6 +92,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
ttm_bo_kunmap(&bo->dma_buf_vmap);
}
+/**
+ * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
+ * @obj: GEM buffer object
+ * @vma: virtual memory area
+ *
+ * Sets up a userspace mapping of the buffer object's memory in the given
+ * virtual memory area.
+ *
+ * Returns:
+ * 0 on success or negative error code.
+ */
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -94,6 +137,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
+/**
+ * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
+ * implementation
+ * @dev: DRM device
+ * @attach: DMA-buf attachment
+ * @sg: Scatter/gather table
+ *
+ * Import shared DMA buffer memory exported by another device.
+ *
+ * Returns:
+ * A new GEM buffer object of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
@@ -102,12 +158,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct reservation_object *resv = attach->dmabuf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
int ret;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = attach->dmabuf->size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = resv;
ww_mutex_lock(&resv->lock, NULL);
- ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
- resv, &bo);
+ ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret)
goto error;
@@ -126,8 +188,19 @@ error:
return ERR_PTR(ret);
}
+/**
+ * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
+ * @dma_buf: shared DMA buffer
+ * @attach: DMA-buf attachment
+ *
+ * Makes sure that the shared DMA buffer can be accessed by the target device.
+ * For now, simply pins it to the GTT domain, where it should be accessible by
+ * all DMA devices.
+ *
+ * Returns:
+ * 0 on success or negative error code.
+ */
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
- struct device *target_dev,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
@@ -135,7 +208,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- r = drm_gem_map_attach(dma_buf, target_dev, attach);
+ r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
@@ -159,7 +232,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
}
/* pin buffer into GTT */
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
@@ -175,6 +248,14 @@ error_detach:
return r;
}
+/**
+ * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
+ * @dma_buf: shared DMA buffer
+ * @attach: DMA-buf attachment
+ *
+ * This is called when a shared DMA buffer no longer needs to be accessible by
+ * the other device. For now, simply unpins the buffer from GTT.
+ */
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
@@ -196,6 +277,13 @@ error:
drm_gem_map_detach(dma_buf, attach);
}
+/**
+ * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
+ * @obj: GEM buffer object
+ *
+ * Returns:
+ * The buffer object's reservation object.
+ */
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -203,13 +291,25 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
return bo->tbo.resv;
}
+/**
+ * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * @dma_buf: shared DMA buffer
+ * @direction: direction of DMA transfer
+ *
+ * This is called before CPU access to the shared DMA buffer's memory. If it's
+ * a read access, the buffer is moved to the GTT domain if possible, for optimal
+ * CPU read performance.
+ *
+ * Returns:
+ * 0 on success or negative error code.
+ */
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
- u32 domain = amdgpu_display_framebuffer_domains(adev);
+ u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
@@ -239,14 +339,24 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.map = drm_gem_dmabuf_kmap,
- .map_atomic = drm_gem_dmabuf_kmap_atomic,
.unmap = drm_gem_dmabuf_kunmap,
- .unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
+/**
+ * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
+ * @dev: DRM device
+ * @gobj: GEM buffer object
+ * @flags: flags like DRM_CLOEXEC and DRM_RDWR
+ *
+ * The main work is done by the &drm_gem_prime_export helper, which in turn
+ * uses &amdgpu_gem_prime_res_obj.
+ *
+ * Returns:
+ * Shared DMA buffer representing the GEM buffer object from the given device.
+ */
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
@@ -267,6 +377,17 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
return buf;
}
+/**
+ * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
+ * @dev: DRM device
+ * @dma_buf: Shared DMA buffer
+ *
+ * The main work is done by the &drm_gem_prime_import helper, which in turn
+ * uses &amdgpu_gem_prime_import_sg_table.
+ *
+ * Returns:
+ * GEM buffer object representing the shared DMA buffer for the given device.
+ */
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c7d43e064fc7..9f1a5bd39ae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -52,6 +52,7 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
psp_v3_1_set_psp_funcs(psp);
break;
case CHIP_RAVEN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 262c1267249e..ea9850c9224d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
u32 ring,
struct amdgpu_ring **out_ring)
{
+ u32 instance;
+
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
@@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
- *out_ring = &adev->uvd.ring;
+ instance = ring;
+ *out_ring = &adev->uvd.inst[instance].ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
- *out_ring = &adev->uvd.ring_enc[ring];
+ instance = ring / adev->uvd.num_enc_rings;
+ *out_ring =
+ &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
@@ -91,6 +96,9 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
*out_ring = &adev->vcn.ring_enc[ring];
break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ *out_ring = &adev->vcn.ring_jpeg;
+ break;
default:
*out_ring = NULL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
@@ -240,13 +248,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
- ip_num_rings = 1;
+ ip_num_rings = adev->uvd.num_uvd_inst;
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
- ip_num_rings = adev->uvd.num_enc_rings;
+ ip_num_rings =
+ adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;
@@ -254,6 +263,9 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
ip_num_rings = adev->vcn.num_enc_rings;
break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ ip_num_rings = 1;
+ break;
default:
DRM_DEBUG("unknown ip type: %d\n", hw_ip);
return -EINVAL;
@@ -281,6 +293,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
case AMDGPU_HW_IP_UVD_ENC:
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
+ case AMDGPU_HW_IP_VCN_JPEG:
r = amdgpu_identity_map(adev, mapper, ring, out_ring);
break;
case AMDGPU_HW_IP_DMA:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d5f526f38e50..93794a85f83d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -211,7 +211,8 @@ void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
if (!ring->funcs->set_priority)
return;
- atomic_inc(&ring->num_jobs[priority]);
+ if (atomic_inc_return(&ring->num_jobs[priority]) <= 0)
+ return;
mutex_lock(&ring->priority_mutex);
if (priority <= ring->priority)
@@ -304,7 +305,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
0xffffffffffffffff : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
@@ -362,6 +363,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
+ ring->me = 0;
ring->adev->rings[ring->idx] = NULL;
}
@@ -459,6 +461,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
spin_unlock(&adev->ring_lru_list_lock);
}
+/**
+ * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
+ *
+ * @adev: amdgpu_device pointer
+ * @reg0: register to write
+ * @reg1: register to wait on
+ * @ref: reference value to write/wait on
+ * @mask: mask to wait on
+ *
+ * Helper for rings that don't support write and wait in a
+ * single oneshot packet.
+ */
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
/*
* Debugfs info
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1a5911882657..5018c0b6bf1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,7 +29,7 @@
#include <drm/drm_print.h>
/* max number of rings */
-#define AMDGPU_MAX_RINGS 18
+#define AMDGPU_MAX_RINGS 21
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
@@ -42,6 +42,9 @@
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
+#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+
+#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
enum amdgpu_ring_type {
AMDGPU_RING_TYPE_GFX,
@@ -52,7 +55,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_UVD_ENC,
AMDGPU_RING_TYPE_VCN_DEC,
- AMDGPU_RING_TYPE_VCN_ENC
+ AMDGPU_RING_TYPE_VCN_ENC,
+ AMDGPU_RING_TYPE_VCN_JPEG
};
struct amdgpu_device;
@@ -90,7 +94,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
+ unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -110,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
bool support_64bit_ptrs;
unsigned vmhub;
+ unsigned extra_dw;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
@@ -154,6 +160,9 @@ struct amdgpu_ring_funcs {
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
+ void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
@@ -228,6 +237,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring);
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t val0,
+ uint32_t reg1, uint32_t val1);
+
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
int i = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 2dbe87591f81..8904e62dca7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright 2009 VMware, Inc.
*
@@ -33,6 +34,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_bo *vram_obj = NULL;
struct amdgpu_bo **gtt_obj = NULL;
+ struct amdgpu_bo_param bp;
uint64_t gart_addr, vram_addr;
unsigned n, size;
int i, r;
@@ -52,15 +54,21 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
n -= adev->irq.ih.ring_size;
n /= size;
- gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
+ gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL);
if (!gtt_obj) {
DRM_ERROR("Failed to allocate %d pointers\n", n);
r = 1;
goto out_cleanup;
}
-
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
- ttm_bo_type_kernel, NULL, &vram_obj);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+
+ r = amdgpu_bo_create(adev, &bp, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
@@ -68,20 +76,20 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(vram_obj, false);
if (unlikely(r != 0))
goto out_unref;
- r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
+ r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
DRM_ERROR("Failed to pin VRAM object\n");
goto out_unres;
}
+ vram_addr = amdgpu_bo_gpu_offset(vram_obj);
for (i = 0; i < n; i++) {
void *gtt_map, *vram_map;
void **gart_start, **gart_end;
void **vram_start, **vram_end;
struct dma_fence *fence = NULL;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, 0,
- ttm_bo_type_kernel, NULL, gtt_obj + i);
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean;
@@ -90,11 +98,17 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(gtt_obj[i], false);
if (unlikely(r != 0))
goto out_lclean_unref;
- r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr);
+ r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT);
if (r) {
DRM_ERROR("Failed to pin GTT object %d\n", i);
goto out_lclean_unres;
}
+ r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo);
+ if (r) {
+ DRM_ERROR("%p bind failed\n", gtt_obj[i]);
+ goto out_lclean_unpin;
+ }
+ gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]);
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 532263ab6e16..76920035eb22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
- __entry->ring = p->job->ring->idx;
+ __entry->ring = p->ring->idx;
__entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
- p->job->ring);
+ p->ring);
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@@ -178,7 +178,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = job->ring->name;
+ __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@@ -203,7 +203,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = job->ring->name;
+ __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
),
TP_fast_assign(
- __entry->bo = bo_va->base.bo;
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3ff9cd0..13977ea6a097 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -63,24 +63,52 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
/*
* Global memory.
*/
+
+/**
+ * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
+ * memory object
+ *
+ * @ref: Object for initialization.
+ *
+ * This is called by drm_global_item_ref() when an object is being
+ * initialized.
+ */
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
return ttm_mem_global_init(ref->object);
}
+/**
+ * amdgpu_ttm_mem_global_release - Drop reference to a memory object
+ *
+ * @ref: Object being removed
+ *
+ * This is called by drm_global_item_unref() when an object is being
+ * released.
+ */
static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
ttm_mem_global_release(ref->object);
}
+/**
+ * amdgpu_ttm_global_init - Initialize global TTM memory reference
+ * structures.
+ *
+ * @adev: AMDGPU device for which the global structures need to be
+ * registered.
+ *
+ * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
+ * during bring up.
+ */
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
- struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
int r;
+ /* ensure reference is false in case init fails */
adev->mman.mem_global_referenced = false;
+
global_ref = &adev->mman.mem_global_ref;
global_ref->global_type = DRM_GLOBAL_TTM_MEM;
global_ref->size = sizeof(struct ttm_mem_global);
@@ -108,21 +136,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
- ring = adev->mman.buffer_funcs_ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
- rq, amdgpu_sched_jobs, NULL);
- if (r) {
- DRM_ERROR("Failed setting up TTM BO move run queue.\n");
- goto error_entity;
- }
-
adev->mman.mem_global_referenced = true;
return 0;
-error_entity:
- drm_global_item_unref(&adev->mman.bo_global_ref.ref);
error_bo:
drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
@@ -132,8 +149,6 @@ error_mem:
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
- drm_sched_entity_fini(adev->mman.entity.sched,
- &adev->mman.entity);
mutex_destroy(&adev->mman.gtt_window_lock);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
@@ -146,6 +161,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
return 0;
}
+/**
+ * amdgpu_init_mem_type - Initialize a memory manager for a specific
+ * type of memory request.
+ *
+ * @bdev: The TTM BO device object (contains a reference to
+ * amdgpu_device)
+ * @type: The type of memory requested
+ * @man:
+ *
+ * This is called by ttm_bo_init_mm() when a buffer object is being
+ * initialized.
+ */
static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
struct ttm_mem_type_manager *man)
{
@@ -161,6 +188,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
man->default_caching = TTM_PL_FLAG_CACHED;
break;
case TTM_PL_TT:
+ /* GTT memory */
man->func = &amdgpu_gtt_mgr_func;
man->gpu_offset = adev->gmc.gart_start;
man->available_caching = TTM_PL_MASK_CACHING;
@@ -193,6 +221,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
return 0;
}
+/**
+ * amdgpu_evict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to evict
+ * @placement: Possible destination(s) for evicted BO
+ *
+ * Fill in placement data when ttm_bo_evict() is called
+ */
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
struct ttm_placement *placement)
{
@@ -204,12 +240,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
};
+ /* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
}
+ /* Object isn't an AMDGPU object so ignore */
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
placement->busy_placement = &placements;
@@ -217,26 +255,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
placement->num_busy_placement = 1;
return;
}
+
abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
+ /* Move to system memory */
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
- } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
- unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
- struct drm_mm_node *node = bo->mem.mm_node;
- unsigned long pages_left;
-
- for (pages_left = bo->mem.num_pages;
- pages_left;
- pages_left -= node->size, node++) {
- if (node->start < fpfn)
- break;
- }
-
- if (!pages_left)
- goto gtt;
+ } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+ amdgpu_bo_in_cpu_visible_vram(abo)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -245,12 +273,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
*/
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT);
- abo->placements[0].fpfn = fpfn;
+ abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
abo->placement.busy_placement = &abo->placements[1];
abo->placement.num_busy_placement = 1;
} else {
-gtt:
+ /* Move to GTT memory */
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
}
break;
@@ -261,6 +289,15 @@ gtt:
*placement = abo->placement;
}
+/**
+ * amdgpu_verify_access - Verify access for a mmap call
+ *
+ * @bo: The buffer object to map
+ * @filp: The file pointer from the process performing the mmap
+ *
+ * This is called by ttm_bo_mmap() to verify whether a process
+ * has the right to mmap a BO to their process space.
+ */
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
@@ -278,6 +315,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
filp->private_data);
}
+/**
+ * amdgpu_move_null - Register memory for a buffer object
+ *
+ * @bo: The bo to assign the memory to
+ * @new_mem: The memory to be assigned.
+ *
+ * Assign the memory from new_mem to the memory of the buffer object
+ * bo.
+ */
static void amdgpu_move_null(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
{
@@ -288,6 +334,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
new_mem->mm_node = NULL;
}
+/**
+ * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
+ * buffer.
+ */
static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
struct drm_mm_node *mm_node,
struct ttm_mem_reg *mem)
@@ -302,9 +352,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_find_mm_node - Helper function finds the drm_mm_node
- * corresponding to @offset. It also modifies the offset to be
- * within the drm_mm_node returned
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies
+ * the offset to be within the drm_mm_node
+ * returned
*/
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
unsigned long *offset)
@@ -443,7 +494,12 @@ error:
return r;
}
-
+/**
+ * amdgpu_move_blit - Copy an entire buffer to another buffer
+ *
+ * This is a helper called by amdgpu_bo_move() and
+ * amdgpu_move_vram_ram() to help move buffers to and from VRAM.
+ */
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
@@ -478,6 +534,11 @@ error:
return r;
}
+/**
+ * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
+ *
+ * Called by amdgpu_bo_move().
+ */
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
@@ -490,6 +551,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
int r;
adev = amdgpu_ttm_adev(bo->bdev);
+
+ /* create space/pages for new_mem in GTT space */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
@@ -504,25 +567,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
return r;
}
+ /* set caching flags */
r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
if (unlikely(r)) {
goto out_cleanup;
}
+ /* Bind the memory to the GTT space */
r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
if (unlikely(r)) {
goto out_cleanup;
}
+
+ /* blit VRAM to GTT */
r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
+
+ /* move BO (in tmp_mem) to new_mem */
r = ttm_bo_move_ttm(bo, ctx, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
}
+/**
+ * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
+ *
+ * Called by amdgpu_bo_move().
+ */
static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
@@ -535,6 +609,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
int r;
adev = amdgpu_ttm_adev(bo->bdev);
+
+ /* make space in GTT for old_mem buffer */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
@@ -548,10 +624,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
if (unlikely(r)) {
return r;
}
+
+ /* move/bind old memory to GTT space */
r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
+
+ /* copy to VRAM */
r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
@@ -561,6 +641,11 @@ out_cleanup:
return r;
}
+/**
+ * amdgpu_bo_move - Move a buffer object to a new memory location
+ *
+ * Called by ttm_bo_handle_move_mem()
+ */
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
@@ -626,6 +711,11 @@ memcpy:
return 0;
}
+/**
+ * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
+ *
+ * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
+ */
static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
@@ -695,7 +785,7 @@ struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
- struct mm_struct *usermm;
+ struct task_struct *usertask;
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
@@ -703,17 +793,29 @@ struct amdgpu_ttm_tt {
uint32_t last_set_pages;
};
+/**
+ * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to
+ * by a USERPTR pointer to memory
+ *
+ * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
+ * This provides a wrapper around the get_user_pages() call to provide
+ * device accessible pages that back user memory.
+ */
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct mm_struct *mm = gtt->usertask->mm;
unsigned int flags = 0;
unsigned pinned = 0;
int r;
+ if (!mm) /* Happens during process shutdown */
+ return -ESRCH;
+
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
flags |= FOLL_WRITE;
- down_read(&current->mm->mmap_sem);
+ down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only use anonymous memory
@@ -721,13 +823,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
- vma = find_vma(gtt->usermm, gtt->userptr);
+ vma = find_vma(mm, gtt->userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return -EPERM;
}
}
+ /* loop enough times using contiguous pages of memory */
do {
unsigned num_pages = ttm->num_pages - pinned;
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
@@ -739,7 +842,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
list_add(&guptask.list, &gtt->guptasks);
spin_unlock(&gtt->guptasklock);
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ if (mm == current->mm)
+ r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ else
+ r = get_user_pages_remote(gtt->usertask,
+ mm, userptr, num_pages,
+ flags, p, NULL, NULL);
spin_lock(&gtt->guptasklock);
list_del(&guptask.list);
@@ -752,15 +860,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
} while (pinned < ttm->num_pages);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return 0;
release_pages:
release_pages(pages, pinned);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return r;
}
+/**
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages
+ * as necessary.
+ *
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
+ */
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -775,6 +891,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
}
}
+/**
+ * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ *
+ * Called while unpinning userptr pages
+ */
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -793,7 +914,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
}
}
-/* prepare the sg table with the user pages */
+/**
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the
+ * user pages
+ *
+ * Called by amdgpu_ttm_backend_bind()
+ **/
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -805,17 +931,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ /* Allocate an SG array and squash pages into it */
r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
ttm->num_pages << PAGE_SHIFT,
GFP_KERNEL);
if (r)
goto release_sg;
+ /* Map SG to device */
r = -ENOMEM;
nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
if (nents != ttm->sg->nents)
goto release_sg;
+ /* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
@@ -826,6 +955,9 @@ release_sg:
return r;
}
+/**
+ * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
+ */
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -839,14 +971,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
if (!ttm->sg->sgl)
return;
- /* free the sg table and pages again */
+ /* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
+ /* mark the pages as dirty */
amdgpu_ttm_tt_mark_user_pages(ttm);
sg_free_table(ttm->sg);
}
+int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ uint64_t flags)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+ struct ttm_tt *ttm = tbo->ttm;
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ int r;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+ uint64_t page_idx = 1;
+
+ r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
+ ttm->pages, gtt->ttm.dma_address, flags);
+ if (r)
+ goto gart_bind_fail;
+
+ /* Patch mtype of the second part BO */
+ flags &= ~AMDGPU_PTE_MTYPE_MASK;
+ flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+
+ r = amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ ttm->num_pages - page_idx,
+ &ttm->pages[page_idx],
+ &(gtt->ttm.dma_address[page_idx]), flags);
+ } else {
+ r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ ttm->pages, gtt->ttm.dma_address, flags);
+ }
+
+gart_bind_fail:
+ if (r)
+ DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+ ttm->num_pages, gtt->offset);
+
+ return r;
+}
+
+/**
+ * amdgpu_ttm_backend_bind - Bind GTT memory
+ *
+ * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
+ * This handles binding GTT memory to the device address space.
+ */
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
struct ttm_mem_reg *bo_mem)
{
@@ -877,7 +1055,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return 0;
}
+ /* compute PTE flags relevant to this BO memory */
flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+
+ /* bind pages into GART page tables */
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
@@ -888,6 +1069,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return r;
}
+/**
+ * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
+ */
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -903,6 +1087,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
return 0;
+ /* allocate GTT space */
tmp = bo->mem;
tmp.mm_node = NULL;
placement.num_placement = 1;
@@ -918,10 +1103,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
if (unlikely(r))
return r;
+ /* compute PTE flags for this buffer object */
flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+
+ /* Bind pages */
gtt->offset = (u64)tmp.start << PAGE_SHIFT;
- r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
- bo->ttm->pages, gtt->ttm.dma_address, flags);
+ r = amdgpu_ttm_gart_bind(adev, bo, flags);
if (unlikely(r)) {
ttm_bo_mem_put(bo, &tmp);
return r;
@@ -935,31 +1122,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
}
+/**
+ * amdgpu_ttm_recover_gart - Rebind GTT pages
+ *
+ * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
+ * rebind GTT pages during a GPU reset.
+ */
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
uint64_t flags;
int r;
- if (!gtt)
+ if (!tbo->ttm)
return 0;
- flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
- r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
- gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
- if (r)
- DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
- gtt->ttm.ttm.num_pages, gtt->offset);
+ flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
+ r = amdgpu_ttm_gart_bind(adev, tbo, flags);
+
return r;
}
+/**
+ * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
+ *
+ * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
+ * ttm_tt_destroy().
+ */
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
+ /* if the pages have userptr pinning then clear that first */
if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(ttm);
@@ -978,6 +1174,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+
ttm_dma_tt_fini(&gtt->ttm);
kfree(gtt);
}
@@ -988,6 +1187,13 @@ static struct ttm_backend_func amdgpu_backend_func = {
.destroy = &amdgpu_ttm_backend_destroy,
};
+/**
+ * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
+ *
+ * @bo: The buffer object to create a GTT ttm_tt object around
+ *
+ * Called by ttm_tt_create().
+ */
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
@@ -1001,6 +1207,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
return NULL;
}
gtt->ttm.ttm.func = &amdgpu_backend_func;
+
+ /* allocate space for the uninitialized page entries */
if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
kfree(gtt);
return NULL;
@@ -1008,6 +1216,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
return &gtt->ttm.ttm;
}
+/**
+ * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
+ *
+ * Map the pages of a ttm_tt object to an address space visible
+ * to the underlying device.
+ */
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
struct ttm_operation_ctx *ctx)
{
@@ -1015,6 +1229,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
struct amdgpu_ttm_tt *gtt = (void *)ttm;
bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+ /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
if (gtt && gtt->userptr) {
ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
@@ -1039,9 +1254,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
}
#endif
+ /* fall back to generic helper to populate the page array
+ * and map them to the device */
return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
}
+/**
+ * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
+ *
+ * Unmaps pages of a ttm_tt object from the device address space and
+ * unpopulates the page array backing it.
+ */
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
struct amdgpu_device *adev;
@@ -1067,9 +1290,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
}
#endif
+ /* fall back to generic helper to unmap and unpopulate array */
ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
}
+/**
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt
+ * for the current task
+ *
+ * @ttm: The ttm_tt object to bind this userptr object to
+ * @addr: The address in the current tasks VM space to use
+ * @flags: Requirements of userptr object.
+ *
+ * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
+ * to current task
+ */
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags)
{
@@ -1079,8 +1314,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return -EINVAL;
gtt->userptr = addr;
- gtt->usermm = current->mm;
gtt->userflags = flags;
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+ gtt->usertask = current->group_leader;
+ get_task_struct(gtt->usertask);
+
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
atomic_set(&gtt->mmu_invalidations, 0);
@@ -1089,6 +1329,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return 0;
}
+/**
+ * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
+ */
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1096,9 +1339,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
if (gtt == NULL)
return NULL;
- return gtt->usermm;
+ if (gtt->usertask == NULL)
+ return NULL;
+
+ return gtt->usertask->mm;
}
+/**
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays
+ * inside an address range for the
+ * current task.
+ *
+ */
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
@@ -1109,10 +1361,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt == NULL || !gtt->userptr)
return false;
+ /* Return false if no part of the ttm_tt object lies within
+ * the range
+ */
size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
+ /* Search the lists of tasks that hold this mapping and see
+ * if current is one of them. If it is return false.
+ */
spin_lock(&gtt->guptasklock);
list_for_each_entry(entry, &gtt->guptasks, list) {
if (entry->task == current) {
@@ -1127,6 +1385,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
return true;
}
+/**
+ * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been
+ * invalidated?
+ */
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated)
{
@@ -1137,6 +1399,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
return prev_invalidated != *last_invalidated;
}
+/**
+ * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this
+ * ttm_tt object been invalidated
+ * since the last time they've
+ * been set?
+ */
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1147,6 +1415,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
}
+/**
+ * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
+ */
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1157,6 +1428,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
{
@@ -1181,6 +1458,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
return flags;
}
+/**
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict
+ * a buffer object.
+ *
+ * Return true if eviction is sensible. Called by
+ * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
+ * which tries to evict buffer objects until it can find space
+ * for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
@@ -1227,6 +1514,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
}
+/**
+ * amdgpu_ttm_access_memory - Read or Write memory that backs a
+ * buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from
+ * @len: Length in bytes of access
+ * @write: true if writing
+ *
+ * This is used to access VRAM that backs a buffer object via MMIO
+ * access for debugging purposes.
+ */
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
unsigned long offset,
void *buf, int len, int write)
@@ -1329,6 +1629,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_param bp;
int r = 0;
int i;
u64 vram_size = adev->gmc.visible_vram_size;
@@ -1336,17 +1637,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
u64 size = adev->fw_vram_usage.size;
struct amdgpu_bo *bo;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->fw_vram_usage.size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
if (adev->fw_vram_usage.size > 0 &&
adev->fw_vram_usage.size <= vram_size) {
- r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- ttm_bo_type_kernel, NULL,
+ r = amdgpu_bo_create(adev, &bp,
&adev->fw_vram_usage.reserved_bo);
if (r)
goto error_create;
@@ -1375,7 +1680,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
AMDGPU_GEM_DOMAIN_VRAM,
adev->fw_vram_usage.start_offset,
(adev->fw_vram_usage.start_offset +
- adev->fw_vram_usage.size), NULL);
+ adev->fw_vram_usage.size));
if (r)
goto error_pin;
r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
@@ -1398,13 +1703,22 @@ error_create:
adev->fw_vram_usage.reserved_bo = NULL;
return r;
}
-
+/**
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as
+ * various gtt/vram related fields.
+ *
+ * This initializes all of the memory space pools that the TTM layer
+ * will need such as the GTT space (system memory mapped to the device),
+ * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
+ * can be mapped per VMID.
+ */
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
uint64_t gtt_size;
int r;
u64 vis_vram_limit;
+ /* initialize global references for vram/gtt */
r = amdgpu_ttm_global_init(adev);
if (r) {
return r;
@@ -1425,6 +1739,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* We opt to avoid OOM on system pages allocations */
adev->mman.bdev.no_retry = true;
+ /* Initialize VRAM pool with all of VRAM divided into pages */
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
adev->gmc.real_vram_size >> PAGE_SHIFT);
if (r) {
@@ -1454,15 +1769,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->stolen_vga_memory,
- NULL, NULL);
- if (r)
- return r;
+ /* allocate memory as required for VGA
+ * This is used for VGA emulation and pre-OS scanout buffers to
+ * avoid display artifacts while transitioning between pre-OS
+ * and driver. */
+ if (adev->gmc.stolen_size) {
+ r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->stolen_vga_memory,
+ NULL, NULL);
+ if (r)
+ return r;
+ }
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
+ /* Compute GTT size, either bsaed on 3/4th the size of RAM size
+ * or whatever the user passed on module init */
if (amdgpu_gtt_size == -1) {
struct sysinfo si;
@@ -1473,6 +1796,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
else
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ /* Initialize GTT memory pool */
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing GTT heap.\n");
@@ -1481,6 +1806,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
(unsigned)(gtt_size / (1024 * 1024)));
+ /* Initialize various on-chip memory pools */
adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
@@ -1520,6 +1846,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
}
+ /* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
DRM_ERROR("Failed to init debugfs\n");
@@ -1528,13 +1855,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_ttm_late_init - Handle any late initialization for
+ * amdgpu_ttm
+ */
+void amdgpu_ttm_late_init(struct amdgpu_device *adev)
+{
+ /* return the VGA stolen memory (if any) back to VRAM */
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+}
+
+/**
+ * amdgpu_ttm_fini - De-initialize the TTM memory pools
+ */
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
if (!adev->mman.initialized)
return;
amdgpu_ttm_debugfs_fini(adev);
- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
if (adev->mman.aper_base_kaddr)
iounmap(adev->mman.aper_base_kaddr);
@@ -1567,10 +1906,29 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
uint64_t size;
+ int r;
- if (!adev->mman.initialized || adev->in_gpu_reset)
+ if (!adev->mman.initialized || adev->in_gpu_reset ||
+ adev->mman.buffer_funcs_enabled == enable)
return;
+ if (enable) {
+ struct amdgpu_ring *ring;
+ struct drm_sched_rq *rq;
+
+ ring = adev->mman.buffer_funcs_ring;
+ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+ r);
+ return;
+ }
+ } else {
+ drm_sched_entity_destroy(adev->mman.entity.sched,
+ &adev->mman.entity);
+ }
+
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
if (enable)
size = adev->gmc.real_vram_size;
@@ -1648,7 +2006,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
if (r)
goto error_free;
- r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
goto error_free;
@@ -1717,24 +2075,19 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
- if (direct_submit) {
- r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
- NULL, fence);
- job->fence = dma_fence_get(*fence);
- if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ if (direct_submit)
+ r = amdgpu_job_submit_direct(job, ring, fence);
+ else
+ r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
- if (r)
- goto error_free;
- }
+ if (r)
+ goto error_free;
return r;
error_free:
amdgpu_job_free(job);
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
return r;
}
@@ -1817,7 +2170,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
- r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;
@@ -1856,6 +2209,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
#endif
};
+/**
+ * amdgpu_ttm_vram_read - Linear read access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -1895,6 +2253,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_ttm_vram_write - Linear write access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -1943,6 +2306,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+/**
+ * amdgpu_ttm_gtt_read - Linear read access to GTT memory
+ */
static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -1990,6 +2356,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
#endif
+/**
+ * amdgpu_iomem_read - Virtual read access to GPU mapped memory
+ *
+ * This function is used to read memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -1998,6 +2371,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
+ /* retrieve the IOMMU domain if any for this device */
dom = iommu_get_domain_for_dev(adev->dev);
while (size) {
@@ -2010,6 +2384,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
bytes = bytes < size ? bytes : size;
+ /* Translate the bus address to a physical address. If
+ * the domain is NULL it means there is no IOMMU active
+ * and the address translation is the identity
+ */
addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
pfn = addr >> PAGE_SHIFT;
@@ -2034,6 +2412,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_iomem_write - Virtual write access to GPU mapped memory
+ *
+ * This function is used to write memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 6ea7de863041..8b3cc6687769 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -73,10 +73,12 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem);
uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_late_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5916cc25e28b..f55f72a37ca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
- DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ if (version_minor == 1) {
+ const struct rlc_firmware_header_v2_1 *v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+ le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+ DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+ DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+ DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+ DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+ DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+ DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+ DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+ DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+ DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+ DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+ DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+ DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+ }
} else {
DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
}
@@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
@@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
+ case CHIP_VEGA20:
+ return AMDGPU_FW_LOAD_DIRECT;
default:
DRM_ERROR("Unknown firmware load type\n");
}
@@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+ memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+ memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+ memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
+ ucode->ucode_size);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 30b5500dc152..08e38579af24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 {
uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
};
+/* version_major=2, version_minor=1 */
+struct rlc_firmware_header_v2_1 {
+ struct rlc_firmware_header_v2_0 v2_0;
+ uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
+ uint32_t save_restore_list_cntl_ucode_ver;
+ uint32_t save_restore_list_cntl_feature_ver;
+ uint32_t save_restore_list_cntl_size_bytes;
+ uint32_t save_restore_list_cntl_offset_bytes;
+ uint32_t save_restore_list_gpm_ucode_ver;
+ uint32_t save_restore_list_gpm_feature_ver;
+ uint32_t save_restore_list_gpm_size_bytes;
+ uint32_t save_restore_list_gpm_offset_bytes;
+ uint32_t save_restore_list_srm_ucode_ver;
+ uint32_t save_restore_list_srm_feature_ver;
+ uint32_t save_restore_list_srm_size_bytes;
+ uint32_t save_restore_list_srm_offset_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -148,6 +166,7 @@ union amdgpu_firmware_header {
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
+ struct rlc_firmware_header_v2_1 rlc_v2_1;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
@@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_RLC_G,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 627542b22ae4..80b5c453f8c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -53,11 +53,11 @@
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
-#define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin"
-#define FIRMWARE_KABINI "radeon/kabini_uvd.bin"
-#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin"
-#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin"
-#define FIRMWARE_MULLINS "radeon/mullins_uvd.bin"
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
@@ -66,15 +66,18 @@
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
-#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
-#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
-#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
+/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */
+#define UVD_GPCOM_VCPU_CMD 0x03c3
+#define UVD_GPCOM_VCPU_DATA0 0x03c4
+#define UVD_GPCOM_VCPU_DATA1 0x03c5
+#define UVD_NO_OP 0x03ff
+#define UVD_BASE_SI 0x3800
/**
* amdgpu_uvd_cs_ctx - Command submission parser context
@@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
@@ -122,8 +127,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned version_major, version_minor, family_id;
- int i, r;
+ unsigned family_id;
+ int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_VEGA12:
fw_name = FIRMWARE_VEGA12;
break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
default:
return -EINVAL;
}
@@ -197,52 +208,70 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
- version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
- version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
- version_major, version_minor, family_id);
-
- /*
- * Limit the number of UVD handles depending on microcode major
- * and minor versions. The firmware version which has 40 UVD
- * instances support is 1.80. So all subsequent versions should
- * also have the same support.
- */
- if ((version_major > 0x01) ||
- ((version_major == 0x01) && (version_minor >= 0x50)))
- adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
- adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
- (family_id << 8));
+ if (adev->asic_type < CHIP_VEGA20) {
+ unsigned version_major, version_minor;
+
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
+ version_major, version_minor, family_id);
+
+ /*
+ * Limit the number of UVD handles depending on microcode major
+ * and minor versions. The firmware version which has 40 UVD
+ * instances support is 1.80. So all subsequent versions should
+ * also have the same support.
+ */
+ if ((version_major > 0x01) ||
+ ((version_major == 0x01) && (version_minor >= 0x50)))
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
+ (family_id << 8));
+
+ if ((adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS11) &&
+ (adev->uvd.fw_version < FW_1_66_16))
+ DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
+ version_major, version_minor);
+ } else {
+ unsigned int enc_major, enc_minor, dec_minor;
+
+ dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
+ enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
+ DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n",
+ enc_major, enc_minor, dec_minor, family_id);
- if ((adev->asic_type == CHIP_POLARIS10 ||
- adev->asic_type == CHIP_POLARIS11) &&
- (adev->uvd.fw_version < FW_1_66_16))
- DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
- version_major, version_minor);
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
+ }
bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
- &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
- return r;
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+ return r;
+ }
}
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst[0].ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
- rq, amdgpu_sched_jobs, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up UVD run queue.\n");
+ r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
return r;
}
-
for (i = 0; i < adev->uvd.max_handles; ++i) {
atomic_set(&adev->uvd.handles[i], 0);
adev->uvd.filp[i] = NULL;
@@ -274,20 +303,23 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
- int i;
- kfree(adev->uvd.saved_bo);
+ int i, j;
- drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+ drm_sched_entity_destroy(&adev->uvd.inst->ring.sched,
+ &adev->uvd.entity);
- amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
- &adev->uvd.gpu_addr,
- (void **)&adev->uvd.cpu_addr);
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ kfree(adev->uvd.inst[j].saved_bo);
- amdgpu_ring_fini(&adev->uvd.ring);
+ amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ (void **)&adev->uvd.inst[j].cpu_addr);
- for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
- amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring);
+ for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
release_firmware(adev->uvd.fw);
return 0;
@@ -297,10 +329,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
- int i;
-
- if (adev->uvd.vcpu_bo == NULL)
- return 0;
+ int i, j;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -314,15 +343,19 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
return 0;
}
- size = amdgpu_bo_size(adev->uvd.vcpu_bo);
- ptr = adev->uvd.cpu_addr;
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.inst[j].vcpu_bo == NULL)
+ continue;
- adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
- if (!adev->uvd.saved_bo)
- return -ENOMEM;
+ size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+ ptr = adev->uvd.inst[j].cpu_addr;
- memcpy_fromio(adev->uvd.saved_bo, ptr, size);
+ adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+ if (!adev->uvd.inst[j].saved_bo)
+ return -ENOMEM;
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+ }
return 0;
}
@@ -330,51 +363,54 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
+ int i;
- if (adev->uvd.vcpu_bo == NULL)
- return -EINVAL;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
- size = amdgpu_bo_size(adev->uvd.vcpu_bo);
- ptr = adev->uvd.cpu_addr;
+ size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+ ptr = adev->uvd.inst[i].cpu_addr;
- if (adev->uvd.saved_bo != NULL) {
- memcpy_toio(ptr, adev->uvd.saved_bo, size);
- kfree(adev->uvd.saved_bo);
- adev->uvd.saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned offset;
-
- hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ if (adev->uvd.inst[i].saved_bo != NULL) {
+ memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+ kfree(adev->uvd.inst[i].saved_bo);
+ adev->uvd.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned offset;
+
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
+ /* to restore uvd fence seq */
+ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
}
- memset_io(ptr, 0, size);
- /* to restore uvd fence seq */
- amdgpu_fence_driver_force_completion(&adev->uvd.ring);
}
-
return 0;
}
void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
int i, r;
for (i = 0; i < adev->uvd.max_handles; ++i) {
uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+
if (handle != 0 && adev->uvd.filp[i] == filp) {
struct dma_fence *fence;
- r = amdgpu_uvd_get_destroy_msg(ring, handle,
- false, &fence);
+ r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
+ &fence);
if (r) {
- DRM_ERROR("Error destroying UVD (%d)!\n", r);
+ DRM_ERROR("Error destroying UVD %d!\n", r);
continue;
}
@@ -665,7 +701,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
- DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
+ DRM_ERROR("Failed mapping the UVD) message (%ld)!\n", r);
return r;
}
@@ -687,7 +723,8 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* try to alloc a new handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
if (atomic_read(&adev->uvd.handles[i]) == handle) {
- DRM_ERROR("Handle 0x%x already in use!\n", handle);
+ DRM_ERROR(")Handle 0x%x already in use!\n",
+ handle);
return -EINVAL;
}
@@ -800,7 +837,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
}
if ((cmd == 0 || cmd == 0x3) &&
- (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
+ (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
start, end);
return -EINVAL;
@@ -968,6 +1005,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
uint64_t addr;
long r;
int i;
+ unsigned offset_idx = 0;
+ unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
@@ -987,17 +1026,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
goto err;
if (adev->asic_type >= CHIP_VEGA10) {
- data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
- data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
- data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
- data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
- } else {
- data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
- data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
- data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
- data[3] = PACKET0(mmUVD_NO_OP, 0);
+ offset_idx = 1 + ring->me;
+ offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+ offset[2] = adev->reg_offset[UVD_HWIP][1][1];
}
+ data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
+
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
ib->ptr[0] = data[0];
@@ -1021,19 +1059,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r < 0)
goto err_free;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
-
- amdgpu_job_free(job);
} else {
r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (r)
goto err_free;
- r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
+ r = amdgpu_job_submit(job, &adev->uvd.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
@@ -1122,7 +1157,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
- unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
+ unsigned fences = 0, i, j;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
+ }
+ }
if (fences == 0) {
if (adev->pm.dpm_enabled) {
@@ -1179,27 +1221,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence;
long r;
+ uint32_t ip_instance = ring->me;
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
if (r) {
- DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
goto error;
}
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
- DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out.\n");
+ DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
r = -ETIMEDOUT;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
} else {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
r = 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 32ea20b99e53..66872286ab12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,30 +31,36 @@
#define AMDGPU_UVD_SESSION_SIZE (50*1024)
#define AMDGPU_UVD_FIRMWARE_OFFSET 256
+#define AMDGPU_MAX_UVD_INSTANCES 2
+
#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \
(AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
8) - AMDGPU_UVD_FIRMWARE_OFFSET)
-struct amdgpu_uvd {
+struct amdgpu_uvd_inst {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
- unsigned fw_version;
void *saved_bo;
- unsigned max_handles;
- atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
- struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
- struct delayed_work idle_work;
- const struct firmware *fw; /* UVD firmware */
struct amdgpu_ring ring;
struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
struct amdgpu_irq_src irq;
+ uint32_t srbm_soft_reset;
+};
+
+struct amdgpu_uvd {
+ const struct firmware *fw; /* UVD firmware */
+ unsigned fw_version;
+ unsigned max_handles;
+ unsigned num_enc_rings;
+ uint8_t num_uvd_inst;
bool address_64_bit;
bool use_ctx_buf;
+ struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
+ struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
+ atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_sched_entity entity;
- struct drm_sched_entity entity_enc;
- uint32_t srbm_soft_reset;
- unsigned num_enc_rings;
+ struct delayed_work idle_work;
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a33804bd3314..86182c966ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -40,22 +40,24 @@
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
-#define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin"
-#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
-#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin"
-#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin"
-#define FIRMWARE_MULLINS "radeon/mullins_vce.bin"
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
-#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
-#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
@@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
case CHIP_VEGA12:
fw_name = FIRMWARE_VEGA12;
break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
default:
return -EINVAL;
@@ -180,8 +190,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
ring = &adev->vce.ring[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
- rq, amdgpu_sched_jobs, NULL);
+ r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCE run queue.\n");
return r;
@@ -212,7 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
- drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
+ drm_sched_entity_destroy(&adev->vce.ring[0].sched, &adev->vce.entity);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
(void **)&adev->vce.cpu_addr);
@@ -460,12 +469,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -522,19 +529,13 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
+ if (direct)
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ else
+ r = amdgpu_job_submit(job, &ring->adev->vce.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err;
- }
+ if (r)
+ goto err;
if (fence)
*fence = dma_fence_get(f);
@@ -755,6 +756,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
if (r)
goto out;
break;
+
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
+ idx + 2, 0, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
+ idx + 7, 0, 0);
+ if (r)
+ goto out;
+ break;
}
idx += len / 4;
@@ -860,6 +873,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
goto out;
break;
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
+ idx + 2, *size, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
+ idx + 7, *size / 12, 0);
+ if (r)
+ goto out;
+ break;
+
default:
DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 58e495330b38..798648a19710 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -49,12 +49,10 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned version_major, version_minor, family_id;
+ unsigned char fw_check;
int r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
@@ -84,12 +82,34 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
}
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
- version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
- version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
- version_major, version_minor, family_id);
+ adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
+
+ /* Bit 20-23, it is encode major and non-zero for new naming convention.
+ * This field is part of version minor and DRM_DISABLED_FLAG in old naming
+ * convention. Since the l:wq!atest version minor is 0x5B and DRM_DISABLED_FLAG
+ * is zero in old naming convention, this field is always zero so far.
+ * These four bits are used to tell which naming convention is present.
+ */
+ fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
+ if (fw_check) {
+ unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;
+
+ fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
+ enc_major = fw_check;
+ dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
+ vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
+ DRM_INFO("Found VCN firmware Version ENC: %hu.%hu DEC: %hu VEP: %hu Revision: %hu\n",
+ enc_major, enc_minor, dec_ver, vep, fw_rev);
+ } else {
+ unsigned int version_major, version_minor, family_id;
+ family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
+ version_major, version_minor, family_id);
+ }
bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+ AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
@@ -102,24 +122,6 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return r;
}
- ring = &adev->vcn.ring_dec;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
- rq, amdgpu_sched_jobs, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up VCN dec run queue.\n");
- return r;
- }
-
- ring = &adev->vcn.ring_enc[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
- rq, amdgpu_sched_jobs, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up VCN enc run queue.\n");
- return r;
- }
-
return 0;
}
@@ -129,10 +131,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
kfree(adev->vcn.saved_bo);
- drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
-
- drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
-
amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
&adev->vcn.gpu_addr,
(void **)&adev->vcn.cpu_addr);
@@ -142,6 +140,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
+ amdgpu_ring_fini(&adev->vcn.ring_jpeg);
+
release_firmware(adev->vcn.fw);
return 0;
@@ -205,13 +205,20 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+ unsigned i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
+ }
+
+ fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
if (fences == 0) {
- if (adev->pm.dpm_enabled) {
- /* might be used when with pg/cg
+ if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
- */
- }
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
} else {
schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -222,10 +229,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
- if (set_clocks && adev->pm.dpm_enabled) {
- /* might be used when with pg/cg
- amdgpu_dpm_enable_uvd(adev, true);
- */
+ if (set_clocks) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
}
}
@@ -271,7 +280,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
}
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
- struct amdgpu_bo *bo, bool direct,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
@@ -299,19 +308,9 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
}
ib->length_dw = 16;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err_free;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err_free;
- }
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
amdgpu_bo_fence(bo, f, false);
amdgpu_bo_unreserve(bo);
@@ -363,11 +362,11 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = 14; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
+ return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct dma_fence **fence)
+ struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_bo *bo = NULL;
@@ -389,7 +388,7 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = 6; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
+ return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
@@ -403,7 +402,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto error;
}
- r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
goto error;
@@ -497,12 +496,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -551,12 +548,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -599,3 +594,127 @@ error:
dma_fence_put(fence);
return r;
}
+
+int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < adev->usec_timeout) {
+ DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
+ ring->idx, i);
+ } else {
+ DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
+ ring->idx, tmp);
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ const unsigned ib_size_dw = 16;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0);
+ ib->ptr[1] = 0xDEADBEEF;
+ for (i = 2; i < 16; i += 2) {
+ ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ struct dma_fence *fence = NULL;
+ long r = 0;
+
+ r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
+ goto error;
+ }
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out.\n");
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto error;
+ } else
+ r = 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH));
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < adev->usec_timeout)
+ DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ else {
+ DRM_ERROR("ib test failed (0x%08X)\n", tmp);
+ r = -EINVAL;
+ }
+
+ dma_fence_put(fence);
+
+error:
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 2fd7db891689..0b0b8638d73f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,17 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
+enum engine_status_constants {
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
+ UVD_STATUS__UVD_BUSY = 0x00000004,
+ GB_ADDR_CONFIG_DEFAULT = 0x26010011,
+ UVD_STATUS__IDLE = 0x2,
+ UVD_STATUS__BUSY = 0x5,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
+ UVD_STATUS__RBC_BUSY = 0x1,
+};
+
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
@@ -55,9 +66,8 @@ struct amdgpu_vcn {
const struct firmware *fw; /* VCN firmware */
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+ struct amdgpu_ring ring_jpeg;
struct amdgpu_irq_src irq;
- struct drm_sched_entity entity_dec;
- struct drm_sched_entity entity_enc;
unsigned num_enc_rings;
};
@@ -74,4 +84,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index da55a78d7380..098dd1ba751a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -33,9 +33,11 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_gmc.h"
-/*
- * GPUVM
+/**
+ * DOC: GPUVM
+ *
* GPUVM is similar to the legacy gart on older asics, however
* rather than there being a single global gart table
* for the entire GPU, there are multiple VM page tables active
@@ -63,43 +65,123 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
#undef START
#undef LAST
-/* Local structure. Encapsulate some VM table update parameters to reduce
+/**
+ * struct amdgpu_pte_update_params - Local structure
+ *
+ * Encapsulate some VM table update parameters to reduce
* the number of function parameters
+ *
*/
struct amdgpu_pte_update_params {
- /* amdgpu device we do this update for */
+
+ /**
+ * @adev: amdgpu device we do this update for
+ */
struct amdgpu_device *adev;
- /* optional amdgpu_vm we do this update for */
+
+ /**
+ * @vm: optional amdgpu_vm we do this update for
+ */
struct amdgpu_vm *vm;
- /* address where to copy page table entries from */
+
+ /**
+ * @src: address where to copy page table entries from
+ */
uint64_t src;
- /* indirect buffer to fill with commands */
+
+ /**
+ * @ib: indirect buffer to fill with commands
+ */
struct amdgpu_ib *ib;
- /* Function which actually does the update */
+
+ /**
+ * @func: Function which actually does the update
+ */
void (*func)(struct amdgpu_pte_update_params *params,
struct amdgpu_bo *bo, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags);
- /* The next two are used during VM update by CPU
- * DMA addresses to use for mapping
- * Kernel pointer of PD/PT BO that needs to be updated
+ /**
+ * @pages_addr:
+ *
+ * DMA addresses to use for mapping, used during VM update by CPU
*/
dma_addr_t *pages_addr;
+
+ /**
+ * @kptr:
+ *
+ * Kernel pointer of PD/PT BO that needs to be updated,
+ * used during VM update by CPU
+ */
void *kptr;
};
-/* Helper to disable partial resident texture feature from a fence callback */
+/**
+ * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
+ */
struct amdgpu_prt_cb {
+
+ /**
+ * @adev: amdgpu device
+ */
struct amdgpu_device *adev;
+
+ /**
+ * @cb: callback
+ */
struct dma_fence_cb cb;
};
/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ base->vm = vm;
+ base->bo = bo;
+ INIT_LIST_HEAD(&base->bo_list);
+ INIT_LIST_HEAD(&base->vm_status);
+
+ if (!bo)
+ return;
+ list_add_tail(&base->bo_list, &bo->va);
+
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ list_move(&base->vm_status, &vm->relocated);
+
+ if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+ return;
+
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+ return;
+
+ /*
+ * we checked all the prerequisites, but it looks like this per vm bo
+ * is currently evicted. add the bo to the evicted list to make sure it
+ * is validated on next vm use to avoid fault.
+ * */
+ list_move_tail(&base->vm_status, &vm->evicted);
+}
+
+/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
* @adev: amdgpu_device pointer
+ * @level: VMPT level
*
- * Returns the number of bits the pfn needs to be right shifted for a level.
+ * Returns:
+ * The number of bits the pfn needs to be right shifted for a level.
*/
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
unsigned level)
@@ -127,8 +209,10 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
* amdgpu_vm_num_entries - return the number of entries in a PD/PT
*
* @adev: amdgpu_device pointer
+ * @level: VMPT level
*
- * Calculate the number of entries in a page directory or page table.
+ * Returns:
+ * The number of entries in a page directory or page table.
*/
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
unsigned level)
@@ -151,8 +235,10 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
* amdgpu_vm_bo_size - returns the size of the BOs in bytes
*
* @adev: amdgpu_device pointer
+ * @level: VMPT level
*
- * Calculate the size of the BO for a page directory or page table in bytes.
+ * Returns:
+ * The size of the BO for a page directory or page table in bytes.
*/
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
@@ -190,30 +276,25 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
* @param: parameter for the validation callback
*
* Validate the page table BOs on command submission if neccessary.
+ *
+ * Returns:
+ * Validation result.
*/
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
{
struct ttm_bo_global *glob = adev->mman.bdev.glob;
- int r;
+ struct amdgpu_vm_bo_base *bo_base, *tmp;
+ int r = 0;
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->evicted)) {
- struct amdgpu_vm_bo_base *bo_base;
- struct amdgpu_bo *bo;
+ list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
+ struct amdgpu_bo *bo = bo_base->bo;
- bo_base = list_first_entry(&vm->evicted,
- struct amdgpu_vm_bo_base,
- vm_status);
- spin_unlock(&vm->status_lock);
-
- bo = bo_base->bo;
- BUG_ON(!bo);
if (bo->parent) {
r = validate(param, bo);
if (r)
- return r;
+ break;
spin_lock(&glob->lru_lock);
ttm_bo_move_to_lru_tail(&bo->tbo);
@@ -222,22 +303,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&glob->lru_lock);
}
- if (bo->tbo.type == ttm_bo_type_kernel &&
- vm->use_cpu_for_update) {
- r = amdgpu_bo_kmap(bo, NULL);
- if (r)
- return r;
- }
-
- spin_lock(&vm->status_lock);
- if (bo->tbo.type != ttm_bo_type_kernel)
+ if (bo->tbo.type != ttm_bo_type_kernel) {
+ spin_lock(&vm->moved_lock);
list_move(&bo_base->vm_status, &vm->moved);
- else
+ spin_unlock(&vm->moved_lock);
+ } else {
list_move(&bo_base->vm_status, &vm->relocated);
+ }
}
- spin_unlock(&vm->status_lock);
- return 0;
+ spin_lock(&glob->lru_lock);
+ list_for_each_entry(bo_base, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = bo_base->bo;
+
+ if (!bo->parent)
+ continue;
+
+ ttm_bo_move_to_lru_tail(&bo->tbo);
+ if (bo->shadow)
+ ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+ }
+ spin_unlock(&glob->lru_lock);
+
+ return r;
}
/**
@@ -246,26 +334,28 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @vm: VM to check
*
* Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+ * True if eviction list is empty.
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
- bool ready;
-
- spin_lock(&vm->status_lock);
- ready = list_empty(&vm->evicted);
- spin_unlock(&vm->status_lock);
-
- return ready;
+ return list_empty(&vm->evicted);
}
/**
* amdgpu_vm_clear_bo - initially clear the PDs/PTs
*
* @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
* @bo: BO to clear
* @level: level this BO is at
+ * @pte_support_ats: indicate ATS support from PTE
*
* Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_bo *bo,
@@ -335,8 +425,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
goto error_free;
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
+ &fence);
if (r)
goto error_free;
@@ -361,10 +451,16 @@ error:
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @parent: parent PT
* @saddr: start of the address range
* @eaddr: end of the address range
+ * @level: VMPT level
+ * @ats: indicate ATS support from PTE
*
* Make sure the page directories and page tables are allocated
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@@ -412,11 +508,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
struct amdgpu_bo *pt;
if (!entry->base.bo) {
- r = amdgpu_bo_create(adev,
- amdgpu_vm_bo_size(adev, level),
- AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, flags,
- ttm_bo_type_kernel, resv, &pt);
+ struct amdgpu_bo_param bp;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = amdgpu_vm_bo_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = flags;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = resv;
+ r = amdgpu_bo_create(adev, &bp, &pt);
if (r)
return r;
@@ -441,12 +542,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
*/
pt->parent = amdgpu_bo_ref(parent->base.bo);
- entry->base.vm = vm;
- entry->base.bo = pt;
- list_add_tail(&entry->base.bo_list, &pt->va);
- spin_lock(&vm->status_lock);
- list_add(&entry->base.vm_status, &vm->relocated);
- spin_unlock(&vm->status_lock);
+ amdgpu_vm_bo_base_init(&entry->base, vm, pt);
}
if (level < AMDGPU_VM_PTB) {
@@ -472,6 +568,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
* @size: Size from start address we need.
*
* Make sure the page tables are allocated.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@@ -537,6 +636,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
+ *
+ * @ring: ring on which the job will be submitted
+ * @job: job to submit
+ *
+ * Returns:
+ * True if sync is needed.
+ */
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
@@ -564,19 +672,17 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
return vm_flush_needed || gds_switch_needed;
}
-static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
-{
- return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
-}
-
/**
* amdgpu_vm_flush - hardware flush the vm
*
* @ring: ring to use for flush
- * @vmid: vmid number to use
- * @pd_addr: address of the page directory
+ * @job: related job
+ * @need_pipe_sync: is pipe sync needed
*
* Emit a VM flush when it is necessary.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
@@ -628,7 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
if (vm_flush_needed || pasid_mapping_needed) {
- r = amdgpu_fence_emit(ring, &fence);
+ r = amdgpu_fence_emit(ring, &fence, 0);
if (r)
return r;
}
@@ -684,6 +790,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
* Returns the found bo_va or NULL if none is found
*
* Object has to be reserved!
+ *
+ * Returns:
+ * Found bo_va or NULL.
*/
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo)
@@ -765,7 +874,10 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
* @addr: the unmapped addr
*
* Look up the physical address of the page that the pte resolves
- * to and return the pointer for the page table entry.
+ * to.
+ *
+ * Returns:
+ * The pointer for the page table entry.
*/
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
@@ -818,6 +930,17 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
}
}
+
+/**
+ * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: related vm
+ * @owner: fence owner
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *owner)
{
@@ -871,7 +994,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
/*
* amdgpu_vm_invalidate_level - mark all PD levels as invalid
*
+ * @adev: amdgpu_device pointer
+ * @vm: related vm
* @parent: parent PD
+ * @level: VMPT level
*
* Mark all PD level as invalid after an error.
*/
@@ -893,10 +1019,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
if (!entry->base.bo)
continue;
- spin_lock(&vm->status_lock);
- if (list_empty(&entry->base.vm_status))
- list_add(&entry->base.vm_status, &vm->relocated);
- spin_unlock(&vm->status_lock);
+ if (!entry->base.moved)
+ list_move(&entry->base.vm_status, &vm->relocated);
amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
}
}
@@ -908,7 +1032,9 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
* @vm: requested vm
*
* Makes sure all directories are up to date.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*/
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
@@ -926,6 +1052,14 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
+ struct amdgpu_vm_bo_base *bo_base;
+
+ list_for_each_entry(bo_base, &vm->relocated, vm_status) {
+ r = amdgpu_bo_kmap(bo_base->bo, NULL);
+ if (unlikely(r))
+ return r;
+ }
+
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
if (unlikely(r))
return r;
@@ -941,7 +1075,6 @@ restart:
params.func = amdgpu_vm_do_set_ptes;
}
- spin_lock(&vm->status_lock);
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *bo_base, *parent;
struct amdgpu_vm_pt *pt, *entry;
@@ -950,14 +1083,12 @@ restart:
bo_base = list_first_entry(&vm->relocated,
struct amdgpu_vm_bo_base,
vm_status);
+ bo_base->moved = false;
list_del_init(&bo_base->vm_status);
- spin_unlock(&vm->status_lock);
bo = bo_base->bo->parent;
- if (!bo) {
- spin_lock(&vm->status_lock);
+ if (!bo)
continue;
- }
parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
bo_list);
@@ -966,12 +1097,10 @@ restart:
amdgpu_vm_update_pde(&params, vm, pt, entry);
- spin_lock(&vm->status_lock);
if (!vm->use_cpu_for_update &&
(ndw - params.ib->length_dw) < 32)
break;
}
- spin_unlock(&vm->status_lock);
if (vm->use_cpu_for_update) {
/* Flush HDP */
@@ -991,8 +1120,8 @@ restart:
amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
AMDGPU_FENCE_OWNER_VM, false);
WARN_ON(params.ib->length_dw > ndw);
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &fence);
+ r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
+ &fence);
if (r)
goto error;
@@ -1074,9 +1203,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
if (entry->huge) {
/* Add the entry to the relocated list to update it. */
entry->huge = false;
- spin_lock(&p->vm->status_lock);
list_move(&entry->base.vm_status, &p->vm->relocated);
- spin_unlock(&p->vm->status_lock);
}
return;
}
@@ -1094,14 +1221,15 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
* amdgpu_vm_update_ptes - make sure that page tables are valid
*
* @params: see amdgpu_pte_update_params definition
- * @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @dst: destination address to map to, the next dst inside the function
* @flags: mapping flags
*
* Update the page tables in the range @start - @end.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
@@ -1155,7 +1283,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
* @end: last PTE to handle
* @dst: addr those PTEs should point to
* @flags: hw mapping flags
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
@@ -1227,7 +1357,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
@@ -1303,7 +1435,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ndw += ncmds * 10;
/* extra commands for begin/end fragments */
- ndw += 2 * 10 * adev->vm_manager.fragment_size;
+ if (vm->root.base.bo->shadow)
+ ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
+ else
+ ndw += 2 * 10 * adev->vm_manager.fragment_size;
params.func = amdgpu_vm_do_set_ptes;
}
@@ -1350,8 +1485,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
amdgpu_ring_pad_ib(ring, params.ib);
WARN_ON(params.ib->length_dw > ndw);
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &f);
+ r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);
if (r)
goto error_free;
@@ -1379,7 +1513,9 @@ error_free:
*
* Split the mapping into smaller chunks so that each update fits
* into a SDMA IB.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
@@ -1432,7 +1568,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (nodes) {
addr = nodes->start << PAGE_SHIFT;
max_entries = (nodes->size - pfn) *
- (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
} else {
addr = 0;
max_entries = S64_MAX;
@@ -1442,7 +1578,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
uint64_t count;
max_entries = min(max_entries, 16ull * 1024ull);
- for (count = 1; count < max_entries; ++count) {
+ for (count = 1;
+ count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ ++count) {
uint64_t idx = pfn + count;
if (pages_addr[idx] !=
@@ -1455,7 +1593,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
dma_addr = pages_addr;
} else {
addr = pages_addr[pfn];
- max_entries = count;
+ max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
} else if (flags & AMDGPU_PTE_VALID) {
@@ -1470,7 +1608,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (r)
return r;
- pfn += last - start + 1;
+ pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
if (nodes && nodes->size == pfn) {
pfn = 0;
++nodes;
@@ -1490,7 +1628,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
* @clear: if true clear the entries
*
* Fill in the page table entries for @bo_va.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@@ -1506,18 +1646,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
uint64_t flags;
int r;
- if (clear || !bo_va->base.bo) {
+ if (clear || !bo) {
mem = NULL;
nodes = NULL;
exclusive = NULL;
} else {
struct ttm_dma_tt *ttm;
- mem = &bo_va->base.bo->tbo.mem;
+ mem = &bo->tbo.mem;
nodes = mem->mm_node;
if (mem->mem_type == TTM_PL_TT) {
- ttm = container_of(bo_va->base.bo->tbo.ttm,
- struct ttm_dma_tt, ttm);
+ ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
pages_addr = ttm->dma_address;
}
exclusive = reservation_object_get_excl(bo->tbo.resv);
@@ -1555,9 +1694,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
amdgpu_asic_flush_hdp(adev, NULL);
}
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->moved_lock);
list_del_init(&bo_va->base.vm_status);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->moved_lock);
+
+ /* If the BO is not in its preferred location add it back to
+ * the evicted list so that it gets validated again on the
+ * next command submission.
+ */
+ if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+ uint32_t mem_type = bo->tbo.mem.mem_type;
+
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+ list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+ else
+ list_add(&bo_va->base.vm_status, &vm->idle);
+ }
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
@@ -1572,6 +1724,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
/**
* amdgpu_vm_update_prt_state - update the global PRT state
+ *
+ * @adev: amdgpu_device pointer
*/
static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
{
@@ -1586,6 +1740,8 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_get - add a PRT user
+ *
+ * @adev: amdgpu_device pointer
*/
static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
{
@@ -1598,6 +1754,8 @@ static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_put - drop a PRT user
+ *
+ * @adev: amdgpu_device pointer
*/
static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
{
@@ -1607,6 +1765,9 @@ static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_cb - callback for updating the PRT status
+ *
+ * @fence: fence for the callback
+ * @_cb: the callback function
*/
static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
{
@@ -1618,6 +1779,9 @@ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
/**
* amdgpu_vm_add_prt_cb - add callback for updating the PRT status
+ *
+ * @adev: amdgpu_device pointer
+ * @fence: fence for the callback
*/
static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
struct dma_fence *fence)
@@ -1709,9 +1873,11 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* or if an error occurred)
*
* Make sure all freed BOs are cleared in the PT.
- * Returns 0 for success.
- *
* PTs have to be reserved and mutex must be locked!
+ *
+ * Returns:
+ * 0 for success.
+ *
*/
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@@ -1756,29 +1922,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @sync: sync object to add fences to
*
* Make sure all BOs which are moved are updated in the PTs.
- * Returns 0 for success.
+ *
+ * Returns:
+ * 0 for success.
*
* PTs have to be reserved!
*/
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
+ struct amdgpu_bo_va *bo_va, *tmp;
+ struct list_head moved;
bool clear;
- int r = 0;
-
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
- struct amdgpu_bo_va *bo_va;
- struct reservation_object *resv;
+ int r;
- bo_va = list_first_entry(&vm->moved,
- struct amdgpu_bo_va, base.vm_status);
- spin_unlock(&vm->status_lock);
+ INIT_LIST_HEAD(&moved);
+ spin_lock(&vm->moved_lock);
+ list_splice_init(&vm->moved, &moved);
+ spin_unlock(&vm->moved_lock);
- resv = bo_va->base.bo->tbo.resv;
+ list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
+ struct reservation_object *resv = bo_va->base.bo->tbo.resv;
/* Per VM BOs never need to bo cleared in the page tables */
if (resv == vm->root.base.bo->tbo.resv)
@@ -1791,17 +1957,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
clear = true;
r = amdgpu_vm_bo_update(adev, bo_va, clear);
- if (r)
+ if (r) {
+ spin_lock(&vm->moved_lock);
+ list_splice(&moved, &vm->moved);
+ spin_unlock(&vm->moved_lock);
return r;
+ }
if (!clear && resv != vm->root.base.bo->tbo.resv)
reservation_object_unlock(resv);
- spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->status_lock);
- return r;
+ return 0;
}
/**
@@ -1813,7 +1981,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
*
* Add @bo into the requested vm.
* Add @bo to the list of bos associated with the vm
- * Returns newly added bo_va or NULL for failure
+ *
+ * Returns:
+ * Newly added bo_va or NULL for failure
*
* Object has to be reserved!
*/
@@ -1827,36 +1997,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
if (bo_va == NULL) {
return NULL;
}
- bo_va->base.vm = vm;
- bo_va->base.bo = bo;
- INIT_LIST_HEAD(&bo_va->base.bo_list);
- INIT_LIST_HEAD(&bo_va->base.vm_status);
+ amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
- if (!bo)
- return bo_va;
-
- list_add_tail(&bo_va->base.bo_list, &bo->va);
-
- if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
- return bo_va;
-
- if (bo->preferred_domains &
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
- return bo_va;
-
- /*
- * We checked all the prerequisites, but it looks like this per VM BO
- * is currently evicted. add the BO to the evicted list to make sure it
- * is validated on next VM use to avoid fault.
- * */
- spin_lock(&vm->status_lock);
- list_move_tail(&bo_va->base.vm_status, &vm->evicted);
- spin_unlock(&vm->status_lock);
-
return bo_va;
}
@@ -1884,11 +2030,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (mapping->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
- spin_lock(&vm->status_lock);
- if (list_empty(&bo_va->base.vm_status))
- list_add(&bo_va->base.vm_status, &vm->moved);
- spin_unlock(&vm->status_lock);
+ if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
+ !bo_va->base.moved) {
+ spin_lock(&vm->moved_lock);
+ list_move(&bo_va->base.vm_status, &vm->moved);
+ spin_unlock(&vm->moved_lock);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
@@ -1900,10 +2046,13 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
* @bo_va: bo_va to store the address
* @saddr: where to map the BO
* @offset: requested offset in the BO
+ * @size: BO size in bytes
* @flags: attributes of pages (read/write/valid/etc.)
*
* Add a mapping of the BO at the specefied addr into the VM.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@@ -1961,11 +2110,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
* @bo_va: bo_va to store the address
* @saddr: where to map the BO
* @offset: requested offset in the BO
+ * @size: BO size in bytes
* @flags: attributes of pages (read/write/valid/etc.)
*
* Add a mapping of the BO at the specefied addr into the VM. Replace existing
* mappings as we do so.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@@ -2022,7 +2174,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
* @saddr: where to the BO is mapped
*
* Remove a mapping of the BO at the specefied addr from the VM.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@@ -2076,7 +2230,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
* @size: size of the range
*
* Remove all mappings in a range, split them as appropriate.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*/
int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@@ -2112,7 +2268,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
before->last = saddr - 1;
before->offset = tmp->offset;
before->flags = tmp->flags;
- list_add(&before->list, &tmp->list);
+ before->bo_va = tmp->bo_va;
+ list_add(&before->list, &tmp->bo_va->invalids);
}
/* Remember mapping split at the end */
@@ -2122,7 +2279,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
after->offset = tmp->offset;
after->offset += after->start - tmp->start;
after->flags = tmp->flags;
- list_add(&after->list, &tmp->list);
+ after->bo_va = tmp->bo_va;
+ list_add(&after->list, &tmp->bo_va->invalids);
}
list_del(&tmp->list);
@@ -2171,8 +2329,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
* amdgpu_vm_bo_lookup_mapping - find mapping by address
*
* @vm: the requested VM
+ * @addr: the address
*
* Find a mapping by it's address.
+ *
+ * Returns:
+ * The amdgpu_bo_va_mapping matching for addr or NULL
+ *
*/
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr)
@@ -2198,9 +2361,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_del(&bo_va->base.bo_list);
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->moved_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->moved_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2224,8 +2387,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
* @adev: amdgpu_device pointer
- * @vm: requested vm
* @bo: amdgpu buffer object
+ * @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
@@ -2234,36 +2397,45 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
{
struct amdgpu_vm_bo_base *bo_base;
+ /* shadow bo doesn't have bo base, its validation needs its parent */
+ if (bo->parent && bo->parent->shadow == bo)
+ bo = bo->parent;
+
list_for_each_entry(bo_base, &bo->va, bo_list) {
struct amdgpu_vm *vm = bo_base->vm;
+ bool was_moved = bo_base->moved;
bo_base->moved = true;
if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
- spin_lock(&bo_base->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&bo_base->vm_status, &vm->evicted);
else
list_move_tail(&bo_base->vm_status,
&vm->evicted);
- spin_unlock(&bo_base->vm->status_lock);
continue;
}
- if (bo->tbo.type == ttm_bo_type_kernel) {
- spin_lock(&bo_base->vm->status_lock);
- if (list_empty(&bo_base->vm_status))
- list_add(&bo_base->vm_status, &vm->relocated);
- spin_unlock(&bo_base->vm->status_lock);
+ if (was_moved)
continue;
- }
- spin_lock(&bo_base->vm->status_lock);
- if (list_empty(&bo_base->vm_status))
- list_add(&bo_base->vm_status, &vm->moved);
- spin_unlock(&bo_base->vm->status_lock);
+ if (bo->tbo.type == ttm_bo_type_kernel) {
+ list_move(&bo_base->vm_status, &vm->relocated);
+ } else {
+ spin_lock(&bo_base->vm->moved_lock);
+ list_move(&bo_base->vm_status, &vm->moved);
+ spin_unlock(&bo_base->vm->moved_lock);
+ }
}
}
+/**
+ * amdgpu_vm_get_block_size - calculate VM page table size as power of two
+ *
+ * @vm_size: VM size
+ *
+ * Returns:
+ * VM page table as power of two
+ */
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
/* Total bits covered by PD + PTs */
@@ -2282,6 +2454,10 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
*
* @adev: amdgpu_device pointer
* @vm_size: the default vm size if it's set auto
+ * @fragment_size_default: Default PTE fragment size
+ * @max_level: max VMPT level
+ * @max_bits: max address space size in bits
+ *
*/
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
uint32_t fragment_size_default, unsigned max_level,
@@ -2349,12 +2525,18 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
* @adev: amdgpu_device pointer
* @vm: requested vm
* @vm_context: Indicates if it GFX or Compute context
+ * @pasid: Process address space identifier
*
* Init @vm fields.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid)
{
+ struct amdgpu_bo_param bp;
+ struct amdgpu_bo *root;
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT(adev) * 8);
unsigned ring_instance;
@@ -2367,10 +2549,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->va = RB_ROOT_CACHED;
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
- spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->evicted);
INIT_LIST_HEAD(&vm->relocated);
+ spin_lock_init(&vm->moved_lock);
INIT_LIST_HEAD(&vm->moved);
+ INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->freed);
/* create scheduler entity for page table updates */
@@ -2379,8 +2562,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
ring_instance %= adev->vm_manager.vm_pte_num_rings;
ring = adev->vm_manager.vm_pte_rings[ring_instance];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- r = drm_sched_entity_init(&ring->sched, &vm->entity,
- rq, amdgpu_sched_jobs, NULL);
+ r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
if (r)
return r;
@@ -2398,7 +2580,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_update = NULL;
@@ -2409,24 +2591,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
- r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
- ttm_bo_type_kernel, NULL, &vm->root.base.bo);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = align;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = flags;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ r = amdgpu_bo_create(adev, &bp, &root);
if (r)
goto error_free_sched_entity;
- r = amdgpu_bo_reserve(vm->root.base.bo, true);
+ r = amdgpu_bo_reserve(root, true);
if (r)
goto error_free_root;
- r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+ r = amdgpu_vm_clear_bo(adev, vm, root,
adev->vm_manager.root_level,
vm->pte_support_ats);
if (r)
goto error_unreserve;
- vm->root.base.vm = vm;
- list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
- list_add_tail(&vm->root.base.vm_status, &vm->evicted);
+ amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
amdgpu_bo_unreserve(vm->root.base.bo);
if (pasid) {
@@ -2456,7 +2642,7 @@ error_free_root:
vm->root.base.bo = NULL;
error_free_sched_entity:
- drm_sched_entity_fini(&ring->sched, &vm->entity);
+ drm_sched_entity_destroy(&ring->sched, &vm->entity);
return r;
}
@@ -2464,6 +2650,9 @@ error_free_sched_entity:
/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
*
@@ -2476,7 +2665,8 @@ error_free_sched_entity:
* setting. May leave behind an unused shadow BO for the page
* directory when switching from SDMA updates to CPU updates.
*
- * Returns 0 for success, -errno for errors.
+ * Returns:
+ * 0 for success, -errno for errors.
*/
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
@@ -2510,7 +2700,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pte_support_ats = pte_support_ats;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->pasid) {
@@ -2589,7 +2779,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
- drm_sched_entity_fini(vm->entity.sched, &vm->entity);
+ drm_sched_entity_destroy(vm->entity.sched, &vm->entity);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
@@ -2631,8 +2821,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @pasid: PASID do identify the VM
*
- * This function is expected to be called in interrupt context. Returns
- * true if there was fault credit, false otherwise
+ * This function is expected to be called in interrupt context.
+ *
+ * Returns:
+ * True if there was fault credit, false otherwise
*/
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid)
@@ -2686,7 +2878,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
- if (amdgpu_vm_is_large_bar(adev))
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
@@ -2716,6 +2908,16 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
amdgpu_vmid_mgr_fini(adev);
}
+/**
+ * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_vm
+ * @filp: drm file pointer
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
union drm_amdgpu_vm *args = data;
@@ -2739,3 +2941,42 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
+
+/**
+ * amdgpu_vm_get_task_info - Extracts task info for a PASID.
+ *
+ * @dev: drm device pointer
+ * @pasid: PASID identifier for VM
+ * @task_info: task_info to fill.
+ */
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+ struct amdgpu_task_info *task_info)
+{
+ struct amdgpu_vm *vm;
+
+ spin_lock(&adev->vm_manager.pasid_lock);
+
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ if (vm)
+ *task_info = vm->task_info;
+
+ spin_unlock(&adev->vm_manager.pasid_lock);
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+ if (!vm->task_info.pid) {
+ vm->task_info.pid = current->pid;
+ get_task_comm(vm->task_info.task_name, current);
+
+ if (current->group_leader->mm == current->mm) {
+ vm->task_info.tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info.process_name, current->group_leader);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 30f080364c97..d416f895233d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry;
/* PDE Block Fragment Size for VEGA10 */
#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
-/* VEGA10 only */
+
+/* For GFX9 */
#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
-/* For Raven */
+#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
@@ -163,13 +164,18 @@ struct amdgpu_vm_pt {
#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
+
+struct amdgpu_task_info {
+ char process_name[TASK_COMM_LEN];
+ char task_name[TASK_COMM_LEN];
+ pid_t pid;
+ pid_t tgid;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
- /* protecting invalidated */
- spinlock_t status_lock;
-
/* BOs who needs a validation */
struct list_head evicted;
@@ -178,6 +184,10 @@ struct amdgpu_vm {
/* BOs moved, but not yet updated in the PT */
struct list_head moved;
+ spinlock_t moved_lock;
+
+ /* All BOs of this VM not currently in the state machine */
+ struct list_head idle;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
@@ -186,9 +196,6 @@ struct amdgpu_vm {
struct amdgpu_vm_pt root;
struct dma_fence *last_update;
- /* protecting freed */
- spinlock_t freed_lock;
-
/* Scheduler entity for page table updates */
struct drm_sched_entity entity;
@@ -216,6 +223,9 @@ struct amdgpu_vm {
/* Valid while the PD is reserved or fenced */
uint64_t pd_phys_addr;
+
+ /* Some basic info about the task */
+ struct amdgpu_task_info task_info;
};
struct amdgpu_vm_manager {
@@ -318,4 +328,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+ struct amdgpu_task_info *task_info);
+
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 9aca653bec07..9cfa8a9ada92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -97,6 +97,34 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size
+ *
+ * @bo: &amdgpu_bo buffer object (must be in VRAM)
+ *
+ * Returns:
+ * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM.
+ */
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_mem_reg *mem = &bo->tbo.mem;
+ struct drm_mm_node *nodes = mem->mm_node;
+ unsigned pages = mem->num_pages;
+ u64 usage;
+
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+ return amdgpu_bo_size(bo);
+
+ if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ return 0;
+
+ for (usage = 0; nodes && pages; pages -= nodes->size, nodes++)
+ usage += amdgpu_vram_mgr_vis_size(adev, nodes);
+
+ return usage;
+}
+
+/**
* amdgpu_vram_mgr_new - allocate new ranges
*
* @man: TTM memory type manager
@@ -135,7 +163,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
}
- nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL);
+ nodes = kvmalloc_array(num_nodes, sizeof(*nodes),
+ GFP_KERNEL | __GFP_ZERO);
if (!nodes)
return -ENOMEM;
@@ -190,7 +219,7 @@ error:
drm_mm_remove_node(&nodes[i]);
spin_unlock(&mgr->lock);
- kfree(nodes);
+ kvfree(nodes);
return r == -ENOSPC ? 0 : r;
}
@@ -229,7 +258,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
atomic64_sub(usage, &mgr->usage);
atomic64_sub(vis_usage, &mgr->vis_usage);
- kfree(mem->mm_node);
+ kvfree(mem->mm_node);
mem->mm_node = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 69500a8b4e2d..e9934de1b9cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1221,7 +1221,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
ectx.abort = false;
ectx.last_jump = 0;
if (ws)
- ectx.ws = kzalloc(4 * ws, GFP_KERNEL);
+ ectx.ws = kcalloc(4, ws, GFP_KERNEL);
else
ectx.ws = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 47ef3e6e7178..d2469453dca2 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -49,10 +49,10 @@
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
-MODULE_FIRMWARE("radeon/bonaire_smc.bin");
-MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
-MODULE_FIRMWARE("radeon/hawaii_smc.bin");
-MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_smc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_smc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin");
#define MC_CG_ARB_FREQ_F0 0x0a
#define MC_CG_ARB_FREQ_F1 0x0b
@@ -951,12 +951,12 @@ static void ci_apply_state_adjust_rules(struct amdgpu_device *adev,
else
pi->battery_state = false;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
- if (adev->pm.dpm.ac_power == false) {
+ if (adev->pm.ac_power == false) {
for (i = 0; i < ps->performance_level_count; i++) {
if (ps->performance_levels[i].mclk > max_limits->mclk)
ps->performance_levels[i].mclk = max_limits->mclk;
@@ -4078,7 +4078,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -4127,7 +4127,7 @@ static int ci_enable_vce_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -4160,7 +4160,7 @@ static int ci_enable_samu_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -4191,7 +4191,7 @@ static int ci_enable_acp_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -5679,8 +5679,9 @@ static int ci_parse_power_table(struct amdgpu_device *adev)
(mode_info->atom_context->bios + data_offset +
le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
- adev->pm.dpm.ps = kzalloc(sizeof(struct amdgpu_ps) *
- state_array->ucNumEntries, GFP_KERNEL);
+ adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries,
+ sizeof(struct amdgpu_ps),
+ GFP_KERNEL);
if (!adev->pm.dpm.ps)
return -ENOMEM;
power_state_offset = (u8 *)state_array->states;
@@ -5814,7 +5815,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
goto out;
@@ -5845,8 +5846,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
adev->pm.dpm.priv = pi;
pi->sys_pcie_mask =
- (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
- CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK;
pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
@@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
pi->pcie_dpm_key_disabled = 0;
pi->thermal_sclk_dpm_enabled = 0;
- if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+ if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
pi->caps_sclk_ds = true;
else
pi->caps_sclk_ds = false;
@@ -5927,7 +5927,9 @@ static int ci_dpm_init(struct amdgpu_device *adev)
ci_set_private_data_variables_based_on_pptable(adev);
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries =
- kzalloc(4 * sizeof(struct amdgpu_clock_voltage_dependency_entry), GFP_KERNEL);
+ kcalloc(4,
+ sizeof(struct amdgpu_clock_voltage_dependency_entry),
+ GFP_KERNEL);
if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
ci_dpm_fini(adev);
return -ENOMEM;
@@ -6255,7 +6257,7 @@ static int ci_dpm_late_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_dpm)
+ if (!adev->pm.dpm_enabled)
return 0;
/* init the sysfs and debugfs files late */
@@ -6764,6 +6766,19 @@ static int ci_dpm_read_sensor(void *handle, int idx,
}
}
+static int ci_set_powergating_by_smu(void *handle,
+ uint32_t block_type, bool gate)
+{
+ switch (block_type) {
+ case AMD_IP_BLOCK_TYPE_UVD:
+ ci_dpm_powergate_uvd(handle, gate);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6801,7 +6816,7 @@ static const struct amd_pm_funcs ci_dpm_funcs = {
.debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level,
.force_performance_level = &ci_dpm_force_performance_level,
.vblank_too_short = &ci_dpm_vblank_too_short,
- .powergate_uvd = &ci_dpm_powergate_uvd,
+ .set_powergating_by_smu = &ci_set_powergating_by_smu,
.set_fan_control_mode = &ci_dpm_set_fan_control_mode,
.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 0df22030e713..702e257a483f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev,
}
}
+static bool cik_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_config_memsize = &cik_get_config_memsize,
.flush_hdp = &cik_flush_hdp,
.invalidate_hdp = &cik_invalidate_hdp,
+ .need_full_reset = &cik_need_full_reset,
};
static int cik_common_early_init(void *handle)
@@ -1996,9 +2003,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
if (amdgpu_dpm == -1)
- amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
- else
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -2017,9 +2024,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
if (amdgpu_dpm == -1)
- amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
- else
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index a7576255cc30..d0fa2aac2388 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -54,16 +54,16 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
static int cik_sdma_soft_reset(void *handle);
-MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
-MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
-MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
-MODULE_FIRMWARE("radeon/hawaii_sdma1.bin");
-MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
-MODULE_FIRMWARE("radeon/kaveri_sdma1.bin");
-MODULE_FIRMWARE("radeon/kabini_sdma.bin");
-MODULE_FIRMWARE("radeon/kabini_sdma1.bin");
-MODULE_FIRMWARE("radeon/mullins_sdma.bin");
-MODULE_FIRMWARE("radeon/mullins_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
@@ -132,9 +132,9 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
if (err)
goto out;
@@ -177,9 +177,8 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+ return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) & 0x3fffc) >> 2;
}
/**
@@ -192,9 +191,8 @@ static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me],
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
(lower_32_bits(ring->wptr) << 2) & 0x3fffc);
}
@@ -248,7 +246,7 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
u32 ref_and_mask;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
else
ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
@@ -1290,8 +1288,10 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 452f88ea46a2..308f9f238bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -41,6 +41,8 @@
#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1823,7 +1825,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1842,32 +1843,28 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -2043,8 +2040,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2375,13 +2371,14 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v10_0_lock_cursor(crtc, true);
@@ -2526,11 +2523,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@@ -2744,14 +2739,14 @@ static int dce_v10_0_sw_init(void *handle)
return r;
}
- for (i = 8; i < 20; i += 2) {
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index a7c1c584a191..76dfb76f7900 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -41,6 +41,8 @@
#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -173,6 +175,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
ARRAY_SIZE(polaris11_golden_settings_a11));
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
polaris10_golden_settings_a11,
ARRAY_SIZE(polaris10_golden_settings_a11));
@@ -473,6 +476,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
num_crtc = 2;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
num_crtc = 6;
break;
case CHIP_POLARIS11:
@@ -1445,6 +1449,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.num_pins = 7;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
adev->mode_info.audio.num_pins = 8;
break;
case CHIP_POLARIS11:
@@ -1862,7 +1867,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1881,32 +1885,28 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -2082,8 +2082,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2253,7 +2252,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
struct amdgpu_encoder *amdgpu_encoder =
to_amdgpu_encoder(amdgpu_crtc->encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@@ -2450,13 +2450,14 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v11_0_lock_cursor(crtc, true);
@@ -2601,11 +2602,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@@ -2673,7 +2672,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
struct amdgpu_encoder *amdgpu_encoder =
to_amdgpu_encoder(amdgpu_crtc->encoder);
int encoder_mode =
@@ -2830,6 +2830,7 @@ static int dce_v11_0_early_init(void *handle)
adev->mode_info.num_dig = 9;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
adev->mode_info.num_hpd = 6;
adev->mode_info.num_dig = 6;
break;
@@ -2859,14 +2860,14 @@ static int dce_v11_0_sw_init(void *handle)
return r;
}
- for (i = 8; i < 20; i += 2) {
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r)
return r;
@@ -2949,7 +2950,8 @@ static int dce_v11_0_hw_init(void *handle)
amdgpu_atombios_encoder_init_dig(adev);
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
amdgpu_atombios_crtc_set_dce_clock(adev, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 9f67b7fd3487..c9adc627305d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1798,32 +1797,28 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -1978,8 +1973,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2268,13 +2262,14 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v6_0_lock_cursor(crtc, true);
@@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index f55422cbd77a..50cd03beac7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1773,32 +1772,28 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -1955,8 +1950,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2279,13 +2273,14 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v8_0_lock_cursor(crtc, true);
@@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index b51f05dc9582..15257634a53a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -36,6 +36,7 @@
#include "dce_v10_0.h"
#include "dce_v11_0.h"
#include "dce_virtual.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
#define DCE_VIRTUAL_VBLANK_PERIOD 16666666
@@ -168,11 +169,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@@ -271,25 +270,18 @@ static int dce_virtual_early_init(void *handle)
static struct drm_encoder *
dce_virtual_encoder(struct drm_connector *connector)
{
- int enc_id = connector->encoder_ids[0];
struct drm_encoder *encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
return encoder;
}
/* pick the first one */
- if (enc_id)
- return drm_encoder_find(connector->dev, NULL, enc_id);
+ drm_connector_for_each_possible_encoder(connector, encoder, i)
+ return encoder;
+
return NULL;
}
@@ -329,7 +321,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
return 0;
}
-static int dce_virtual_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
return MODE_OK;
@@ -380,7 +372,7 @@ static int dce_virtual_sw_init(void *handle)
int r, i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 229, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq);
if (r)
return r;
@@ -462,8 +454,9 @@ static int dce_virtual_hw_init(void *handle)
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_VEGAM:
dce_v11_0_disable_dce(adev);
break;
case CHIP_TOPAZ:
@@ -474,6 +467,7 @@ static int dce_virtual_hw_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
break;
default:
DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
@@ -634,7 +628,7 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
drm_connector_register(connector);
/* link them */
- drm_mode_connector_attach_encoder(connector, encoder);
+ drm_connector_attach_encoder(connector, encoder);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
new file mode 100644
index 000000000000..9935371db7ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v1_7.h"
+
+#include "df/df_1_7_default.h"
+#include "df/df_1_7_offset.h"
+#include "df/df_1_7_sh_mask.h"
+
+static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
+
+static void df_v1_7_init (struct amdgpu_device *adev)
+{
+}
+
+static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+ tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+
+ return df_v1_7_channel_number[fb_channel_number];
+}
+
+static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* Put DF on broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, true);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit boradcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
+ u32 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
+ ForceParWrRMW, enable);
+}
+
+const struct amdgpu_df_funcs df_v1_7_funcs = {
+ .init = df_v1_7_init,
+ .enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
+ .get_fb_channel_number = df_v1_7_get_fb_channel_number,
+ .get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
+ .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v1_7_get_clockgating_state,
+ .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
new file mode 100644
index 000000000000..74621104c487
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V1_7_H__
+#define __DF_V1_7_H__
+
+#include "soc15_common.h"
+enum DF_V1_7_MGCG
+{
+ DF_V1_7_MGCG_DISABLE = 0,
+ DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1,
+ DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2,
+ DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13,
+ DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14,
+ DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15
+};
+
+extern const struct amdgpu_df_funcs df_v1_7_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
new file mode 100644
index 000000000000..d5ebe566809b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v3_6.h"
+
+#include "df/df_3_6_default.h"
+#include "df/df_3_6_offset.h"
+#include "df/df_3_6_sh_mask.h"
+
+static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
+ 16, 32, 0, 0, 0, 2, 4, 8};
+
+static void df_v3_6_init(struct amdgpu_device *adev)
+{
+}
+
+static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+ tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
+ fb_channel_number = 0;
+
+ return df_v3_6_channel_number[fb_channel_number];
+}
+
+static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* Put DF on broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, true);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
+ u32 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+const struct amdgpu_df_funcs df_v3_6_funcs = {
+ .init = df_v3_6_init,
+ .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
+ .get_fb_channel_number = df_v3_6_get_fb_channel_number,
+ .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+ .update_medium_grain_clock_gating =
+ df_v3_6_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v3_6_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
new file mode 100644
index 000000000000..e79c58e5efcb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V3_6_H__
+#define __DF_V3_6_H__
+
+#include "soc15_common.h"
+
+enum DF_V3_6_MGCG {
+ DF_V3_6_MGCG_DISABLE = 0,
+ DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+ DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+ DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+ DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+ DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct amdgpu_df_funcs df_v3_6_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index cd6bf291a853..de184a886057 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -44,30 +44,30 @@ static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev);
-MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
-MODULE_FIRMWARE("radeon/tahiti_me.bin");
-MODULE_FIRMWARE("radeon/tahiti_ce.bin");
-MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
-
-MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
-MODULE_FIRMWARE("radeon/pitcairn_me.bin");
-MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
-MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
-
-MODULE_FIRMWARE("radeon/verde_pfp.bin");
-MODULE_FIRMWARE("radeon/verde_me.bin");
-MODULE_FIRMWARE("radeon/verde_ce.bin");
-MODULE_FIRMWARE("radeon/verde_rlc.bin");
-
-MODULE_FIRMWARE("radeon/oland_pfp.bin");
-MODULE_FIRMWARE("radeon/oland_me.bin");
-MODULE_FIRMWARE("radeon/oland_ce.bin");
-MODULE_FIRMWARE("radeon/oland_rlc.bin");
-
-MODULE_FIRMWARE("radeon/hainan_pfp.bin");
-MODULE_FIRMWARE("radeon/hainan_me.bin");
-MODULE_FIRMWARE("radeon/hainan_ce.bin");
-MODULE_FIRMWARE("radeon/hainan_rlc.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_pfp.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_me.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_ce.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/pitcairn_pfp.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_me.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_ce.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/verde_pfp.bin");
+MODULE_FIRMWARE("amdgpu/verde_me.bin");
+MODULE_FIRMWARE("amdgpu/verde_ce.bin");
+MODULE_FIRMWARE("amdgpu/verde_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/oland_pfp.bin");
+MODULE_FIRMWARE("amdgpu/oland_me.bin");
+MODULE_FIRMWARE("amdgpu/oland_ce.bin");
+MODULE_FIRMWARE("amdgpu/oland_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/hainan_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hainan_me.bin");
+MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
+MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
@@ -335,7 +335,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -346,7 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -357,7 +357,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -368,7 +368,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 42b6144c1fd5..95452c5a9df6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -57,36 +57,36 @@ static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
-MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
-MODULE_FIRMWARE("radeon/bonaire_me.bin");
-MODULE_FIRMWARE("radeon/bonaire_ce.bin");
-MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
-MODULE_FIRMWARE("radeon/bonaire_mec.bin");
-
-MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
-MODULE_FIRMWARE("radeon/hawaii_me.bin");
-MODULE_FIRMWARE("radeon/hawaii_ce.bin");
-MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
-MODULE_FIRMWARE("radeon/hawaii_mec.bin");
-
-MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
-MODULE_FIRMWARE("radeon/kaveri_me.bin");
-MODULE_FIRMWARE("radeon/kaveri_ce.bin");
-MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
-MODULE_FIRMWARE("radeon/kaveri_mec.bin");
-MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
-
-MODULE_FIRMWARE("radeon/kabini_pfp.bin");
-MODULE_FIRMWARE("radeon/kabini_me.bin");
-MODULE_FIRMWARE("radeon/kabini_ce.bin");
-MODULE_FIRMWARE("radeon/kabini_rlc.bin");
-MODULE_FIRMWARE("radeon/kabini_mec.bin");
-
-MODULE_FIRMWARE("radeon/mullins_pfp.bin");
-MODULE_FIRMWARE("radeon/mullins_me.bin");
-MODULE_FIRMWARE("radeon/mullins_ce.bin");
-MODULE_FIRMWARE("radeon/mullins_rlc.bin");
-MODULE_FIRMWARE("radeon/mullins_mec.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
+
+MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
+MODULE_FIRMWARE("amdgpu/kabini_me.bin");
+MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
+MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
+MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
+MODULE_FIRMWARE("amdgpu/mullins_me.bin");
+MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
+MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
+MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
@@ -925,7 +925,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -933,7 +933,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -941,7 +941,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -949,7 +949,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -958,7 +958,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -967,7 +967,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e14263fca1c9..5cd45210113f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -51,6 +51,8 @@
#include "smu/smu_7_1_3_d.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048
@@ -125,18 +127,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
-
MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
@@ -149,6 +139,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
+
MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
@@ -161,6 +163,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
+MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
+MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vegam_me.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
+
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -292,6 +301,37 @@ static const u32 tonga_mgcg_cgcg_init[] =
mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
+static const u32 golden_settings_vegam_a11[] =
+{
+ mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
+ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+ mmSQ_CONFIG, 0x07f80000, 0x01180000,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 vegam_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
static const u32 golden_settings_polaris11_a11[] =
{
mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
@@ -666,6 +706,17 @@ static const u32 stoney_mgcg_cgcg_init[] =
mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
+
+static const char * const sq_edc_source_names[] = {
+ "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
+ "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
+ "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
+ "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
+ "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
+ "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
+ "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
+};
+
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
@@ -712,6 +763,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
tonga_golden_common_all,
ARRAY_SIZE(tonga_golden_common_all));
break;
+ case CHIP_VEGAM:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_vegam_a11,
+ ARRAY_SIZE(golden_settings_vegam_a11));
+ amdgpu_device_program_register_sequence(adev,
+ vegam_golden_common_all,
+ ARRAY_SIZE(vegam_golden_common_all));
+ break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
amdgpu_device_program_register_sequence(adev,
@@ -820,26 +879,32 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
- uint32_t tmp = 0;
+
+ unsigned int index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
+ r = amdgpu_device_wb_get(adev, &index);
if (r) {
- DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
- WREG32(scratch, 0xCAFEDEAD);
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
- ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
- ib.ptr[2] = 0xDEADBEEF;
- ib.length_dw = 3;
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
@@ -854,20 +919,21 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
- tmp = RREG32(scratch);
+
+ tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
r = 0;
} else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
+ DRM_ERROR("ib test on ring %d failed\n", ring->idx);
r = -EINVAL;
}
+
err2:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
- amdgpu_gfx_scratch_free(adev, scratch);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -918,17 +984,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
chip_name = "fiji";
break;
- case CHIP_POLARIS11:
- chip_name = "polaris11";
+ case CHIP_STONEY:
+ chip_name = "stoney";
break;
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
+ case CHIP_POLARIS11:
+ chip_name = "polaris11";
+ break;
case CHIP_POLARIS12:
chip_name = "polaris12";
break;
- case CHIP_STONEY:
- chip_name = "stoney";
+ case CHIP_VEGAM:
+ chip_name = "vegam";
break;
default:
BUG();
@@ -1770,6 +1839,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
ret = amdgpu_atombios_get_gfx_info(adev);
if (ret)
return ret;
@@ -1949,6 +2019,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
return 0;
}
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
+
static int gfx_v8_0_sw_init(void *handle)
{
int i, j, k, r, ring_id;
@@ -1957,12 +2029,13 @@ static int gfx_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
switch (adev->asic_type) {
- case CHIP_FIJI:
case CHIP_TONGA:
+ case CHIP_CARRIZO:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
- case CHIP_POLARIS10:
- case CHIP_CARRIZO:
+ case CHIP_VEGAM:
adev->gfx.mec.num_mec = 2;
break;
case CHIP_TOPAZ:
@@ -1976,27 +2049,43 @@ static int gfx_v8_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
/* KIQ event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
if (r)
return r;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
+ /* Add CP EDC/ECC irq */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
+ /* SQ interrupts. */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
+ &adev->gfx.sq_irq);
+ if (r) {
+ DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
+ return r;
+ }
+
+ INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
+
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v8_0_scratch_init(adev);
@@ -2323,6 +2412,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
break;
case CHIP_FIJI:
+ case CHIP_VEGAM:
modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3504,6 +3594,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
switch (adev->asic_type) {
case CHIP_FIJI:
+ case CHIP_VEGAM:
*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
RB_XSEL2(1) | PKR_MAP(2) |
PKR_XSEL(1) | PKR_YSEL(1) |
@@ -4071,7 +4162,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
gfx_v8_0_init_power_gating(adev);
WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
} else if ((adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
gfx_v8_0_init_csb(adev);
gfx_v8_0_init_save_restore_list(adev);
gfx_v8_0_enable_save_restore_machine(adev);
@@ -4146,7 +4238,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
if (adev->asic_type == CHIP_POLARIS11 ||
adev->asic_type == CHIP_POLARIS10 ||
- adev->asic_type == CHIP_POLARIS12) {
+ adev->asic_type == CHIP_POLARIS12 ||
+ adev->asic_type == CHIP_VEGAM) {
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
tmp &= ~0x3;
WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@@ -5056,6 +5149,10 @@ static int gfx_v8_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+
+ amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
+
/* disable KCQ to avoid CPC touch memory not valid anymore */
for (i = 0; i < adev->gfx.num_compute_rings; i++)
gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
@@ -5487,9 +5584,19 @@ static int gfx_v8_0_late_init(void *handle)
if (r)
return r;
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_GATE);
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r) {
+ DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
+ return r;
+ }
+
+ r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
+ if (r) {
+ DRM_ERROR(
+ "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
+ r);
+ return r;
+ }
return 0;
}
@@ -5497,13 +5604,12 @@ static int gfx_v8_0_late_init(void *handle)
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- if ((adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12))
+ if (((adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
/* Send msg to SMU via Powerplay */
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_SMC,
- enable ?
- AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5588,6 +5694,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
else
@@ -6154,6 +6261,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
break;
default:
@@ -6729,6 +6837,77 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ int enable_flag;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable_flag = 0;
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable_flag = 1;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+
+ return 0;
+}
+
+static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ int enable_flag;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable_flag = 1;
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable_flag = 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
+ enable_flag);
+
+ return 0;
+}
+
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6779,6 +6958,114 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("CP EDC/ECC error detected.");
+ return 0;
+}
+
+static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
+{
+ u32 enc, se_id, sh_id, cu_id;
+ char type[20];
+ int sq_edc_source = -1;
+
+ enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
+ se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
+
+ switch (enc) {
+ case 0:
+ DRM_INFO("SQ general purpose intr detected:"
+ "se_id %d, immed_overflow %d, host_reg_overflow %d,"
+ "host_cmd_overflow %d, cmd_timestamp %d,"
+ "reg_timestamp %d, thread_trace_buff_full %d,"
+ "wlt %d, thread_trace %d.\n",
+ se_id,
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
+ );
+ break;
+ case 1:
+ case 2:
+
+ cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
+ sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
+
+ /*
+ * This function can be called either directly from ISR
+ * or from BH in which case we can access SQ_EDC_INFO
+ * instance
+ */
+ if (in_task()) {
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+
+ sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
+
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ if (enc == 1)
+ sprintf(type, "instruction intr");
+ else
+ sprintf(type, "EDC/ECC error");
+
+ DRM_INFO(
+ "SQ %s detected: "
+ "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
+ "trap %s, sq_ed_info.source %s.\n",
+ type, se_id, sh_id, cu_id,
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
+ (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
+ );
+ break;
+ default:
+ DRM_ERROR("SQ invalid encoding type\n.");
+ }
+}
+
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
+{
+
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
+ struct sq_work *sq_work = container_of(work, struct sq_work, work);
+
+ gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
+}
+
+static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned ih_data = entry->src_data[0];
+
+ /*
+ * Try to submit work so SQ_EDC_INFO can be accessed from
+ * BH. If previous work submission hasn't finished yet
+ * just print whatever info is possible directly from the ISR.
+ */
+ if (work_pending(&adev->gfx.sq_work.work)) {
+ gfx_v8_0_parse_sq_irq(adev, ih_data);
+ } else {
+ adev->gfx.sq_work.ih_data = ih_data;
+ schedule_work(&adev->gfx.sq_work.work);
+ }
+
+ return 0;
+}
+
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned int type,
@@ -6979,6 +7266,16 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
.process = gfx_v8_0_kiq_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
+ .set = gfx_v8_0_set_cp_ecc_int_state,
+ .process = gfx_v8_0_cp_ecc_error_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
+ .set = gfx_v8_0_set_sq_int_state,
+ .process = gfx_v8_0_sq_irq,
+};
+
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
@@ -6992,6 +7289,12 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
+
+ adev->gfx.cp_ecc_error_irq.num_types = 1;
+ adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
+
+ adev->gfx.sq_irq.num_types = 1;
+ adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9d39fd5b1822..9ab39117cc4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -27,6 +27,7 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
@@ -37,11 +38,12 @@
#include "clearstate_gfx9.h"
#include "v9_structs.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
-#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
@@ -64,6 +66,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -73,40 +82,59 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1[] =
@@ -185,6 +213,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
};
+static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+{
+ mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+};
+
+static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+{
+ mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+};
+
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -218,6 +270,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_2_1_vg12,
ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
break;
+ case CHIP_VEGA20:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg20,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg20));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_gc_9_1,
@@ -401,6 +461,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
+static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+}
+
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
@@ -412,6 +493,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
const struct rlc_firmware_header_v2_0 *rlc_hdr;
unsigned int *tmp = NULL;
unsigned int i = 0;
+ uint16_t version_major;
+ uint16_t version_minor;
DRM_DEBUG("\n");
@@ -422,6 +505,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA12:
chip_name = "vega12";
break;
+ case CHIP_VEGA20:
+ chip_name = "vega20";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -468,6 +554,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
adev->gfx.rlc.save_and_restore_offset =
@@ -508,6 +600,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v9_0_init_rlc_ext_microcode(adev);
+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
@@ -566,6 +661,29 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ if (adev->gfx.rlc.is_rlc_v2_1 &&
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
+ adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
info->fw = adev->gfx.mec_fw;
@@ -841,6 +959,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v9_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
@@ -869,6 +988,39 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
+static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (!r)
+ adev->gfx.rlc.clear_state_gpu_addr =
+ amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
+
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+
+ return r;
+}
+
+static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.rlc.clear_state_obj)
+ return;
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
+ if (likely(r == 0)) {
+ amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+ }
+}
+
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1013,9 +1165,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};
-static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
+static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
+ int err;
adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
@@ -1037,6 +1190,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
DRM_INFO("fix gfx.config for vega12\n");
break;
+ case CHIP_VEGA20:
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ /* check vbios table if gpu info is not available */
+ err = amdgpu_atomfirmware_get_gfx_info(adev);
+ if (err)
+ return err;
+ break;
case CHIP_RAVEN:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1086,6 +1253,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config,
GB_ADDR_CONFIG,
PIPE_INTERLEAVE_SIZE));
+
+ return 0;
}
static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
@@ -1319,6 +1488,7 @@ static int gfx_v9_0_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
adev->gfx.mec.num_mec = 2;
break;
@@ -1331,23 +1501,23 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
/* KIQ event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT, &adev->gfx.kiq.irq);
if (r)
return r;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -1446,7 +1616,9 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.ce_ram_size = 0x8000;
- gfx_v9_0_gpu_early_init(adev);
+ r = gfx_v9_0_gpu_early_init(adev);
+ if (r)
+ return r;
r = gfx_v9_0_ngg_init(adev);
if (r)
@@ -1600,6 +1772,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -1616,7 +1789,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
- tmp = adev->gmc.shared_aperture_start >> 48;
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
}
}
@@ -1708,55 +1884,44 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.clear_state_size);
}
-static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
int indirect_offset,
int list_size,
int *unique_indirect_regs,
- int *unique_indirect_reg_count,
- int max_indirect_reg_count,
+ int unique_indirect_reg_count,
int *indirect_start_offsets,
int *indirect_start_offsets_count,
- int max_indirect_start_offsets_count)
+ int max_start_offsets_count)
{
int idx;
- bool new_entry = true;
for (; indirect_offset < list_size; indirect_offset++) {
+ WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
+ indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+ *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
+
+ while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+ indirect_offset += 2;
+
+ /* look for the matching indice */
+ for (idx = 0; idx < unique_indirect_reg_count; idx++) {
+ if (unique_indirect_regs[idx] ==
+ register_list_format[indirect_offset] ||
+ !unique_indirect_regs[idx])
+ break;
+ }
- if (new_entry) {
- new_entry = false;
- indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
- *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
- BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
- }
-
- if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
- new_entry = true;
- continue;
- }
-
- indirect_offset += 2;
+ BUG_ON(idx >= unique_indirect_reg_count);
- /* look for the matching indice */
- for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
- if (unique_indirect_regs[idx] ==
- register_list_format[indirect_offset])
- break;
- }
+ if (!unique_indirect_regs[idx])
+ unique_indirect_regs[idx] = register_list_format[indirect_offset];
- if (idx >= *unique_indirect_reg_count) {
- unique_indirect_regs[*unique_indirect_reg_count] =
- register_list_format[indirect_offset];
- idx = *unique_indirect_reg_count;
- *unique_indirect_reg_count = *unique_indirect_reg_count + 1;
- BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+ indirect_offset++;
}
-
- register_list_format[indirect_offset] = idx;
}
}
-static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
{
int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
int unique_indirect_reg_count = 0;
@@ -1765,7 +1930,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
int indirect_start_offsets_count = 0;
int list_size = 0;
- int i = 0;
+ int i = 0, j = 0;
u32 tmp = 0;
u32 *register_list_format =
@@ -1776,15 +1941,15 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.reg_list_format_size_bytes);
/* setup unique_indirect_regs array and indirect_start_offsets array */
- gfx_v9_0_parse_ind_reg_list(register_list_format,
- GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
- adev->gfx.rlc.reg_list_format_size_bytes >> 2,
- unique_indirect_regs,
- &unique_indirect_reg_count,
- ARRAY_SIZE(unique_indirect_regs),
- indirect_start_offsets,
- &indirect_start_offsets_count,
- ARRAY_SIZE(indirect_start_offsets));
+ unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+ gfx_v9_1_parse_ind_reg_list(register_list_format,
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+ adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+ unique_indirect_regs,
+ unique_indirect_reg_count,
+ indirect_start_offsets,
+ &indirect_start_offsets_count,
+ ARRAY_SIZE(indirect_start_offsets));
/* enable auto inc in case it is disabled */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@@ -1798,19 +1963,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
adev->gfx.rlc.register_restore[i]);
- /* load direct register */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
- for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
- adev->gfx.rlc.register_restore[i]);
-
/* load indirect register */
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
adev->gfx.rlc.reg_list_format_start);
- for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+
+ /* direct register portion */
+ for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
register_list_format[i]);
+ /* indirect register portion */
+ while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+ if (register_list_format[i] == 0xFFFFFFFF) {
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+ continue;
+ }
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+
+ for (j = 0; j < unique_indirect_reg_count; j++) {
+ if (register_list_format[i] == unique_indirect_regs[j]) {
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+ break;
+ }
+ }
+
+ BUG_ON(j >= unique_indirect_reg_count);
+
+ i++;
+ }
+
/* set save/restore list size */
list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
list_size = list_size >> 1;
@@ -1823,14 +2006,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.starting_offsets_start);
for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
- indirect_start_offsets[i]);
+ indirect_start_offsets[i]);
/* load unique indirect regs*/
for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
- unique_indirect_regs[i] & 0x3FFFF);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
- unique_indirect_regs[i] >> 20);
+ if (unique_indirect_regs[i] != 0) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+ + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+ unique_indirect_regs[i] & 0x3FFFF);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+ + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+ unique_indirect_regs[i] >> 20);
+ }
}
kfree(register_list_format);
@@ -2010,34 +2198,26 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
+ gfx_v9_0_init_csb(adev);
+
+ /*
+ * Rlc save restore list is workable since v2_1.
+ * And it's needed by gfxoff feature.
+ */
+ if (adev->gfx.rlc.is_rlc_v2_1) {
+ gfx_v9_1_init_rlc_save_restore_list(adev);
+ gfx_v9_0_enable_save_restore_machine(adev);
+ }
+
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_GFX_SMG |
AMD_PG_SUPPORT_GFX_DMG |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GDS |
AMD_PG_SUPPORT_RLC_SMU_HS)) {
- gfx_v9_0_init_csb(adev);
- gfx_v9_0_init_rlc_save_restore_list(adev);
- gfx_v9_0_enable_save_restore_machine(adev);
-
- if (adev->asic_type == CHIP_RAVEN) {
- WREG32(mmRLC_JUMP_TABLE_RESTORE,
- adev->gfx.rlc.cp_table_gpu_addr >> 8);
- gfx_v9_0_init_gfx_power_gating(adev);
-
- if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
- gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
- gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
- } else {
- gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
- gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
- }
-
- if (adev->pg_flags & AMD_PG_SUPPORT_CP)
- gfx_v9_0_enable_cp_power_gating(adev, true);
- else
- gfx_v9_0_enable_cp_power_gating(adev, false);
- }
+ WREG32(mmRLC_JUMP_TABLE_RESTORE,
+ adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v9_0_init_gfx_power_gating(adev);
}
}
@@ -2126,9 +2306,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
/* disable CG */
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
- /* disable PG */
- WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
-
gfx_v9_0_rlc_reset(adev);
gfx_v9_0_init_pg(adev);
@@ -2990,6 +3167,10 @@ static int gfx_v9_0_hw_init(void *handle)
gfx_v9_0_gpu_init(adev);
+ r = gfx_v9_0_csb_vram_pin(adev);
+ if (r)
+ return r;
+
r = gfx_v9_0_rlc_resume(adev);
if (r)
return r;
@@ -3061,6 +3242,9 @@ static int gfx_v9_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_PG_STATE_UNGATE);
+
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -3095,6 +3279,8 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
gfx_v9_0_rlc_stop(adev);
+ gfx_v9_0_csb_vram_unpin(adev);
+
return 0;
}
@@ -3339,8 +3525,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- /* TODO: double check if we need to perform under safe mdoe */
- /* gfx_v9_0_enter_rlc_safe_mode(adev); */
+ gfx_v9_0_enter_rlc_safe_mode(adev);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3351,7 +3536,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
- /* gfx_v9_0_exit_rlc_safe_mode(adev); */
+ gfx_v9_0_exit_rlc_safe_mode(adev);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3382,8 +3567,11 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
- RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+
+ if (adev->asic_type != CHIP_VEGA12)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
@@ -3413,11 +3601,15 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
} else {
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
- RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+
+ if (adev->asic_type != CHIP_VEGA12)
+ data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
@@ -3453,9 +3645,11 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
- /* enable 3Dcgcg FSM(0x0020003f) */
+
+ /* enable 3Dcgcg FSM(0x0000363f) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
- data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
@@ -3502,9 +3696,10 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
- /* enable cgcg FSM(0x0020003F) */
+ /* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
@@ -3586,6 +3781,15 @@ static int gfx_v9_0_set_powergating_state(void *handle,
/* update mgcg state */
gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
+
+ /* set gfx off through smu */
+ if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true);
+ break;
+ case CHIP_VEGA12:
+ /* set gfx off through smu */
+ if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true);
break;
default:
break;
@@ -3605,6 +3809,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
gfx_v9_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@@ -3742,7 +3947,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
amdgpu_ring_write(ring, header);
-BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
@@ -3774,13 +3979,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
- EOP_TC_ACTION_EN |
- EOP_TC_WB_ACTION_EN |
- EOP_TC_MD_ACTION_EN |
+ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+ EOP_TC_NC_ACTION_EN) :
+ (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN)) |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -4137,6 +4345,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ if (amdgpu_sriov_vf(ring->adev))
+ gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -4458,6 +4680,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_tmz = gfx_v9_0_ring_emit_tmz,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4492,6 +4715,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.set_priority = gfx_v9_0_ring_set_priority_compute,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4522,6 +4746,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_rreg = gfx_v9_0_ring_emit_rreg,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4577,6 +4802,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
break;
@@ -4686,6 +4912,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5617cf62c566..75317f283c69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -41,11 +41,11 @@ static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v6_0_wait_for_idle(void *handle);
-MODULE_FIRMWARE("radeon/tahiti_mc.bin");
-MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
-MODULE_FIRMWARE("radeon/verde_mc.bin");
-MODULE_FIRMWARE("radeon/oland_mc.bin");
-MODULE_FIRMWARE("radeon/si58_mc.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
+MODULE_FIRMWARE("amdgpu/verde_mc.bin");
+MODULE_FIRMWARE("amdgpu/oland_mc.bin");
+MODULE_FIRMWARE("amdgpu/si58_mc.bin");
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
@@ -134,9 +134,9 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
is_58_fw = true;
if (is_58_fw)
- snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");
else
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
goto out;
@@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_bo_late_init(adev);
+
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
+static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+ return size;
+}
+
static int gmc_v6_0_sw_init(void *handle)
{
int r;
@@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle)
adev->gmc.mc_mask = 0xffffffffffULL;
- adev->gmc.stolen_size = 256 * 1024;
-
adev->need_dma32 = false;
dma_bits = adev->need_dma32 ? 32 : 40;
r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev);
+
r = amdgpu_bo_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 80054f36e487..10920f0bd85f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -43,12 +43,14 @@
#include "amdgpu_atombios.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v7_0_wait_for_idle(void *handle);
-MODULE_FIRMWARE("radeon/bonaire_mc.bin");
-MODULE_FIRMWARE("radeon/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
static const u32 golden_settings_iceland_a11[] =
@@ -147,10 +149,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- if (adev->asic_type == CHIP_TOPAZ)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- else
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
@@ -958,12 +957,33 @@ static int gmc_v7_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_bo_late_init(adev);
+
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
+static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+ return size;
+}
+
static int gmc_v7_0_sw_init(void *handle)
{
int r;
@@ -978,11 +998,11 @@ static int gmc_v7_0_sw_init(void *handle)
adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
@@ -998,8 +1018,6 @@ static int gmc_v7_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- adev->gmc.stolen_size = 256 * 1024;
-
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@@ -1030,6 +1048,8 @@ static int gmc_v7_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d71d4cb68f9c..75f3ffb2891e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -44,6 +44,7 @@
#include "amdgpu_atombios.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -138,6 +139,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_polaris11_a11,
ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -231,6 +233,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
case CHIP_CARRIZO:
case CHIP_STONEY:
+ case CHIP_VEGAM:
return 0;
default: BUG();
}
@@ -567,9 +570,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
- case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS10: /* all engines support GPUVM */
+ case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS12: /* all engines support GPUVM */
+ case CHIP_VEGAM: /* all engines support GPUVM */
default:
adev->gmc.gart_size = 256ULL << 20;
break;
@@ -1049,12 +1053,33 @@ static int gmc_v8_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_bo_late_init(adev);
+
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
+static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+ return size;
+}
+
#define mmMC_SEQ_MISC0_FIJI 0xA71
static int gmc_v8_0_sw_init(void *handle)
@@ -1068,7 +1093,8 @@ static int gmc_v8_0_sw_init(void *handle)
} else {
u32 tmp;
- if (adev->asic_type == CHIP_FIJI)
+ if ((adev->asic_type == CHIP_FIJI) ||
+ (adev->asic_type == CHIP_VEGAM))
tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
else
tmp = RREG32(mmMC_SEQ_MISC0);
@@ -1076,11 +1102,11 @@ static int gmc_v8_0_sw_init(void *handle)
adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
@@ -1096,8 +1122,6 @@ static int gmc_v8_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- adev->gmc.stolen_size = 256 * 1024;
-
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@@ -1128,6 +1152,8 @@ static int gmc_v8_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -1422,8 +1448,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
- entry->src_id, entry->src_data[0]);
+ struct amdgpu_task_info task_info = { 0 };
+
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
+ entry->src_id, entry->src_data[0], task_info.process_name,
+ task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e687363900bb..9df94b45d17d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -43,19 +43,15 @@
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
-#define mmDF_CS_AON0_DramBaseAddress0 0x0044
-#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0
-//DF_CS_AON0_DramBaseAddress0
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+
+/* add these here since we already include dce12 headers and these are for DCN */
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
@@ -263,11 +259,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
}
if (printk_ratelimit()) {
+ struct amdgpu_task_info task_info = { 0 };
+
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
dev_err(adev->dev,
- "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid);
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
dev_err(adev->dev, " at page 0x%016llx from %d\n",
addr, entry->client_id);
if (!amdgpu_sriov_vf(adev))
@@ -385,11 +386,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng,
+ req, 1 << vmid);
return pd_addr;
}
@@ -556,8 +555,7 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
- adev->gmc.private_aperture_start =
- adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
@@ -659,6 +657,11 @@ static int gmc_v9_0_late_init(void *handle)
unsigned i;
int r;
+ /*
+ * TODO - Uncomment once GART corruption issue is fixed.
+ */
+ /* amdgpu_bo_late_init(adev); */
+
for(i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
unsigned vmhub = ring->funcs->vmhub;
@@ -679,6 +682,7 @@ static int gmc_v9_0_late_init(void *handle)
DRM_INFO("ECC is active.\n");
} else if (r == 0) {
DRM_INFO("ECC is not present.\n");
+ adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else {
DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
return r;
@@ -697,10 +701,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
/* base offset of vram pages */
- if (adev->flags & AMD_IS_APU)
- adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
- else
- adev->vm_manager.vram_base_offset = 0;
+ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
}
/**
@@ -714,7 +715,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
- u32 tmp;
int chansize, numchan;
int r;
@@ -727,39 +727,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
else
chansize = 128;
- tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
- tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
- tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
- switch (tmp) {
- case 0:
- default:
- numchan = 1;
- break;
- case 1:
- numchan = 2;
- break;
- case 2:
- numchan = 0;
- break;
- case 3:
- numchan = 4;
- break;
- case 4:
- numchan = 0;
- break;
- case 5:
- numchan = 8;
- break;
- case 6:
- numchan = 0;
- break;
- case 7:
- numchan = 16;
- break;
- case 8:
- numchan = 2;
- break;
- }
+ numchan = adev->df_funcs->get_hbm_channel_number(adev);
adev->gmc.vram_width = numchan * chansize;
}
@@ -792,6 +760,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10: /* all engines support GPUVM */
case CHIP_VEGA12: /* all engines support GPUVM */
+ case CHIP_VEGA20:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -826,6 +795,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
+static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+#if 0
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+#endif
+ unsigned size;
+
+ /*
+ * TODO Remove once GART corruption is resolved
+ * Check related code in gmc_v9_0_sw_fini
+ * */
+ size = 9 * 1024 * 1024;
+
+#if 0
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport;
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ default:
+ viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ break;
+ }
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+
+#endif
+ return size;
+}
+
static int gmc_v9_0_sw_init(void *handle)
{
int r;
@@ -851,6 +866,7 @@ static int gmc_v9_0_sw_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
@@ -863,9 +879,9 @@ static int gmc_v9_0_sw_init(void *handle)
}
/* This interrupt is VMC page fault.*/
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
if (r)
@@ -877,12 +893,6 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /*
- * It needs to reserve 8M stolen memory for vega10
- * TODO: Figure out how to avoid that...
- */
- adev->gmc.stolen_size = 8 * 1024 * 1024;
-
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 44-bits.
* IGP - can handle 44-bits
@@ -907,6 +917,8 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -950,6 +962,18 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
gmc_v9_0_gart_fini(adev);
+
+ /*
+ * TODO:
+ * Currently there is a bug where some memory client outside
+ * of the driver writes to first 8M of VRAM on S3 resume,
+ * this overrides GART which by default gets placed in first 8M and
+ * causes VM_FAULTS once GTT is accessed.
+ * Keep the stolen memory reservation until the while this is not solved.
+ * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+ */
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+
amdgpu_bo_fini(adev);
return 0;
@@ -960,6 +984,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
ARRAY_SIZE(golden_settings_mmhub_1_0_0));
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 26ba984ab2b7..3f57f6463dc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -1921,7 +1921,7 @@ static int kv_dpm_set_power_state(void *handle)
int ret;
if (pi->bapm_enable) {
- ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.dpm.ac_power);
+ ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.ac_power);
if (ret) {
DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n");
return ret;
@@ -2727,8 +2727,9 @@ static int kv_parse_power_table(struct amdgpu_device *adev)
(mode_info->atom_context->bios + data_offset +
le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
- adev->pm.dpm.ps = kzalloc(sizeof(struct amdgpu_ps) *
- state_array->ucNumEntries, GFP_KERNEL);
+ adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries,
+ sizeof(struct amdgpu_ps),
+ GFP_KERNEL);
if (!adev->pm.dpm.ps)
return -ENOMEM;
power_state_offset = (u8 *)state_array->states;
@@ -2817,7 +2818,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
pi->caps_tcp_ramping = true;
}
- if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+ if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
pi->caps_sclk_ds = true;
else
pi->caps_sclk_ds = false;
@@ -2974,7 +2975,7 @@ static int kv_dpm_late_init(void *handle)
/* powerdown unused blocks for now */
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_dpm)
+ if (!adev->pm.dpm_enabled)
return 0;
kv_dpm_powergate_acp(adev, true);
@@ -3305,6 +3306,19 @@ static int kv_dpm_read_sensor(void *handle, int idx,
}
}
+static int kv_set_powergating_by_smu(void *handle,
+ uint32_t block_type, bool gate)
+{
+ switch (block_type) {
+ case AMD_IP_BLOCK_TYPE_UVD:
+ kv_dpm_powergate_uvd(handle, gate);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static const struct amd_ip_funcs kv_dpm_ip_funcs = {
.name = "kv_dpm",
.early_init = kv_dpm_early_init,
@@ -3341,7 +3355,7 @@ static const struct amd_pm_funcs kv_dpm_funcs = {
.print_power_state = &kv_dpm_print_power_state,
.debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level,
.force_performance_level = &kv_dpm_force_performance_level,
- .powergate_uvd = &kv_dpm_powergate_uvd,
+ .set_powergating_by_smu = kv_set_powergating_by_smu,
.enable_bapm = &kv_dpm_enable_bapm,
.get_vce_clock_state = amdgpu_get_vce_clock_state,
.check_state_equal = kv_check_state_equal,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 43f925773b57..e70a0d4d6db4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -471,8 +471,8 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
RENG_EXECUTE_ON_REG_UPDATE, 1);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
- if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu)
- amdgpu_dpm_set_mmhub_powergating_by_smu(adev);
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
} else {
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
@@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 493348672475..078f70faedcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} while (timeout > 1);
flr_done:
- if (locked)
+ if (locked) {
+ adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
+ }
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_lockup_timeout == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index df34dc79d444..365517c0121e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -34,10 +34,19 @@
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
+/* vega20 */
+#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011
+#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2
+
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+ if (adev->asic_type == CHIP_VEGA20)
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
+ else
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
+ u32 range = 2;
+
+ if (adev->asic_type == CHIP_VEGA20)
+ range = 8;
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
- doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
} else
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
@@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
+ if (adev->asic_type == CHIP_VEGA20)
+ return;
+
/* NBIF_MGCG_CTRL_LCLK */
def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 8da6da90b1c9..0cf48d26c676 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id
GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */
GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */
GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */
+ GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
+ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
+ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
};
+/*-----------------------------------------------------------------------------
+ NOTE: All physical addresses used in this interface are actually
+ GPU Virtual Addresses.
+*/
+
+
/* Control registers of the TEE Gfx interface. These are located in
* SRBM-to-PSP mailbox registers (total 8 registers).
*/
@@ -55,8 +64,8 @@ struct psp_gfx_ctrl
volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */
volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */
volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */
- volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */
- volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */
+ volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual of ring buffer (VMID=0)*/
+ volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual of ring buffer (VMID=0) */
volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */
};
@@ -78,6 +87,8 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */
GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */
GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
+ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
+ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
};
@@ -85,11 +96,11 @@ enum psp_gfx_cmd_id
/* Command to load Trusted Application binary into PSP OS. */
struct psp_gfx_cmd_load_ta
{
- uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */
- uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */
+ uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
+ uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */
uint32_t app_len; /* length of the TA binary in bytes */
- uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */
- uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */
+ uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
+ uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */
uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */
/* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
@@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta
*/
struct psp_gfx_buf_desc
{
- uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */
- uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */
+ uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
+ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
};
@@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd
/* Command to setup TMR region. */
struct psp_gfx_cmd_setup_tmr
{
- uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */
- uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */
+ uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
+ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */
};
@@ -174,18 +185,32 @@ enum psp_gfx_fw_type
GFX_FW_TYPE_ISP = 16,
GFX_FW_TYPE_ACP = 17,
GFX_FW_TYPE_SMU = 18,
+ GFX_FW_TYPE_MMSCH = 19,
+ GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
+ GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
+ GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
+ GFX_FW_TYPE_MAX = 23
};
/* Command to load HW IP FW. */
struct psp_gfx_cmd_load_ip_fw
{
- uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */
- uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */
+ uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+ uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
uint32_t fw_size; /* FW buffer size in bytes */
enum psp_gfx_fw_type fw_type; /* FW type */
};
+/* Command to save/restore HW IP FW. */
+struct psp_gfx_cmd_save_restore_ip_fw
+{
+ uint32_t save_fw; /* if set, command is used for saving fw otherwise for resetoring*/
+ uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
+ uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
+ uint32_t buf_size; /* Size of the save/restore buffer in bytes */
+ enum psp_gfx_fw_type fw_type; /* FW type */
+};
/* All GFX ring buffer commands. */
union psp_gfx_commands
@@ -195,7 +220,7 @@ union psp_gfx_commands
struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd;
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
-
+ struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
};
@@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp
/* These fields are used for RBI only. They are all 0 in GPCOM commands
*/
- uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */
- uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */
+ uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
+ uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */
uint32_t resp_offset; /* +20 offset within response buffer */
uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */
@@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp
/* Structure of the Ring Buffer Frame */
struct psp_gfx_rb_frame
{
- uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */
- uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */
+ uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
+ uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */
uint32_t cmd_buf_size; /* +8 command buffer size in bytes */
- uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */
- uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */
+ uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
+ uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
uint32_t fence_value; /* +20 Fence value */
uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */
uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */
uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */
uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */
uint8_t reserved1[2]; /* +34 reserved, must be 0 */
- uint32_t reserved2[7]; /* +40 reserved, must be 0 */
- /* total 64 bytes */
+ uint32_t reserved2[7]; /* +36 reserved, must be 0 */
+ /* total 64 bytes */
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 8873d833a7f7..0ff136d02d9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+ break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 196e75def1f2..727071fee6f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -41,9 +41,14 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
+
#define smnMP1_FIRMWARE_FLAGS 0x3010028
+static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
+
static int
psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
{
@@ -207,12 +212,31 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
return ret;
}
+static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver)
+{
+ int i;
+
+ if (ver == adev->psp.sos_fw_version)
+ return true;
+
+ /*
+ * Double check if the latest four legacy versions.
+ * If yes, it is still the right version.
+ */
+ for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) {
+ if (sos_old_versions[i] == adev->psp.sos_fw_version)
+ return true;
+ }
+
+ return false;
+}
+
static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
{
int ret;
unsigned int psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
+ uint32_t sol_reg, ver;
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
@@ -245,6 +269,10 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
0, true);
+ ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+ if (!psp_v3_1_match_version(adev, ver))
+ DRM_WARN("SOS version doesn't match\n");
+
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c7190c39c4f5..15ae4bc9c072 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -44,6 +44,8 @@
#include "iceland_sdma_pkt_open.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -202,8 +204,7 @@ static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
- u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
+ u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
return wptr;
}
@@ -218,9 +219,8 @@ static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
}
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -273,7 +273,7 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask = 0;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
else
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
@@ -898,7 +898,7 @@ static int sdma_v2_4_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
@@ -910,7 +910,7 @@ static int sdma_v2_4_sw_init(void *handle)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
@@ -1213,8 +1213,10 @@ static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index be20a387d961..1e07ff274d73 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -44,6 +44,8 @@
#include "tonga_sdma_pkt_open.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -62,6 +64,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -209,6 +213,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_polaris11_a11,
ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -275,15 +280,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
chip_name = "fiji";
break;
- case CHIP_POLARIS11:
- chip_name = "polaris11";
- break;
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
+ case CHIP_POLARIS11:
+ chip_name = "polaris11";
+ break;
case CHIP_POLARIS12:
chip_name = "polaris12";
break;
+ case CHIP_VEGAM:
+ chip_name = "vegam";
+ break;
case CHIP_CARRIZO:
chip_name = "carrizo";
break;
@@ -359,9 +367,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
} else {
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
-
- wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
+ wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
}
return wptr;
@@ -388,9 +394,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
} else {
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
-
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
}
}
@@ -444,7 +448,7 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask = 0;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
else
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
@@ -1173,7 +1177,7 @@ static int sdma_v3_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
@@ -1185,7 +1189,7 @@ static int sdma_v3_0_sw_init(void *handle)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
@@ -1649,8 +1653,10 @@ static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 399f876f9cad..e7ca4623cfb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -38,10 +38,15 @@
#include "soc15.h"
#include "vega10_sdma_pkt_open.h"
+#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
@@ -107,6 +112,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
};
+static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+};
+
static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
@@ -139,6 +166,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_vg12,
ARRAY_SIZE(golden_settings_sdma_vg12));
break;
+ case CHIP_VEGA20:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4_2,
+ ARRAY_SIZE(golden_settings_sdma_4_2));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
@@ -182,6 +214,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA12:
chip_name = "vega12";
break;
+ case CHIP_VEGA20:
+ chip_name = "vega20";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -264,13 +299,12 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
- int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
- highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
+ lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
+ highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
- me, highbit, lowbit);
+ ring->me, highbit, lowbit);
wptr = highbit;
wptr = wptr << 32;
wptr |= lowbit;
@@ -307,17 +341,15 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
-
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- me,
+ ring->me,
lower_32_bits(ring->wptr << 2),
- me,
+ ring->me,
upper_32_bits(ring->wptr << 2));
- WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
}
}
@@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
}
+static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
+ int mem_space, int hdp,
+ uint32_t addr0, uint32_t addr1,
+ uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ if (mem_space) {
+ /* memory */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ } else {
+ /* registers */
+ amdgpu_ring_write(ring, addr0 << 2);
+ amdgpu_ring_write(ring, addr1 << 2);
+ }
+ amdgpu_ring_write(ring, ref); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
/**
* sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
@@ -373,20 +430,15 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
else
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ sdma_v4_0_wait_reg_mem(ring, 0, 1,
+ adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+ ref_and_mask, ref_and_mask, 10);
}
/**
@@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
uint64_t addr = ring->fence_drv.gpu_addr;
/* wait for idle */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
- SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
- amdgpu_ring_write(ring, addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
- amdgpu_ring_write(ring, seq); /* reference */
- amdgpu_ring_write(ring, 0xffffffff); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+ sdma_v4_0_wait_reg_mem(ring, 1, 0,
+ addr & 0xfffffffc,
+ upper_32_bits(addr) & 0xffffffff,
+ seq, 0xffffffff, 4);
}
@@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
- amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, val); /* reference */
- amdgpu_ring_write(ring, mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+ sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
}
static int sdma_v4_0_early_init(void *handle)
@@ -1190,13 +1228,13 @@ static int sdma_v4_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
@@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
sdma_v4_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@@ -1605,14 +1644,17 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.pad_ib = sdma_v4_0_ring_pad_ib,
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index a675ec6d2811..c364ef94cc36 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev,
}
}
+static bool si_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
static int si_get_pcie_lanes(struct amdgpu_device *adev)
{
u32 link_width_cntl;
@@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.get_config_memsize = &si_get_config_memsize,
.flush_hdp = &si_flush_hdp,
.invalidate_hdp = &si_invalidate_hdp,
+ .need_full_reset = &si_need_full_reset,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 797d505bf9ee..db327b412562 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -56,16 +56,16 @@
#define BIOS_SCRATCH_4 0x5cd
-MODULE_FIRMWARE("radeon/tahiti_smc.bin");
-MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
-MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
-MODULE_FIRMWARE("radeon/verde_smc.bin");
-MODULE_FIRMWARE("radeon/verde_k_smc.bin");
-MODULE_FIRMWARE("radeon/oland_smc.bin");
-MODULE_FIRMWARE("radeon/oland_k_smc.bin");
-MODULE_FIRMWARE("radeon/hainan_smc.bin");
-MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
-MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_smc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_smc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/verde_smc.bin");
+MODULE_FIRMWARE("amdgpu/verde_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/oland_smc.bin");
+MODULE_FIRMWARE("amdgpu/oland_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/hainan_smc.bin");
+MODULE_FIRMWARE("amdgpu/hainan_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/banks_k_2_smc.bin");
static const struct amd_pm_funcs si_dpm_funcs;
@@ -3480,7 +3480,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
disable_sclk_switching = true;
}
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -3489,7 +3489,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
if (ps->performance_levels[i].vddc > ps->performance_levels[i+1].vddc)
ps->performance_levels[i].vddc = ps->performance_levels[i+1].vddc;
}
- if (adev->pm.dpm.ac_power == false) {
+ if (adev->pm.ac_power == false) {
for (i = 0; i < ps->performance_level_count; i++) {
if (ps->performance_levels[i].mclk > max_limits->mclk)
ps->performance_levels[i].mclk = max_limits->mclk;
@@ -7242,8 +7242,9 @@ static int si_parse_power_table(struct amdgpu_device *adev)
(mode_info->atom_context->bios + data_offset +
le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
- adev->pm.dpm.ps = kzalloc(sizeof(struct amdgpu_ps) *
- state_array->ucNumEntries, GFP_KERNEL);
+ adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries,
+ sizeof(struct amdgpu_ps),
+ GFP_KERNEL);
if (!adev->pm.dpm.ps)
return -ENOMEM;
power_state_offset = (u8 *)state_array->states;
@@ -7317,8 +7318,7 @@ static int si_dpm_init(struct amdgpu_device *adev)
pi = &eg_pi->rv7xx;
si_pi->sys_pcie_mask =
- (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
- CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK;
si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
@@ -7346,7 +7346,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
return ret;
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries =
- kzalloc(4 * sizeof(struct amdgpu_clock_voltage_dependency_entry), GFP_KERNEL);
+ kcalloc(4,
+ sizeof(struct amdgpu_clock_voltage_dependency_entry),
+ GFP_KERNEL);
if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
amdgpu_free_extended_power_table(adev);
return -ENOMEM;
@@ -7580,7 +7582,7 @@ static int si_dpm_late_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_dpm)
+ if (!adev->pm.dpm_enabled)
return 0;
ret = si_set_temperature_range(adev);
@@ -7664,7 +7666,7 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 51cf8a30f6c2..83f2717fcf81 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -41,8 +41,6 @@
#include "sdma1/sdma1_4_0_offset.h"
#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
-#include "mp/mp_9_0_offset.h"
-#include "mp/mp_9_0_sh_mask.h"
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
@@ -52,6 +50,8 @@
#include "gmc_v9_0.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
#include "vega10_ih.h"
#include "sdma_v4_0.h"
#include "uvd_v7_0.h"
@@ -60,33 +60,6 @@
#include "dce_virtual.h"
#include "mxgpu_ai.h"
-#define mmFabricConfigAccessControl 0x0410
-#define mmFabricConfigAccessControl_BASE_IDX 0
-#define mmFabricConfigAccessControl_DEFAULT 0x00000000
-//FabricConfigAccessControl
-#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0
-#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1
-#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10
-#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L
-#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L
-#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L
-
-
-#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc
-#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0
-//DF_PIE_AON0_DfGlobalClkGater
-#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0
-#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL
-
-enum {
- DF_MGCG_DISABLE = 0,
- DF_MGCG_ENABLE_00_CYCLE_DELAY =1,
- DF_MGCG_ENABLE_01_CYCLE_DELAY =2,
- DF_MGCG_ENABLE_15_CYCLE_DELAY =13,
- DF_MGCG_ENABLE_31_CYCLE_DELAY =14,
- DF_MGCG_ENABLE_63_CYCLE_DELAY =15
-};
-
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba
@@ -313,6 +286,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
+ { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)},
};
static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
@@ -341,6 +315,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev,
} else {
if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
return adev->gfx.config.gb_addr_config;
+ else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2))
+ return adev->gfx.config.db_debug2;
return RREG32(reg_offset);
}
}
@@ -512,15 +488,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_RAVEN:
vega10_reg_base_init(adev);
break;
+ case CHIP_VEGA20:
+ vega20_reg_base_init(adev);
+ break;
default:
return -EINVAL;
}
if (adev->flags & AMD_IS_APU)
adev->nbio_funcs = &nbio_v7_0_funcs;
+ else if (adev->asic_type == CHIP_VEGA20)
+ adev->nbio_funcs = &nbio_v7_0_funcs;
else
adev->nbio_funcs = &nbio_v6_1_funcs;
+ if (adev->asic_type == CHIP_VEGA20)
+ adev->df_funcs = &df_v3_6_funcs;
+ else
+ adev->df_funcs = &df_v1_7_funcs;
adev->nbio_funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev))
@@ -529,12 +514,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->asic_type != CHIP_VEGA20) {
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ }
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -593,6 +581,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev,
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
}
+static bool soc15_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we implement soft reset */
+ return true;
+}
+
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
@@ -606,6 +600,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.get_config_memsize = &soc15_get_config_memsize,
.flush_hdp = &soc15_flush_hdp,
.invalidate_hdp = &soc15_invalidate_hdp,
+ .need_full_reset = &soc15_need_full_reset,
};
static int soc15_common_early_init(void *handle)
@@ -675,6 +670,28 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
break;
+ case CHIP_VEGA20:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x28;
+ break;
case CHIP_RAVEN:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -694,8 +711,15 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
- AMD_CG_SUPPORT_SDMA_LS;
- adev->pg_flags = AMD_PG_SUPPORT_SDMA;
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+
+ if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
adev->external_rev_id = 0x1;
break;
@@ -871,32 +895,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade
WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data);
}
-static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev,
- bool enable)
-{
- uint32_t data;
-
- /* Put DF on broadcast mode */
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl));
- data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
- WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
- data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- data |= DF_MGCG_ENABLE_15_CYCLE_DELAY;
- WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
- } else {
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
- data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- data |= DF_MGCG_DISABLE;
- WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
- }
-
- WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl),
- mmFabricConfigAccessControl_DEFAULT);
-}
-
static int soc15_common_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
@@ -908,6 +906,7 @@ static int soc15_common_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
adev->nbio_funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
adev->nbio_funcs->update_medium_grain_light_sleep(adev,
@@ -920,7 +919,7 @@ static int soc15_common_set_clockgating_state(void *handle,
state == AMD_CG_STATE_GATE ? true : false);
soc15_update_rom_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
- soc15_update_df_medium_grain_clock_gating(adev,
+ adev->df_funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
break;
case CHIP_RAVEN:
@@ -973,10 +972,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
*flags |= AMD_CG_SUPPORT_ROM_MGCG;
- /* AMD_CG_SUPPORT_DF_MGCG */
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
- if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY)
- *flags |= AMD_CG_SUPPORT_DF_MGCG;
+ adev->df_funcs->get_clockgating_state(adev, flags);
}
static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index f70da8a29f86..1f714b7af520 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -55,5 +55,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
int vega10_reg_base_init(struct amdgpu_device *adev);
+int vega20_reg_base_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index def865067edd..0942f492d2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -47,6 +47,21 @@
#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
+ do { \
+ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ uint32_t loop = adev->usec_timeout; \
+ while ((tmp_ & (mask)) != (expected_value)) { \
+ udelay(2); \
+ tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ loop--; \
+ if (!loop) { \
+ ret = -ETIMEDOUT; \
+ break; \
+ } \
+ } \
+ } while (0)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f85fdb6..edfe50821cd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -53,6 +53,29 @@
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+#define PACKETJ_CONDITION_CHECK0 0
+#define PACKETJ_CONDITION_CHECK1 1
+#define PACKETJ_CONDITION_CHECK2 2
+#define PACKETJ_CONDITION_CHECK3 3
+#define PACKETJ_CONDITION_CHECK4 4
+#define PACKETJ_CONDITION_CHECK5 5
+#define PACKETJ_CONDITION_CHECK6 6
+#define PACKETJ_CONDITION_CHECK7 7
+
+#define PACKETJ_TYPE0 0
+#define PACKETJ_TYPE1 1
+#define PACKETJ_TYPE2 2
+#define PACKETJ_TYPE3 3
+#define PACKETJ_TYPE4 4
+#define PACKETJ_TYPE5 5
+#define PACKETJ_TYPE6 6
+#define PACKETJ_TYPE7 7
+
+#define PACKETJ(reg, r, cond, type) ((reg & 0x3FFFF) | \
+ ((r & 0x3F) << 18) | \
+ ((cond & 0xF) << 24) | \
+ ((type & 0xF) << 28))
+
/* Packet 3 types */
#define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11
@@ -159,6 +182,7 @@
#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
#define EOP_TCL1_ACTION_EN (1 << 16)
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
+#define EOP_TC_NC_ACTION_EN (1 << 19)
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
#define DATA_SEL(x) ((x) << 29)
@@ -268,6 +292,11 @@
* x=1: tmz_end
*/
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
* 2. CONTROL
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 948bb9437757..6fed3d7797a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -93,6 +93,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
static int uvd_v4_2_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
uvd_v4_2_set_irq_funcs(adev);
@@ -107,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle)
int r;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
if (r)
return r;
@@ -119,9 +120,9 @@ static int uvd_v4_2_sw_init(void *handle)
if (r)
return r;
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
return r;
}
@@ -150,7 +151,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
static int uvd_v4_2_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -208,7 +209,7 @@ done:
static int uvd_v4_2_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v4_2_stop(adev);
@@ -251,7 +252,7 @@ static int uvd_v4_2_resume(void *handle)
*/
static int uvd_v4_2_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz;
int i, j, r;
u32 tmp;
@@ -523,6 +524,18 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
+static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
/**
* uvd_v4_2_mc_resume - memory controller programming
*
@@ -536,7 +549,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
uint32_t size;
/* programm the VCPU memory controller bits 0-27 */
- addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
+ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
@@ -553,11 +566,11 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
/* bits 28-31 */
- addr = (adev->uvd.gpu_addr >> 28) & 0xF;
+ addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
/* bits 32-39 */
- addr = (adev->uvd.gpu_addr >> 32) & 0xFF;
+ addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
@@ -664,7 +677,7 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: UVD TRAP\n");
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst->ring);
return 0;
}
@@ -688,7 +701,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
- if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
if (!(RREG32_SMC(ixCURRENT_PG_STATUS) &
CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) {
WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
@@ -699,7 +712,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
}
return 0;
} else {
- if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
if (RREG32_SMC(ixCURRENT_PG_STATUS) &
CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
@@ -732,7 +745,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v4_2_ring_get_rptr,
.get_wptr = uvd_v4_2_ring_get_wptr,
@@ -745,7 +757,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.emit_fence = uvd_v4_2_ring_emit_fence,
.test_ring = uvd_v4_2_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v4_2_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -753,7 +765,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
{
- adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs;
}
static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
@@ -763,8 +775,8 @@ static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->uvd.irq.num_types = 1;
- adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs;
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs;
}
const struct amdgpu_ip_block_version uvd_v4_2_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 6445d55e7d5a..aeaa1ca46a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -35,6 +35,7 @@
#include "vi.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -89,6 +90,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
static int uvd_v5_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
uvd_v5_0_set_ring_funcs(adev);
uvd_v5_0_set_irq_funcs(adev);
@@ -103,7 +105,7 @@ static int uvd_v5_0_sw_init(void *handle)
int r;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
if (r)
return r;
@@ -115,9 +117,9 @@ static int uvd_v5_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
return r;
}
@@ -144,7 +146,7 @@ static int uvd_v5_0_sw_fini(void *handle)
static int uvd_v5_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -204,7 +206,7 @@ done:
static int uvd_v5_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v5_0_stop(adev);
@@ -253,9 +255,9 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
/* programm memory controller bits 0-27 */
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr));
+ lower_32_bits(adev->uvd.inst->gpu_addr));
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr));
+ upper_32_bits(adev->uvd.inst->gpu_addr));
offset = AMDGPU_UVD_FIRMWARE_OFFSET;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -287,7 +289,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
*/
static int uvd_v5_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
uint32_t mp_swap_cntl;
@@ -540,6 +542,18 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
+static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
static bool uvd_v5_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -586,7 +600,7 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: UVD TRAP\n");
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst->ring);
return 0;
}
@@ -840,7 +854,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v5_0_ring_get_rptr,
.get_wptr = uvd_v5_0_ring_get_wptr,
@@ -853,7 +866,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.emit_fence = uvd_v5_0_ring_emit_fence,
.test_ring = uvd_v5_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v5_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -861,7 +874,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
{
- adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs;
}
static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
@@ -871,8 +884,8 @@ static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->uvd.irq.num_types = 1;
- adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs;
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs;
}
const struct amdgpu_ip_block_version uvd_v5_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index f26f515db2fb..598dbeaba636 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -36,6 +36,7 @@
#include "bif/bif_5_1_d.h"
#include "gmc/gmc_8_1_d.h"
#include "vi.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
/* Polaris10/11/12 firmware version */
#define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8))
@@ -62,7 +63,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev)
{
return ((adev->asic_type >= CHIP_POLARIS10) &&
- (adev->asic_type <= CHIP_POLARIS12) &&
+ (adev->asic_type <= CHIP_VEGAM) &&
(!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16));
}
@@ -91,7 +92,7 @@ static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
+ if (ring == &adev->uvd.inst->ring_enc[0])
return RREG32(mmUVD_RB_RPTR);
else
return RREG32(mmUVD_RB_RPTR2);
@@ -121,7 +122,7 @@ static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
+ if (ring == &adev->uvd.inst->ring_enc[0])
return RREG32(mmUVD_RB_WPTR);
else
return RREG32(mmUVD_RB_WPTR2);
@@ -152,7 +153,7 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
+ if (ring == &adev->uvd.inst->ring_enc[0])
WREG32(mmUVD_RB_WPTR,
lower_32_bits(ring->wptr));
else
@@ -247,12 +248,10 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -311,19 +310,13 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
+ if (direct)
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ else
+ r = amdgpu_job_submit(job, &ring->adev->vce.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err;
- }
+ if (r)
+ goto err;
if (fence)
*fence = dma_fence_get(f);
@@ -375,6 +368,7 @@ error:
static int uvd_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
if (!(adev->flags & AMD_IS_APU) &&
(RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK))
@@ -399,14 +393,14 @@ static int uvd_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
if (r)
return r;
/* UVD ENC TRAP */
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq);
if (r)
return r;
}
@@ -418,39 +412,29 @@ static int uvd_v6_0_sw_init(void *handle)
if (!uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- adev->uvd.ring_enc[i].funcs = NULL;
+ adev->uvd.inst->ring_enc[i].funcs = NULL;
- adev->uvd.irq.num_types = 1;
+ adev->uvd.inst->irq.num_types = 1;
adev->uvd.num_enc_rings = 0;
DRM_INFO("UVD ENC is disabled\n");
- } else {
- struct drm_sched_rq *rq;
- ring = &adev->uvd.ring_enc[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
- rq, amdgpu_sched_jobs, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD ENC run queue.\n");
- return r;
- }
}
r = amdgpu_uvd_resume(adev);
if (r)
return r;
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
+ ring = &adev->uvd.inst->ring_enc[i];
sprintf(ring->name, "uvd_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
}
@@ -469,10 +453,8 @@ static int uvd_v6_0_sw_fini(void *handle)
return r;
if (uvd_v6_0_enc_support(adev)) {
- drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
-
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+ amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
}
return amdgpu_uvd_sw_fini(adev);
@@ -488,7 +470,7 @@ static int uvd_v6_0_sw_fini(void *handle)
static int uvd_v6_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int i, r;
@@ -532,7 +514,7 @@ static int uvd_v6_0_hw_init(void *handle)
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
+ ring = &adev->uvd.inst->ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
@@ -563,7 +545,7 @@ done:
static int uvd_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v6_0_stop(adev);
@@ -611,9 +593,9 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
/* programm memory controller bits 0-27 */
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr));
+ lower_32_bits(adev->uvd.inst->gpu_addr));
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr));
+ upper_32_bits(adev->uvd.inst->gpu_addr));
offset = AMDGPU_UVD_FIRMWARE_OFFSET;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -726,7 +708,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
*/
static int uvd_v6_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
uint32_t mp_swap_cntl;
@@ -866,14 +848,14 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0);
if (uvd_v6_0_enc_support(adev)) {
- ring = &adev->uvd.ring_enc[0];
+ ring = &adev->uvd.inst->ring_enc[0];
WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32(mmUVD_RB_SIZE, ring->ring_size / 4);
- ring = &adev->uvd.ring_enc[1];
+ ring = &adev->uvd.inst->ring_enc[1];
WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr);
@@ -964,6 +946,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
/**
+ * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
* uvd_v6_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -1089,6 +1081,18 @@ static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0xE);
}
+static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
uint32_t seq = ring->fence_drv.sync_seq;
@@ -1148,10 +1152,10 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
if (srbm_soft_reset) {
- adev->uvd.srbm_soft_reset = srbm_soft_reset;
+ adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
return true;
} else {
- adev->uvd.srbm_soft_reset = 0;
+ adev->uvd.inst->srbm_soft_reset = 0;
return false;
}
}
@@ -1160,7 +1164,7 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst->srbm_soft_reset)
return 0;
uvd_v6_0_stop(adev);
@@ -1172,9 +1176,9 @@ static int uvd_v6_0_soft_reset(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 srbm_soft_reset;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst->srbm_soft_reset)
return 0;
- srbm_soft_reset = adev->uvd.srbm_soft_reset;
+ srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
if (srbm_soft_reset) {
u32 tmp;
@@ -1202,7 +1206,7 @@ static int uvd_v6_0_post_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst->srbm_soft_reset)
return 0;
mdelay(5);
@@ -1228,17 +1232,17 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case 124:
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst->ring);
break;
case 119:
if (likely(uvd_v6_0_enc_support(adev)))
- amdgpu_fence_process(&adev->uvd.ring_enc[0]);
+ amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
else
int_handled = false;
break;
case 120:
if (likely(uvd_v6_0_enc_support(adev)))
- amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+ amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
else
int_handled = false;
break;
@@ -1521,22 +1525,22 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_frame_size =
- 6 + 6 + /* hdp flush / invalidate */
+ 6 + /* hdp invalidate */
10 + /* uvd_v6_0_ring_emit_pipeline_sync */
14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
.emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
.emit_ib = uvd_v6_0_ring_emit_ib,
.emit_fence = uvd_v6_0_ring_emit_fence,
+ .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v6_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -1546,13 +1550,12 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
.emit_frame_size =
- 6 + 6 + /* hdp flush / invalidate */
+ 6 + /* hdp invalidate */
10 + /* uvd_v6_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
@@ -1561,9 +1564,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.emit_fence = uvd_v6_0_ring_emit_fence,
.emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
.emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+ .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v6_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -1600,10 +1604,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
{
if (adev->asic_type >= CHIP_POLARIS10) {
- adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs;
DRM_INFO("UVD is enabled in VM mode\n");
} else {
- adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs;
DRM_INFO("UVD is enabled in physical mode\n");
}
}
@@ -1613,7 +1617,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
+ adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
DRM_INFO("UVD ENC is enabled in VM mode\n");
}
@@ -1626,11 +1630,11 @@ static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev)
{
if (uvd_v6_0_enc_support(adev))
- adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1;
+ adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
else
- adev->uvd.irq.num_types = 1;
+ adev->uvd.inst->irq.num_types = 1;
- adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs;
+ adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs;
}
const struct amdgpu_ip_block_version uvd_v6_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index eddc57f3b72a..db5f3d78ab12 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -39,6 +39,9 @@
#include "hdp/hdp_4_0_offset.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
+#include "ivsrcid/uvd/irqsrcs_uvd_7_0.h"
+
+#define UVD7_MAX_HW_INSTANCES_VEGA20 2
static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
@@ -47,6 +50,11 @@ static int uvd_v7_0_start(struct amdgpu_device *adev);
static void uvd_v7_0_stop(struct amdgpu_device *adev);
static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
+static int amdgpu_ih_clientid_uvds[] = {
+ SOC15_IH_CLIENTID_UVD,
+ SOC15_IH_CLIENTID_UVD1
+};
+
/**
* uvd_v7_0_ring_get_rptr - get read pointer
*
@@ -58,7 +66,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
}
/**
@@ -72,10 +80,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
else
- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
}
/**
@@ -89,7 +97,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
}
/**
@@ -106,10 +114,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
- if (ring == &adev->uvd.ring_enc[0])
- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
else
- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
}
/**
@@ -123,7 +131,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
/**
@@ -144,11 +152,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
return;
}
- if (ring == &adev->uvd.ring_enc[0])
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
lower_32_bits(ring->wptr));
else
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
lower_32_bits(ring->wptr));
}
@@ -170,8 +178,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 16);
if (r) {
- DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
- ring->idx, r);
+ DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
+ ring->me, ring->idx, r);
return r;
}
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
@@ -184,11 +192,11 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
}
if (i < adev->usec_timeout) {
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
+ DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
+ ring->me, ring->idx, i);
} else {
- DRM_ERROR("amdgpu: ring %d test failed\n",
- ring->idx);
+ DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
+ ring->me, ring->idx);
r = -ETIMEDOUT;
}
@@ -242,12 +250,10 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -305,19 +311,13 @@ int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
+ if (direct)
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ else
+ r = amdgpu_job_submit(job, &ring->adev->vce.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err;
- }
+ if (r)
+ goto err;
if (fence)
*fence = dma_fence_get(f);
@@ -342,24 +342,24 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
if (r) {
- DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
goto error;
}
r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence);
if (r) {
- DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out.\n");
+ DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
r = -ETIMEDOUT;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
} else {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
r = 0;
}
error:
@@ -370,6 +370,10 @@ error:
static int uvd_v7_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_VEGA20)
+ adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
+ else
+ adev->uvd.num_uvd_inst = 1;
if (amdgpu_sriov_vf(adev))
adev->uvd.num_enc_rings = 1;
@@ -385,20 +389,21 @@ static int uvd_v7_0_early_init(void *handle)
static int uvd_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
- int i, r;
+ int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- /* UVD TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
- if (r)
- return r;
-
- /* UVD ENC TRAP */
- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], UVD_7_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->uvd.inst[j].irq);
if (r)
return r;
+
+ /* UVD ENC TRAP */
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + UVD_7_0__SRCID__UVD_ENC_GEN_PURP, &adev->uvd.inst[j].irq);
+ if (r)
+ return r;
+ }
}
r = amdgpu_uvd_sw_init(adev);
@@ -415,43 +420,37 @@ static int uvd_v7_0_sw_init(void *handle)
DRM_INFO("PSP loading UVD firmware\n");
}
- ring = &adev->uvd.ring_enc[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
- rq, amdgpu_sched_jobs, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD ENC run queue.\n");
- return r;
- }
-
r = amdgpu_uvd_resume(adev);
if (r)
return r;
- if (!amdgpu_sriov_vf(adev)) {
- ring = &adev->uvd.ring;
- sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
- if (r)
- return r;
- }
- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
- sprintf(ring->name, "uvd_enc%d", i);
- if (amdgpu_sriov_vf(adev)) {
- ring->use_doorbell = true;
-
- /* currently only use the first enconding ring for
- * sriov, so set unused location for other unused rings.
- */
- if (i == 0)
- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
- else
- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (!amdgpu_sriov_vf(adev)) {
+ ring = &adev->uvd.inst[j].ring;
+ sprintf(ring->name, "uvd<%d>", j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst[j].ring_enc[i];
+ sprintf(ring->name, "uvd_enc%d<%d>", i, j);
+ if (amdgpu_sriov_vf(adev)) {
+ ring->use_doorbell = true;
+
+ /* currently only use the first enconding ring for
+ * sriov, so set unused location for other unused rings.
+ */
+ if (i == 0)
+ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+ else
+ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+ }
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+ if (r)
+ return r;
}
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
- if (r)
- return r;
}
r = amdgpu_virt_alloc_mm_table(adev);
@@ -463,7 +462,7 @@ static int uvd_v7_0_sw_init(void *handle)
static int uvd_v7_0_sw_fini(void *handle)
{
- int i, r;
+ int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_virt_free_mm_table(adev);
@@ -472,11 +471,10 @@ static int uvd_v7_0_sw_fini(void *handle)
if (r)
return r;
- drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
-
- for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
-
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
return amdgpu_uvd_sw_fini(adev);
}
@@ -490,9 +488,9 @@ static int uvd_v7_0_sw_fini(void *handle)
static int uvd_v7_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring;
uint32_t tmp;
- int i, r;
+ int i, j, r;
if (amdgpu_sriov_vf(adev))
r = uvd_v7_0_sriov_start(adev);
@@ -501,57 +499,60 @@ static int uvd_v7_0_hw_init(void *handle)
if (r)
goto done;
- if (!amdgpu_sriov_vf(adev)) {
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- goto done;
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ ring = &adev->uvd.inst[j].ring;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
+ }
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
+ goto done;
+ }
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_TIMEOUT_STATUS), 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_CNTL), 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
}
- r = amdgpu_ring_alloc(ring, 10);
- if (r) {
- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
- goto done;
- }
-
- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
- amdgpu_ring_write(ring, tmp);
- amdgpu_ring_write(ring, 0xFFFFF);
-
- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
- amdgpu_ring_write(ring, tmp);
- amdgpu_ring_write(ring, 0xFFFFF);
-
- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
- amdgpu_ring_write(ring, tmp);
- amdgpu_ring_write(ring, 0xFFFFF);
-
- /* Clear timeout status bits */
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_TIMEOUT_STATUS), 0));
- amdgpu_ring_write(ring, 0x8);
-
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_CNTL), 0));
- amdgpu_ring_write(ring, 3);
-
- amdgpu_ring_commit(ring);
- }
-
- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- goto done;
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst[j].ring_enc[i];
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
+ }
}
}
-
done:
if (!r)
DRM_INFO("UVD and UVD ENC initialized successfully.\n");
@@ -569,7 +570,7 @@ done:
static int uvd_v7_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ int i;
if (!amdgpu_sriov_vf(adev))
uvd_v7_0_stop(adev);
@@ -578,7 +579,8 @@ static int uvd_v7_0_hw_fini(void *handle)
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
}
- ring->ready = false;
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
+ adev->uvd.inst[i].ring.ready = false;
return 0;
}
@@ -618,48 +620,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
{
uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
uint32_t offset;
+ int i;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- offset = 0;
- } else {
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr));
- offset = size;
- }
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr));
+ offset = size;
+ }
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr + offset));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr + offset));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
- WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
+ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+ }
}
static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
@@ -669,6 +674,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
uint64_t addr = table->gpu_addr;
struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
uint32_t size;
+ int i;
size = header->header_size + header->vce_table_size + header->uvd_table_size;
@@ -688,11 +694,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
/* 4, set resp to zero */
WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
- WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0);
- adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0;
- adev->uvd.ring_enc[0].wptr = 0;
- adev->uvd.ring_enc[0].wptr_old = 0;
-
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
+ adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
+ adev->uvd.inst[i].ring_enc[0].wptr = 0;
+ adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
+ }
/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
@@ -725,6 +732,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
struct mmsch_v1_0_cmd_end end = { {0} };
uint32_t *init_table = adev->virt.mm_table.cpu_addr;
struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+ uint8_t i = 0;
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
@@ -742,120 +750,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
init_table += header->uvd_table_offset;
- ring = &adev->uvd.ring;
- ring->wptr = 0;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
-
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
- 0xFFFFFFFF, 0x00000004);
- /* mc resume*/
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- offset = 0;
- } else {
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
- lower_32_bits(adev->uvd.gpu_addr));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
- upper_32_bits(adev->uvd.gpu_addr));
- offset = size;
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ ring = &adev->uvd.inst[i].ring;
+ ring->wptr = 0;
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ 0xFFFFFFFF, 0x00000004);
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ offset = 0;
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr));
+ offset = size;
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+ /* mc resume end*/
+
+ /* disable clock gating */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
+ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+ /* disable interupt */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ /* stall UMC and register bus before resetting VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+ (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+ /* initialize UVD memory controller */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
+ (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ 0x00100000L));
+
+ /* take all subblocks out of reset, except VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* enable VCPU clock */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* enable master interrupt */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+ /* clear the bit 4 of UVD_STATUS */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+ /* force RBC into idle state */
+ size = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ ring = &adev->uvd.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+ /* boot up the VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
+
+ /* enable UMC */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+ MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
}
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
- lower_32_bits(adev->uvd.gpu_addr + offset));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
- upper_32_bits(adev->uvd.gpu_addr + offset));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
- lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
- upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
- /* mc resume end*/
-
- /* disable clock gating */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
-
- /* disable interupt */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
-
- /* stall UMC and register bus before resetting VCPU */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- /* put LMI, VCPU, RBC etc... into reset */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
-
- /* initialize UVD memory controller */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
- (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__REQ_MODE_MASK |
- 0x00100000L));
-
- /* take all subblocks out of reset, except VCPU */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-
- /* enable VCPU clock */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* enable master interrupt */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
- /* clear the bit 4 of UVD_STATUS */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
-
- /* force RBC into idle state */
- size = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
-
- ring = &adev->uvd.ring_enc[0];
- ring->wptr = 0;
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
-
- /* boot up the VCPU */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
-
- /* enable UMC */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
-
- MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
-
/* add end packet */
memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
@@ -874,15 +883,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
*/
static int uvd_v7_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
uint32_t mp_swap_cntl;
- int i, j, r;
+ int i, j, k, r;
- /* disable DPG */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
- ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ /* disable DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ }
/* disable byte swapping */
lmi_swap_cntl = 0;
@@ -890,157 +901,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
uvd_v7_0_mc_resume(adev);
- /* disable clock gating */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0,
- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
-
- /* disable interupt */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* stall UMC and register bus before resetting VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- mdelay(1);
-
- /* put LMI, VCPU, RBC etc... into reset */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
- mdelay(5);
+ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ ring = &adev->uvd.inst[k].ring;
+ /* disable clock gating */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
+ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
- /* initialize UVD memory controller */
- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
- (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__REQ_MODE_MASK |
- 0x00100000L);
+ /* disable interupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* stall UMC and register bus before resetting VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ mdelay(1);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* initialize UVD memory controller */
+ WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
+ (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ 0x00100000L);
#ifdef __BIG_ENDIAN
- /* swap (8 in 32) RB and IB */
- lmi_swap_cntl = 0xa;
- mp_swap_cntl = 0;
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
#endif
- WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
- WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
-
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
-
- /* take all subblocks out of reset, except VCPU */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(5);
+ WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
- /* enable VCPU clock */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
- UVD_VCPU_CNTL__CLK_EN_MASK);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
- /* enable UMC */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* take all subblocks out of reset, except VCPU */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
- /* boot up the VCPU */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
- mdelay(10);
+ /* enable VCPU clock */
+ WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
+ UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* enable UMC */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* boot up the VCPU */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
+ mdelay(10);
- for (i = 0; i < 10; ++i) {
- uint32_t status;
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
- for (j = 0; j < 100; ++j) {
- status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
if (status & 2)
break;
+
+ DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
mdelay(10);
+ r = -1;
}
- r = 0;
- if (status & 2)
- break;
- DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(10);
- r = -1;
- }
-
- if (r) {
- DRM_ERROR("UVD not responding, giving up!!!\n");
- return r;
- }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
- /* clear the bit 4 of UVD_STATUS */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
-
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
-
- /* set the write pointer delay */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
-
- /* set the wb address */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
- (upper_32_bits(ring->gpu_addr) >> 2));
-
- /* programm the RB_BASE for ring buffer */
- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
-
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
-
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
-
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
- ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
-
- ring = &adev->uvd.ring_enc[0];
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ if (r) {
+ DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
+ return r;
+ }
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
- ring = &adev->uvd.ring_enc[1];
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ /* clear the bit 4 of UVD_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
+ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ ring = &adev->uvd.inst[k].ring_enc[0];
+ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->uvd.inst[k].ring_enc[1];
+ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ }
return 0;
}
@@ -1053,26 +1066,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
*/
static void uvd_v7_0_stop(struct amdgpu_device *adev)
{
- /* force RBC into idle state */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
-
- /* Stall UMC and register bus before resetting VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- mdelay(1);
-
- /* put VCPU into reset */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(5);
+ uint8_t i = 0;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ /* force RBC into idle state */
+ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
- /* disable VCPU clock */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ mdelay(1);
- /* Unstall UMC and register bus */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* put VCPU into reset */
+ WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* disable VCPU clock */
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
+
+ /* Unstall UMC and register bus */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ }
}
/**
@@ -1091,26 +1108,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 2);
}
@@ -1136,6 +1153,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
/**
+ * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
* uvd_v7_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -1149,30 +1176,30 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
- ring->idx, r);
+ DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
+ ring->me, ring->idx, r);
return r;
}
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
+ tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
+ DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
+ ring->me, ring->idx, i);
} else {
- DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
- ring->idx, tmp);
+ DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
+ ring->me, ring->idx, tmp);
r = -EINVAL;
}
return r;
@@ -1193,17 +1220,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
amdgpu_ring_write(ring, ib->length_dw);
}
@@ -1231,13 +1258,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 8);
}
@@ -1247,16 +1274,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
amdgpu_ring_write(ring, mask);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 12);
}
@@ -1277,12 +1304,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
struct amdgpu_device *adev = ring->adev;
+ int i;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ WARN_ON(ring->wptr % 2 || count % 2);
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
+ amdgpu_ring_write(ring, 0);
+ }
}
static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1349,16 +1379,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle)
if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
- (RREG32_SOC15(UVD, 0, mmUVD_STATUS) &
+ (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
AMDGPU_UVD_STATUS_BUSY_MASK))
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
if (srbm_soft_reset) {
- adev->uvd.srbm_soft_reset = srbm_soft_reset;
+ adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
return true;
} else {
- adev->uvd.srbm_soft_reset = 0;
+ adev->uvd.inst[ring->me].srbm_soft_reset = 0;
return false;
}
}
@@ -1367,7 +1397,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
return 0;
uvd_v7_0_stop(adev);
@@ -1379,9 +1409,9 @@ static int uvd_v7_0_soft_reset(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 srbm_soft_reset;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
return 0;
- srbm_soft_reset = adev->uvd.srbm_soft_reset;
+ srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
if (srbm_soft_reset) {
u32 tmp;
@@ -1409,7 +1439,7 @@ static int uvd_v7_0_post_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
return 0;
mdelay(5);
@@ -1431,17 +1461,32 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_UVD:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_UVD1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
DRM_DEBUG("IH: UVD TRAP\n");
+
switch (entry->src_id) {
case 124:
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
break;
case 119:
- amdgpu_fence_process(&adev->uvd.ring_enc[0]);
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
break;
case 120:
if (!amdgpu_sriov_vf(adev))
- amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -1457,9 +1502,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
{
uint32_t data, data1, data2, suvd_flags;
- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL);
- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
- data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL);
+ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
+ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+ data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
@@ -1503,18 +1548,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
data1 |= suvd_flags;
- WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data);
- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0);
- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
}
static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
{
uint32_t data, data1, cgc_flags, suvd_flags;
- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE);
- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
+ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
+ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
cgc_flags = UVD_CGC_GATE__SYS_MASK |
UVD_CGC_GATE__UDEC_MASK |
@@ -1546,8 +1591,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
data |= cgc_flags;
data1 |= suvd_flags;
- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data);
- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
}
static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
@@ -1606,7 +1651,7 @@ static int uvd_v7_0_set_powergating_state(void *handle,
if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
return 0;
- WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+ WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
if (state == AMD_PG_STATE_GATE) {
uvd_v7_0_stop(adev);
@@ -1647,14 +1692,13 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
.emit_frame_size =
- 6 + 6 + /* hdp flush / invalidate */
+ 6 + /* hdp invalidate */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* uvd_v7_0_ring_emit_vm_flush */
@@ -1663,6 +1707,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.emit_ib = uvd_v7_0_ring_emit_ib,
.emit_fence = uvd_v7_0_ring_emit_fence,
.emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
+ .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
.test_ring = uvd_v7_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = uvd_v7_0_ring_insert_nop,
@@ -1671,6 +1716,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.end_use = amdgpu_uvd_ring_end_use,
.emit_wreg = uvd_v7_0_ring_emit_wreg,
.emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
@@ -1702,22 +1748,32 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.end_use = amdgpu_uvd_ring_end_use,
.emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
.emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
{
- adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs;
- DRM_INFO("UVD is enabled in VM mode\n");
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
+ adev->uvd.inst[i].ring.me = i;
+ DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
+ }
}
static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
- int i;
+ int i, j;
- for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+ adev->uvd.inst[j].ring_enc[i].me = j;
+ }
- DRM_INFO("UVD ENC is enabled in VM mode\n");
+ DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
+ }
}
static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
@@ -1727,8 +1783,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1;
- adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs;
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
+ adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
+ }
}
const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 47f70827195b..d48e877b682e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -56,7 +56,7 @@ static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(mmVCE_RB_RPTR);
else
return RREG32(mmVCE_RB_RPTR2);
@@ -73,7 +73,7 @@ static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(mmVCE_RB_WPTR);
else
return RREG32(mmVCE_RB_WPTR2);
@@ -90,7 +90,7 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
else
WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
@@ -627,8 +627,10 @@ static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v2_0_ring_funcs;
+ adev->vce.ring[i].me = i;
+ }
}
static const struct amdgpu_irq_src_funcs vce_v2_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 428d1928e44e..cc6ce6cc03f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -39,6 +39,7 @@
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
@@ -86,9 +87,9 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
v = RREG32(mmVCE_RB_RPTR);
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
v = RREG32(mmVCE_RB_RPTR2);
else
v = RREG32(mmVCE_RB_RPTR3);
@@ -118,9 +119,9 @@ static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
v = RREG32(mmVCE_RB_WPTR);
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
v = RREG32(mmVCE_RB_WPTR2);
else
v = RREG32(mmVCE_RB_WPTR3);
@@ -149,9 +150,9 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
else
WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
@@ -388,7 +389,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
default:
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12))
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM))
return AMDGPU_VCE_HARVEST_VCE1;
return 0;
@@ -421,7 +423,7 @@ static int vce_v3_0_sw_init(void *handle)
int r, i;
/* VCE */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
if (r)
return r;
@@ -467,8 +469,8 @@ static int vce_v3_0_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
vce_v3_0_override_vce_clock_gating(adev, true);
- if (!(adev->flags & AMD_IS_APU))
- amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
@@ -899,7 +901,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
.emit_frame_size =
4 + /* vce_v3_0_emit_pipeline_sync */
6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
- .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
+ .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
.emit_ib = amdgpu_vce_ring_emit_ib,
.emit_fence = amdgpu_vce_ring_emit_fence,
.test_ring = amdgpu_vce_ring_test_ring,
@@ -923,7 +925,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
6 + /* vce_v3_0_emit_vm_flush */
4 + /* vce_v3_0_emit_pipeline_sync */
6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
- .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+ .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
.emit_ib = vce_v3_0_ring_emit_ib,
.emit_vm_flush = vce_v3_0_emit_vm_flush,
.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
@@ -941,12 +943,16 @@ static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
int i;
if (adev->asic_type >= CHIP_STONEY) {
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
+ adev->vce.ring[i].me = i;
+ }
DRM_INFO("VCE enabled in VM mode\n");
} else {
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
+ adev->vce.ring[i].me = i;
+ }
DRM_INFO("VCE enabled in physical mode\n");
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 73fd48d6c756..65f8860169e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -39,6 +39,8 @@
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
+#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
+
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
#define VCE_V4_0_FW_SIZE (384 * 1024)
@@ -60,9 +62,9 @@ static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
else
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
@@ -82,9 +84,9 @@ static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
else
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
@@ -108,10 +110,10 @@ static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
return;
}
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
lower_32_bits(ring->wptr));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
lower_32_bits(ring->wptr));
else
@@ -1081,14 +1083,17 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.end_use = amdgpu_vce_ring_end_use,
.emit_wreg = vce_v4_0_emit_wreg,
.emit_reg_wait = vce_v4_0_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
+ adev->vce.ring[i].me = i;
+ }
DRM_INFO("VCE enabled in VM mode\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 8c132673bc79..2ce91a748c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -35,11 +35,14 @@
#include "mmhub/mmhub_9_1_offset.h"
#include "mmhub/mmhub_9_1_sh_mask.h"
-static int vcn_v1_0_start(struct amdgpu_device *adev);
+#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
+
static int vcn_v1_0_stop(struct amdgpu_device *adev);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
/**
* vcn_v1_0_early_init - set function pointers
@@ -56,6 +59,7 @@ static int vcn_v1_0_early_init(void *handle)
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
+ vcn_v1_0_set_jpeg_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
return 0;
@@ -75,18 +79,23 @@ static int vcn_v1_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* VCN DEC TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq);
if (r)
return r;
/* VCN ENC TRAP */
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.irq);
if (r)
return r;
}
+ /* VCN JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq);
+ if (r)
+ return r;
+
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
@@ -109,6 +118,12 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
+ ring = &adev->vcn.ring_jpeg;
+ sprintf(ring->name, "vcn_jpeg");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ if (r)
+ return r;
+
return r;
}
@@ -146,10 +161,6 @@ static int vcn_v1_0_hw_init(void *handle)
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
int i, r;
- r = vcn_v1_0_start(adev);
- if (r)
- goto done;
-
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
@@ -167,6 +178,14 @@ static int vcn_v1_0_hw_init(void *handle)
}
}
+ ring = &adev->vcn.ring_jpeg;
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
+ }
+
done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully.\n");
@@ -185,11 +204,9 @@ static int vcn_v1_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
- int r;
- r = vcn_v1_0_stop(adev);
- if (r)
- return r;
+ if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+ vcn_v1_0_stop(adev);
ring->ready = false;
@@ -288,14 +305,14 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
*
* Disable clock gating for VCN block
*/
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data;
/* JPEG disable CGC */
data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -310,7 +327,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
/* UVD disable CGC */
data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -415,13 +432,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
*
* Enable clock gating for VCN block
*/
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
/* enable JPEG CGC */
data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -435,7 +452,7 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
/* enable UVD CGC */
data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -480,6 +497,94 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int ret;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret);
+ }
+
+ /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int ret;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret);
+ }
+}
+
/**
* vcn_v1_0_start - start VCN block
*
@@ -497,10 +602,11 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_v1_0_mc_resume(adev);
-
+ vcn_1_0_disable_static_power_gating(adev);
/* disable clock gating */
- vcn_v1_0_disable_clock_gating(adev, true);
+ vcn_v1_0_disable_clock_gating(adev);
+
+ vcn_v1_0_mc_resume(adev);
/* disable interupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -647,6 +753,22 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ ring = &adev->vcn.ring_jpeg;
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+
+ /* initialize wptr */
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+
+ /* copy patch commands to the jpeg ring */
+ vcn_v1_0_jpeg_ring_set_patch_ring(ring,
+ (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+
return 0;
}
@@ -680,16 +802,45 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- /* enable clock gating */
- vcn_v1_0_enable_clock_gating(adev, true);
+ WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
+ vcn_v1_0_enable_clock_gating(adev);
+ vcn_1_0_enable_static_power_gating(adev);
return 0;
}
+static bool vcn_v1_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2);
+}
+
+static int vcn_v1_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret);
+
+ return ret;
+}
+
static int vcn_v1_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
- /* needed for driver unload*/
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (vcn_v1_0_is_idle(handle))
+ return -EBUSY;
+ vcn_v1_0_enable_clock_gating(adev);
+ } else {
+ /* disable HW gating and enable Sw gating */
+ vcn_v1_0_disable_clock_gating(adev);
+ }
return 0;
}
@@ -1015,6 +1166,383 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
+
+/**
+ * vcn_v1_0_jpeg_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0xffffffff);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ unsigned vmid, bool ctx_switch)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[(*ptr)++] = 0;
+ ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
+ } else {
+ ring->ring[(*ptr)++] = reg_offset;
+ ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+ }
+ ring->ring[(*ptr)++] = val;
+}
+
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ uint32_t reg, reg_offset, val, mask, i;
+
+ // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+ reg_offset = (reg << 2);
+ val = lower_32_bits(ring->gpu_addr);
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+ reg_offset = (reg << 2);
+ val = upper_32_bits(ring->gpu_addr);
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 3rd to 5th: issue MEM_READ commands
+ for (i = 0; i <= 2; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+ ring->ring[ptr++] = 0;
+ }
+
+ // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x13;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 7th: program mmUVD_JRBC_RB_REF_DATA
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ mask = 0x1;
+
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = 0x01400200;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = val;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[ptr++] = 0;
+ ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
+ } else {
+ ring->ring[ptr++] = reg_offset;
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+ }
+ ring->ring[ptr++] = mask;
+
+ //9th to 21st: insert no-op
+ for (i = 0; i <= 12; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ring->ring[ptr++] = 0;
+ }
+
+ //22nd: reset mmUVD_JRBC_RB_RPTR
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
+ reg_offset = (reg << 2);
+ val = 0;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x12;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+}
+
static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1039,6 +1567,9 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
case 120:
amdgpu_fence_process(&adev->vcn.ring_enc[1]);
break;
+ case 126:
+ amdgpu_fence_process(&adev->vcn.ring_jpeg);
+ break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -1048,16 +1579,36 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
struct amdgpu_device *adev = ring->adev;
+ int i;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ WARN_ON(ring->wptr % 2 || count % 2);
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ amdgpu_ring_write(ring, 0);
+ }
}
+static int vcn_v1_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCN block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE)
+ return vcn_v1_0_stop(adev);
+ else
+ return vcn_v1_0_start(adev);
+}
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
@@ -1069,20 +1620,19 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.hw_fini = vcn_v1_0_hw_fini,
.suspend = vcn_v1_0_suspend,
.resume = vcn_v1_0_resume,
- .is_idle = NULL /* vcn_v1_0_is_idle */,
- .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */,
+ .is_idle = vcn_v1_0_is_idle,
+ .wait_for_idle = vcn_v1_0_wait_for_idle,
.check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
.pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
.soft_reset = NULL /* vcn_v1_0_soft_reset */,
.post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
- .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */,
+ .set_powergating_state = vcn_v1_0_set_powergating_state,
};
static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
- .nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
@@ -1101,7 +1651,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
.test_ring = amdgpu_vcn_dec_ring_test_ring,
.test_ib = amdgpu_vcn_dec_ring_test_ib,
- .insert_nop = vcn_v1_0_ring_insert_nop,
+ .insert_nop = vcn_v1_0_dec_ring_insert_nop,
.insert_start = vcn_v1_0_dec_ring_insert_start,
.insert_end = vcn_v1_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1109,6 +1659,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1139,6 +1690,40 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .nop = PACKET0(0x81ff, 0),
+ .support_64bit_ptrs = false,
+ .vmhub = AMDGPU_MMHUB,
+ .extra_dw = 64,
+ .get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
+ .get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
+ .set_wptr = vcn_v1_0_jpeg_ring_set_wptr,
+ .emit_frame_size =
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v1_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 22, /* vcn_v1_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v1_0_jpeg_ring_emit_ib,
+ .emit_fence = vcn_v1_0_jpeg_ring_emit_fence,
+ .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
+ .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
+ .insert_nop = vcn_v1_0_jpeg_ring_nop,
+ .insert_start = vcn_v1_0_jpeg_ring_insert_start,
+ .insert_end = vcn_v1_0_jpeg_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg,
+ .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait,
};
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -1157,6 +1742,12 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
DRM_INFO("VCN encode is enabled in VM mode\n");
}
+static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
+ DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
+}
+
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
.set = vcn_v1_0_set_interrupt_state,
.process = vcn_v1_0_process_interrupt,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index 45aafca7f315..c5c9b2bc190d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -51,6 +51,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));
adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
new file mode 100644
index 000000000000..52778de93ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "vega20_ip_offset.h"
+
+int vega20_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the blocke beend by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ }
+ return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 126f1276d347..42c8ad105b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
stoney_mgcg_cgcg_init,
ARRAY_SIZE(stoney_mgcg_cgcg_init));
break;
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
default:
break;
}
@@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
return r;
tmp = RREG32_SMC(cntl_reg);
- tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
- CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+
+ if (adev->flags & AMD_IS_APU)
+ tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK;
+ else
+ tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+ CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
tmp |= dividers.post_divider;
WREG32_SMC(cntl_reg, tmp);
for (i = 0; i < 100; i++) {
- if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK)
- break;
+ tmp = RREG32_SMC(status_reg);
+ if (adev->flags & AMD_IS_APU) {
+ if (tmp & 0x10000)
+ break;
+ } else {
+ if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+ break;
+ }
mdelay(10);
}
if (i == 100)
return -ETIMEDOUT;
-
return 0;
}
+#define ixGNB_CLK1_DFS_CNTL 0xD82200F0
+#define ixGNB_CLK1_STATUS 0xD822010C
+#define ixGNB_CLK2_DFS_CNTL 0xD8220110
+#define ixGNB_CLK2_STATUS 0xD822012C
+#define ixGNB_CLK3_DFS_CNTL 0xD8220130
+#define ixGNB_CLK3_STATUS 0xD822014C
+
static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
{
int r;
- r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
- if (r)
- return r;
+ if (adev->flags & AMD_IS_APU) {
+ r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS);
+ if (r)
+ return r;
- r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
- if (r)
- return r;
+ r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS);
+ if (r)
+ return r;
+ } else {
+ r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+ if (r)
+ return r;
+
+ r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+ if (r)
+ return r;
+ }
return 0;
}
@@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
int r, i;
struct atom_clock_dividers dividers;
u32 tmp;
+ u32 reg_ctrl;
+ u32 reg_status;
+ u32 status_mask;
+ u32 reg_mask;
+
+ if (adev->flags & AMD_IS_APU) {
+ reg_ctrl = ixGNB_CLK3_DFS_CNTL;
+ reg_status = ixGNB_CLK3_STATUS;
+ status_mask = 0x00010000;
+ reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+ } else {
+ reg_ctrl = ixCG_ECLK_CNTL;
+ reg_status = ixCG_ECLK_STATUS;
+ status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK;
+ reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+ }
r = amdgpu_atombios_get_clock_dividers(adev,
COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
@@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
return r;
for (i = 0; i < 100; i++) {
- if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ if (RREG32_SMC(reg_status) & status_mask)
break;
mdelay(10);
}
+
if (i == 100)
return -ETIMEDOUT;
- tmp = RREG32_SMC(ixCG_ECLK_CNTL);
- tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK |
- CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
+ tmp = RREG32_SMC(reg_ctrl);
+ tmp &= ~reg_mask;
tmp |= dividers.post_divider;
- WREG32_SMC(ixCG_ECLK_CNTL, tmp);
+ WREG32_SMC(reg_ctrl, tmp);
for (i = 0; i < 100; i++) {
- if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ if (RREG32_SMC(reg_status) & status_mask)
break;
mdelay(10);
}
+
if (i == 100)
return -ETIMEDOUT;
@@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev,
}
}
+static bool vi_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* CZ has hang issues with full reset at the moment */
+ return false;
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ /* XXX: soft reset should work on fiji and tonga */
+ return true;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ default:
+ /* change this when we support soft reset */
+ return true;
+ }
+}
+
static const struct amdgpu_asic_funcs vi_asic_funcs =
{
.read_disabled_bios = &vi_read_disabled_bios,
@@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.get_config_memsize = &vi_get_config_memsize,
.flush_hdp = &vi_flush_hdp,
.invalidate_hdp = &vi_invalidate_hdp,
+ .need_full_reset = &vi_need_full_reset,
};
#define CZ_REV_BRISTOL(rev) \
@@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x64;
break;
+ case CHIP_VEGAM:
+ adev->cg_flags = 0;
+ /*AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG;*/
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x6E;
+ break;
case CHIP_CARRIZO:
adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
AMD_CG_SUPPORT_GFX_MGCG |
@@ -1273,11 +1363,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
if (adev->cg_flags & (AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_MC_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) {
- pp_support_state = AMD_CG_SUPPORT_MC_LS;
+ pp_support_state = PP_STATE_SUPPORT_LS;
pp_state = PP_STATE_LS;
}
if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) {
- pp_support_state |= AMD_CG_SUPPORT_MC_MGCG;
+ pp_support_state |= PP_STATE_SUPPORT_CG;
pp_state |= PP_STATE_CG;
}
if (state == AMD_CG_STATE_UNGATE)
@@ -1292,11 +1382,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
if (adev->cg_flags & (AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_SDMA_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS) {
- pp_support_state = AMD_CG_SUPPORT_SDMA_LS;
+ pp_support_state = PP_STATE_SUPPORT_LS;
pp_state = PP_STATE_LS;
}
if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG) {
- pp_support_state |= AMD_CG_SUPPORT_SDMA_MGCG;
+ pp_support_state |= PP_STATE_SUPPORT_CG;
pp_state |= PP_STATE_CG;
}
if (state == AMD_CG_STATE_UNGATE)
@@ -1311,11 +1401,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
- pp_support_state = AMD_CG_SUPPORT_HDP_LS;
+ pp_support_state = PP_STATE_SUPPORT_LS;
pp_state = PP_STATE_LS;
}
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG) {
- pp_support_state |= AMD_CG_SUPPORT_HDP_MGCG;
+ pp_support_state |= PP_STATE_SUPPORT_CG;
pp_state |= PP_STATE_CG;
}
if (state == AMD_CG_STATE_UNGATE)
@@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
vi_common_set_clockgating_state_by_smu(adev, state);
default:
break;
@@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
}
break;
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);