author | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2018-07-23 09:13:12 -0700
---|---|---
committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2018-07-23 09:13:12 -0700
commit | c74a7469f97c0f40b46e82ee979f9fb1bb6e847c (patch) |
tree | f2690a1a916b73ef94657fbf0e0141ae57701825 /drivers/gpu/drm/amd/amdgpu |
parent | 6f15a7de86c8cf2dc09f c9e6d07047efa40ef809 (diff) |
parent | 500775074f88d9cf5416bed2ca19592812d62c41 (diff) |
Merge drm/drm-next into drm-intel-next-queued
We need a backmerge to get DP_DPCD_REV_14 before we push other
i915 changes to dinq that could break compilation.
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
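
DP_DPCD_REV_14 is the DisplayPort 1.4 revision value read from DPCD address 0x000, and it lands on the drm-next side of this merge. A minimal sketch — not part of this commit, with a hypothetical helper name — of how a driver might gate DP 1.4 behaviour on that define:

```c
/* Hypothetical sketch, not from this commit: check whether a sink
 * reports DPCD revision 1.4 using the DP_DPCD_REV_14 define this
 * backmerge pulls in. DP_DPCD_REV lives at DPCD address 0x000.
 */
#include <drm/drm_dp_helper.h>

static bool sink_is_dp14(struct drm_dp_aux *aux)
{
	u8 rev;

	/* drm_dp_dpcd_readb() returns the number of bytes read (1) on success */
	if (drm_dp_dpcd_readb(aux, DP_DPCD_REV, &rev) != 1)
		return false;

	return rev >= DP_DPCD_REV_14;
}
```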
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
114 files changed, 9500 insertions, 3154 deletions
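
Several hunks below (amdgpu_acp.c and the kgd HQD-dump helpers) replace open-coded size multiplications in allocations with kcalloc()/kmalloc_array(), which return NULL when the multiplication would overflow instead of silently under-allocating. A minimal sketch of the pattern, with a hypothetical helper name mirroring the kgd_hqd_dump() conversion:

```c
#include <linux/slab.h>

/* Hypothetical helper: allocate n (register, value) pairs for a dump.
 * The old form, kmalloc(n * 2 * sizeof(uint32_t), GFP_KERNEL), can wrap
 * on 32-bit size_t and hand back a short buffer; kmalloc_array() checks
 * the n * size product and fails cleanly with NULL on overflow.
 * (kcalloc() is the zeroing equivalent, replacing kzalloc(n * size).)
 */
static uint32_t (*alloc_reg_dump(size_t n))[2]
{
	return kmalloc_array(n, sizeof(uint32_t [2]), GFP_KERNEL);
}
```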
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2ca2b5154d52..bfd332c95b61 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ - ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ - amdgpu_amdkfd_gfx_v7.o + ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ + vega20_reg_init.o + +# add DF block +amdgpu-y += \ + df_v1_7.o \ + df_v3_6.o # add GMC block amdgpu-y += \ @@ -126,11 +131,20 @@ amdgpu-y += \ vcn_v1_0.o # add amdkfd interfaces +amdgpu-y += amdgpu_amdkfd.o + +ifneq ($(CONFIG_HSA_AMD),) amdgpu-y += \ - amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ - amdgpu_amdkfd_gfx_v8.o + amdgpu_amdkfd_gfx_v8.o \ + amdgpu_amdkfd_gfx_v9.o + +ifneq ($(CONFIG_DRM_AMDGPU_CIK),) +amdgpu-y += amdgpu_amdkfd_gfx_v7.o +endif + +endif # add cgs amdgpu-y += amdgpu_cgs.o diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 06192698bd96..5b393622f592 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -136,6 +136,7 @@ #define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02 #define GENERIC_OBJECT_ID_MXM_OPM 0x03 #define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin +#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05 /****************************************************/ /* Graphics Object ENUM ID Definition */ @@ -714,6 +715,13 @@ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT) +#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\ + GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ + GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT) + +#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\ + GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\ + GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT) /****************************************************/ /* Object Cap definition - Shared with BIOS */ /****************************************************/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c8b605f3dc05..44f62fda4022 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -73,6 +73,7 @@ #include "amdgpu_virt.h" #include "amdgpu_gart.h" #include "amdgpu_debugfs.h" +#include "amdgpu_job.h" /* * Modules parameters. 
@@ -105,11 +106,8 @@ extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; extern int amdgpu_vm_update_mode; extern int amdgpu_dc; -extern int amdgpu_dc_log; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; -extern int amdgpu_no_evict; -extern int amdgpu_direct_gma_size; extern uint amdgpu_pcie_gen_cap; extern uint amdgpu_pcie_lane_cap; extern uint amdgpu_cg_mask; @@ -129,6 +127,7 @@ extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; +extern uint amdgpu_smu_memory_pool_size; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -137,6 +136,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -188,6 +188,7 @@ struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; +struct amdgpu_atif; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -222,10 +223,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; -int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); -int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, @@ -597,17 +598,6 @@ struct amdgpu_ib { extern const struct drm_sched_backend_ops amdgpu_sched_ops; -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job, struct amdgpu_vm *vm); -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, - struct amdgpu_job **job); - -void amdgpu_job_free_resources(struct amdgpu_job *job); -void amdgpu_job_free(struct amdgpu_job *job); -int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, - struct drm_sched_entity *entity, void *owner, - struct dma_fence **f); - /* * Queue manager */ @@ -681,6 +671,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); @@ -727,6 +719,14 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, struct list_head *validated); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); void amdgpu_bo_list_free(struct amdgpu_bo_list *list); +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, + struct drm_amdgpu_bo_list_entry **info_param); + +int amdgpu_bo_list_create(struct amdgpu_device *adev, + struct drm_file *filp, + struct drm_amdgpu_bo_list_entry *info, + unsigned num_entries, + struct amdgpu_bo_list **list); /* * GFX stuff @@ -771,9 +771,18 @@ struct amdgpu_rlc { u32 starting_offsets_start; u32 reg_list_format_size_bytes; u32 reg_list_size_bytes; + u32 reg_list_format_direct_reg_list_length; + u32 save_restore_list_cntl_size_bytes; + u32 save_restore_list_gpm_size_bytes; + u32 save_restore_list_srm_size_bytes; u32 *register_list_format; u32 *register_restore; + u8 *save_restore_list_cntl; + u8 
*save_restore_list_gpm; + u8 *save_restore_list_srm; + + bool is_rlc_v2_1; }; #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -867,6 +876,8 @@ struct amdgpu_gfx_config { /* gfx configure feature */ uint32_t double_offchip_lds_buf; + /* cached value of DB_DEBUG2 */ + uint32_t db_debug2; }; struct amdgpu_cu_info { @@ -915,6 +926,11 @@ struct amdgpu_ngg { bool init; }; +struct sq_work { + struct work_struct work; + unsigned ih_data; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -938,6 +954,12 @@ struct amdgpu_gfx { uint32_t ce_feature_version; uint32_t pfp_feature_version; uint32_t rlc_feature_version; + uint32_t rlc_srlc_fw_version; + uint32_t rlc_srlc_feature_version; + uint32_t rlc_srlg_fw_version; + uint32_t rlc_srlg_feature_version; + uint32_t rlc_srls_fw_version; + uint32_t rlc_srls_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; @@ -947,6 +969,10 @@ struct amdgpu_gfx { struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src cp_ecc_error_irq; + struct amdgpu_irq_src sq_irq; + struct sq_work sq_work; + /* gfx status */ uint32_t gfx_current_status; /* ce ram size*/ @@ -998,6 +1024,7 @@ struct amdgpu_cs_parser { /* scheduler job object */ struct amdgpu_job *job; + struct amdgpu_ring *ring; /* buffer objects */ struct ww_acquire_ctx ticket; @@ -1019,40 +1046,6 @@ struct amdgpu_cs_parser { struct drm_syncobj **post_dep_syncobjs; }; -#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ -#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ -#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ - -struct amdgpu_job { - struct drm_sched_job base; - struct amdgpu_device *adev; - struct amdgpu_vm *vm; - struct amdgpu_ring *ring; - struct amdgpu_sync sync; - struct amdgpu_sync sched_sync; - struct amdgpu_ib *ibs; - struct dma_fence *fence; /* the hw fence */ - uint32_t preamble_status; - uint32_t num_ibs; - void *owner; - uint64_t fence_ctx; /* the fence_context this job uses */ - bool vm_needs_flush; - uint64_t vm_pd_addr; - unsigned vmid; - unsigned pasid; - uint32_t gds_base, gds_size; - uint32_t gws_base, gws_size; - uint32_t oa_base, oa_size; - uint32_t vram_lost_counter; - - /* user fence handling */ - uint64_t uf_addr; - uint64_t uf_sequence; - -}; -#define to_amdgpu_job(sched_job) \ - container_of((sched_job), struct amdgpu_job, base) - static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) { @@ -1204,6 +1197,8 @@ struct amdgpu_asic_funcs { /* invalidate hdp read cache */ void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + /* check if the asic needs a full reset of if soft reset will work */ + bool (*need_full_reset)(struct amdgpu_device *adev); }; /* @@ -1246,43 +1241,6 @@ struct amdgpu_vram_scratch { /* * ACPI */ -struct amdgpu_atif_notification_cfg { - bool enabled; - int command_code; -}; - -struct amdgpu_atif_notifications { - bool display_switch; - bool expansion_mode_change; - bool thermal_state; - bool forced_power_state; - bool system_power_state; - bool display_conf_change; - bool px_gfx_switch; - bool brightness_change; - bool dgpu_display_event; -}; - -struct amdgpu_atif_functions { - bool system_params; - bool sbios_requests; - bool select_active_disp; - bool lid_state; 
- bool get_tv_standard; - bool set_tv_standard; - bool get_panel_expansion_mode; - bool set_panel_expansion_mode; - bool temperature_change; - bool graphics_device_types; -}; - -struct amdgpu_atif { - struct amdgpu_atif_notifications notifications; - struct amdgpu_atif_functions functions; - struct amdgpu_atif_notification_cfg notification_cfg; - struct amdgpu_encoder *encoder_for_bl; -}; - struct amdgpu_atcs_functions { bool get_ext_state; bool pcie_perf_req; @@ -1368,7 +1326,19 @@ struct amdgpu_nbio_funcs { void (*detect_hw_virt)(struct amdgpu_device *adev); }; - +struct amdgpu_df_funcs { + void (*init)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); +}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -1390,6 +1360,7 @@ enum amd_hw_ip_block_type { PWR_HWIP, NBIF_HWIP, THM_HWIP, + CLK_HWIP, MAX_HWIP }; @@ -1398,6 +1369,7 @@ enum amd_hw_ip_block_type { struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; + uint32_t pp_feature; }; #define AMDGPU_RESET_MAGIC_NUM 64 @@ -1430,7 +1402,7 @@ struct amdgpu_device { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif - struct amdgpu_atif atif; + struct amdgpu_atif *atif; struct amdgpu_atcs atcs; struct mutex srbm_mutex; /* GRBM index mutex. Protects concurrent access to GRBM index */ @@ -1579,9 +1551,9 @@ struct amdgpu_device { DECLARE_HASHTABLE(mn_hash, 7); /* tracking pinned memory */ - u64 vram_pin_size; - u64 invisible_pin_size; - u64 gart_pin_size; + atomic64_t vram_pin_size; + atomic64_t visible_pin_size; + atomic64_t gart_pin_size; /* amdkfd interface */ struct kfd_dev *kfd; @@ -1590,6 +1562,7 @@ struct amdgpu_device { uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; const struct amdgpu_nbio_funcs *nbio_funcs; + const struct amdgpu_df_funcs *df_funcs; /* delayed work_func for deferring clockgating during resume */ struct delayed_work late_init_work; @@ -1764,6 +1737,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) +#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) @@ -1790,6 +1764,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) +#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) 
(r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) @@ -1855,6 +1830,12 @@ static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; static inline bool amdgpu_has_atpx(void) { return false; } #endif +#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI) +void *amdgpu_atpx_get_dhandle(void); +#else +static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; } +#endif + /* * KMS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index a29362f9ef41..f4c474a95875 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -290,12 +290,11 @@ static int acp_hw_init(void *handle) else if (r) return r; - r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, - 0x5289, 0, &acp_base); - if (r == -ENODEV) - return 0; - else if (r) - return r; + if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) + return -EINVAL; + + acp_base = adev->rmmio_base; + if (adev->asic_type != CHIP_STONEY) { adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); if (adev->acp.acp_genpd == NULL) @@ -311,20 +310,20 @@ static int acp_hw_init(void *handle) pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); } - adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS, + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); if (adev->acp.acp_cell == NULL) return -ENOMEM; - adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL); + adev->acp.acp_res = kcalloc(4, sizeof(struct resource), GFP_KERNEL); if (adev->acp.acp_res == NULL) { kfree(adev->acp.acp_cell); return -ENOMEM; } - i2s_pdata = kzalloc(sizeof(struct i2s_platform_data) * 2, GFP_KERNEL); + i2s_pdata = kcalloc(2, sizeof(struct i2s_platform_data), GFP_KERNEL); if (i2s_pdata == NULL) { kfree(adev->acp.acp_res); kfree(adev->acp.acp_cell); @@ -513,7 +512,7 @@ static int acp_hw_fini(void *handle) if (adev->acp.acp_genpd) { for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); + ret = pm_genpd_remove_device(dev); /* If removal fails, dont giveup and try rest */ if (ret) dev_err(dev, "remove dev from genpd failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 8fa850a070e0..0d8c3fc6eace 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -34,6 +34,45 @@ #include "amd_acpi.h" #include "atom.h" +struct amdgpu_atif_notification_cfg { + bool enabled; + int command_code; +}; + +struct amdgpu_atif_notifications { + bool display_switch; + bool expansion_mode_change; + bool thermal_state; + bool forced_power_state; + bool system_power_state; + bool display_conf_change; + bool px_gfx_switch; + bool brightness_change; + bool dgpu_display_event; +}; + +struct amdgpu_atif_functions { + bool system_params; + bool sbios_requests; + bool select_active_disp; + bool lid_state; + bool get_tv_standard; + bool set_tv_standard; + bool get_panel_expansion_mode; + bool set_panel_expansion_mode; + bool temperature_change; + bool graphics_device_types; +}; + +struct amdgpu_atif { + acpi_handle handle; + + struct amdgpu_atif_notifications notifications; + struct amdgpu_atif_functions 
functions; + struct amdgpu_atif_notification_cfg notification_cfg; + struct amdgpu_encoder *encoder_for_bl; +}; + /* Call the ATIF method */ /** @@ -46,8 +85,9 @@ * Executes the requested ATIF function (all asics). * Returns a pointer to the acpi output buffer. */ -static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function, - struct acpi_buffer *params) +static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, + int function, + struct acpi_buffer *params) { acpi_status status; union acpi_object atif_arg_elements[2]; @@ -70,7 +110,8 @@ static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function, atif_arg_elements[1].integer.value = 0; } - status = acpi_evaluate_object(handle, "ATIF", &atif_arg, &buffer); + status = acpi_evaluate_object(atif->handle, NULL, &atif_arg, + &buffer); /* Fail only if calling the method fails and ATIF is supported */ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { @@ -141,15 +182,14 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas * (all asics). * returns 0 on success, error on failure. */ -static int amdgpu_atif_verify_interface(acpi_handle handle, - struct amdgpu_atif *atif) +static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif) { union acpi_object *info; struct atif_verify_interface output; size_t size; int err = 0; - info = amdgpu_atif_call(handle, ATIF_FUNCTION_VERIFY_INTERFACE, NULL); + info = amdgpu_atif_call(atif, ATIF_FUNCTION_VERIFY_INTERFACE, NULL); if (!info) return -EIO; @@ -176,6 +216,35 @@ out: return err; } +static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle) +{ + acpi_handle handle = NULL; + char acpi_method_name[255] = { 0 }; + struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name }; + acpi_status status; + + /* For PX/HG systems, ATIF and ATPX are in the iGPU's namespace, on dGPU only + * systems, ATIF is in the dGPU's namespace. + */ + status = acpi_get_handle(dhandle, "ATIF", &handle); + if (ACPI_SUCCESS(status)) + goto out; + + if (amdgpu_has_atpx()) { + status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATIF", + &handle); + if (ACPI_SUCCESS(status)) + goto out; + } + + DRM_DEBUG_DRIVER("No ATIF handle found\n"); + return NULL; +out: + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); + DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); + return handle; +} + /** * amdgpu_atif_get_notification_params - determine notify configuration * @@ -188,15 +257,16 @@ out: * where n is specified in the result if a notifier is used. * Returns 0 on success, error on failure. */ -static int amdgpu_atif_get_notification_params(acpi_handle handle, - struct amdgpu_atif_notification_cfg *n) +static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif) { union acpi_object *info; + struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg; struct atif_system_params params; size_t size; int err = 0; - info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS, NULL); + info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS, + NULL); if (!info) { err = -EIO; goto out; @@ -250,14 +320,15 @@ out: * (all asics). * Returns 0 on success, error on failure. 
*/ -static int amdgpu_atif_get_sbios_requests(acpi_handle handle, - struct atif_sbios_requests *req) +static int amdgpu_atif_get_sbios_requests(struct amdgpu_atif *atif, + struct atif_sbios_requests *req) { union acpi_object *info; size_t size; int count = 0; - info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS, NULL); + info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS, + NULL); if (!info) return -EIO; @@ -290,11 +361,10 @@ out: * Returns NOTIFY code */ static int amdgpu_atif_handler(struct amdgpu_device *adev, - struct acpi_bus_event *event) + struct acpi_bus_event *event) { - struct amdgpu_atif *atif = &adev->atif; + struct amdgpu_atif *atif = adev->atif; struct atif_sbios_requests req; - acpi_handle handle; int count; DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n", @@ -303,14 +373,14 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0) return NOTIFY_DONE; - if (!atif->notification_cfg.enabled || + if (!atif || + !atif->notification_cfg.enabled || event->type != atif->notification_cfg.command_code) /* Not our event */ return NOTIFY_DONE; /* Check pending SBIOS requests */ - handle = ACPI_HANDLE(&adev->pdev->dev); - count = amdgpu_atif_get_sbios_requests(handle, &req); + count = amdgpu_atif_get_sbios_requests(atif, &req); if (count <= 0) return NOTIFY_DONE; @@ -641,8 +711,8 @@ static int amdgpu_acpi_event(struct notifier_block *nb, */ int amdgpu_acpi_init(struct amdgpu_device *adev) { - acpi_handle handle; - struct amdgpu_atif *atif = &adev->atif; + acpi_handle handle, atif_handle; + struct amdgpu_atif *atif; struct amdgpu_atcs *atcs = &adev->atcs; int ret; @@ -658,12 +728,26 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("Call to ATCS verify_interface failed: %d\n", ret); } + /* Probe for ATIF, and initialize it if found */ + atif_handle = amdgpu_atif_probe_handle(handle); + if (!atif_handle) + goto out; + + atif = kzalloc(sizeof(*atif), GFP_KERNEL); + if (!atif) { + DRM_WARN("Not enough memory to initialize ATIF\n"); + goto out; + } + atif->handle = atif_handle; + /* Call the ATIF method */ - ret = amdgpu_atif_verify_interface(handle, atif); + ret = amdgpu_atif_verify_interface(atif); if (ret) { DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret); + kfree(atif); goto out; } + adev->atif = atif; if (atif->notifications.brightness_change) { struct drm_encoder *tmp; @@ -693,8 +777,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } if (atif->functions.system_params) { - ret = amdgpu_atif_get_notification_params(handle, - &atif->notification_cfg); + ret = amdgpu_atif_get_notification_params(atif); if (ret) { DRM_DEBUG_DRIVER("Call to GET_SYSTEM_PARAMS failed: %d\n", ret); @@ -720,4 +803,6 @@ out: void amdgpu_acpi_fini(struct amdgpu_device *adev) { unregister_acpi_notifier(&adev->acpi_nb); + if (adev->atif) + kfree(adev->atif); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4d36203ffb11..e3ed08dca7b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void) kgd2kfd = NULL; } + #elif defined(CONFIG_HSA_AMD) + ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); if (ret) kgd2kfd = NULL; #else + kgd2kfd = NULL; ret = -ENOENT; #endif + +#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD) amdgpu_amdkfd_gpuvm_init_mem_limits(); +#endif return ret; } @@ -92,8 +98,12 @@ void 
amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; default: - dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); + dev_info(adev->dev, "kfd not supported on this ASIC\n"); return; } @@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); + if (adev->asic_type >= CHIP_VEGA10) { + /* On SOC15 the BIF is involved in routing + * doorbells using the low 12 bits of the + * address. Communicate the assignments to + * KFD. KFD uses two doorbell pages per + * process in case of 64-bit doorbells so we + * can use each doorbell assignment twice. + */ + gpu_resources.sdma_doorbell[0][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE0; + gpu_resources.sdma_doorbell[0][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; + gpu_resources.sdma_doorbell[1][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE1; + gpu_resources.sdma_doorbell[1][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; + /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1f0; + gpu_resources.reserved_doorbell_val = 0x0f0; + } kgd2kfd->device_init(adev->kfd, &gpu_resources); } @@ -217,13 +249,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; int r; - uint64_t gpu_addr_tmp = 0; void *cpu_ptr_tmp = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, - NULL, &bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -237,13 +274,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, goto allocate_mem_reserve_bo_failed; } - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, - &gpu_addr_tmp); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); if (r) { dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } + r = amdgpu_ttm_alloc_gart(&bo->tbo); + if (r) { + dev_err(adev->dev, "%p bind failed\n", bo); + goto allocate_mem_kmap_bo_failed; + } + r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); if (r) { dev_err(adev->dev, @@ -252,7 +294,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, } *mem_obj = bo; - *gpu_addr = gpu_addr_tmp; + *gpu_addr = amdgpu_bo_gpu_offset(bo); *cpu_ptr = cpu_ptr_tmp; amdgpu_bo_unreserve(bo); @@ -304,15 +346,12 @@ void get_local_mem_info(struct kgd_dev *kgd, mem_info->local_mem_size_public, mem_info->local_mem_size_private); - if (amdgpu_emu_mode == 1) { - mem_info->mem_clk_max = 100; - return; - } - if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; - else + else if (adev->powerplay.pp_funcs) mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; + else + mem_info->mem_clk_max = 100; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) @@ -329,13 +368,12 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* the sclk is in 
quantas of 10kHz */ - if (amdgpu_emu_mode == 1) - return 100; - if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; - - return amdgpu_dpm_get_sclk(adev, false) / 100; + else if (adev->powerplay.pp_funcs) + return amdgpu_dpm_get_sclk(adev, false) / 100; + else + return 100; } void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) @@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } + +#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD) +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + return false; +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ +} + +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + return NULL; +} + +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +{ + return 0; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return NULL; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c2c2bea731e0..a8418a3f4e9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/mmu_context.h> +#include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" @@ -59,7 +60,9 @@ struct kgd_mem { uint32_t mapping_flags; + atomic_t invalid; struct amdkfd_process_info *process_info; + struct page **user_pages; struct amdgpu_sync sync; @@ -84,6 +87,9 @@ struct amdkfd_process_info { struct list_head vm_list_head; /* List head for all KFD BOs that belong to a KFD process. 
*/ struct list_head kfd_bo_list; + /* List of userptr BOs that are valid or invalid */ + struct list_head userptr_valid_list; + struct list_head userptr_inval_list; /* Lock to protect kfd_bo_list */ struct mutex lock; @@ -91,6 +97,11 @@ struct amdkfd_process_info { unsigned int n_vms; /* Eviction Fence */ struct amdgpu_amdkfd_fence *eviction_fence; + + /* MMU-notifier related fields */ + atomic_t evicted_bos; + struct delayed_work restore_userptr_work; + struct pid *pid; }; int amdgpu_amdkfd_init(void); @@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); /* GPUVM API */ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, - struct dma_fence **ef); + void **process_info, + struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef); + struct file *filp, + void **vm, void **process_info, + struct dma_fence **ef); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 2c14025e5e76..574c1181ae9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -173,7 +173,5 @@ static const struct dma_fence_ops amdkfd_fence_ops = { .get_driver_name = amdkfd_fence_get_driver_name, .get_timeline_name = amdkfd_fence_get_timeline_name, .enable_signaling = amdkfd_fence_enable_signaling, - .signaled = NULL, - .wait = dma_fence_default_wait, .release = amdkfd_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea54e53172b9..ea79908dac4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - 
.init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -417,7 +407,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -514,7 +504,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, #undef HQD_N_REGS #define HQD_N_REGS (19+4) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 89264c9a5e9f..19dd665e7307 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -405,7 +395,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -501,7 +491,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, #undef HQD_N_REGS #define HQD_N_REGS (19+4+2+3+7) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c new file mode 100644 index 000000000000..1db60aa5b7f0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -0,0 +1,1043 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "vega10_enum.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" +#include "soc15_common.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" + +/* HACK: MMHUB and GC both have VM-related register with the same + * names but different offsets. Define the MMHUB register we need here + * with a prefix. A proper solution would be to move the functions + * programming these registers into gfx_v9_0.c and mmhub_v1_0.c + * respectively. 
+ */ +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 + +#define V9_PIPE_PER_MEC (4) +#define V9_QUEUES_PER_PIPE_MEC (8) + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static uint16_t get_fw_version(struct 
kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_local_mem_info = get_local_mem_info, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) 
+{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. 
+ */ + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << (vmid + 16)))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << (vmid + 16)); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. 
*/ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uintptr_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uintptr_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? 
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v9_mqd *m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 
+{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + uint32_t req = (1 << vmid) | + (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; + + mutex_lock(&adev->srbm_mutex); + + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), + req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + mutex_unlock(&adev->srbm_mutex); + +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (ring->ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + write_vmid_invalidate_request(kgd, vmid); + break; + } + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + write_vmid_invalidate_request(kgd, vmid); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + 
return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + /* No longer needed on GFXv9. The scratch base address is + * passed to the shader by the CP. It's the user mode driver's + * responsibility. + */ +} + +/* FIXME: Does this need to be ASIC-specific code? */ +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | + AMDGPU_PTE_VALID; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. 
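
The read-modify-write helpers used throughout this file follow the REG_SET_FIELD(value, REG, FIELD, v) pattern: clear one bit field with its mask, then or in the new value at the field's shift. A minimal user-space sketch of the same arithmetic (the mask and shift below are invented for illustration, not the real GRBM_GFX_INDEX layout):

#include <stdint.h>
#include <stdio.h>

/* Invented field layout, for illustration only */
#define FIELD_SHIFT 29u
#define FIELD_MASK  (0x1u << FIELD_SHIFT)

/* Clear the field, then insert the new value at its shift */
static uint32_t reg_set_field(uint32_t reg, uint32_t mask,
			      uint32_t shift, uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	uint32_t data = reg_set_field(0, FIELD_MASK, FIELD_SHIFT, 1);

	printf("reg = %#x\n", (unsigned)data);	/* 0x20000000 */
	return 0;
}
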
+ */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d6e1479da38..079af8ac2636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -23,6 +23,8 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt #include <linux/list.h> +#include <linux/pagemap.h> +#include <linux/sched/mm.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -33,10 +35,20 @@ */ #define VI_BO_SIZE_ALIGN (0x8000) +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) + +/* Userptr restore delay, just long enough to allow consecutive VM + * changes to accumulate + */ +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; + uint64_t max_userptr_mem_limit; int64_t system_mem_used; + int64_t userptr_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = { #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, /* Set memory usage limits. 
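
Two details drive the register writes above: the page_table_base handed over by the KFD is a page frame number that still needs to be shifted into a byte address and marked valid, and each VMID owns a LO32/HI32 register pair, so per-VMID offsets advance by two dwords. A stand-alone sketch of the arithmetic (PAGE_SHIFT, the register base and the sample PFN are assumptions):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	 12	/* assuming 4 KiB pages */
#define AMDGPU_PTE_VALID 1ULL	/* bit 0 marks the entry valid */

/* LO32/HI32 pairs: the registers for VMID n start at base + n * 2 */
static uint32_t vmid_reg(uint32_t base_reg, uint32_t vmid)
{
	return base_reg + vmid * 2;
}

int main(void)
{
	uint32_t page_table_base = 0x123456;	/* sample PFN */
	uint64_t base = ((uint64_t)page_table_base << PAGE_SHIFT) |
			AMDGPU_PTE_VALID;

	printf("VMID 8 LO32 register: %#x\n",
	       (unsigned)vmid_reg(0x100, 8));	/* assumed base 0x100 */
	printf("base %#llx -> LO32 %#x, HI32 %#x\n",
	       (unsigned long long)base,
	       (unsigned)(uint32_t)base, (unsigned)(base >> 32));
	return 0;
}
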
Currently, the limits are
 *  System (kernel) memory - 3/8th System RAM
+ *  Userptr memory - 3/4th System RAM
 */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
 	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	pr_debug("Kernel memory limit %lluM\n",
-		(kfd_mem_limit.max_system_mem_limit >> 20));
+	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+		(kfd_mem_limit.max_system_mem_limit >> 20),
+		(kfd_mem_limit.max_userptr_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
 			goto err_no_mem;
 		}
 		kfd_mem_limit.system_mem_used += (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		if ((kfd_mem_limit.system_mem_used + acc_size >
+			kfd_mem_limit.max_system_mem_limit) ||
+			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+			kfd_mem_limit.max_userptr_mem_limit)) {
+			ret = -ENOMEM;
+			goto err_no_mem;
+		}
+		kfd_mem_limit.system_mem_used += acc_size;
+		kfd_mem_limit.userptr_mem_used += size;
 	}
 
 err_no_mem:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 			sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT)
+	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.userptr_mem_used -= size;
+	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
 {
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
-	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
 }
 
 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
-				struct amdkfd_process_info *process_info)
+				struct amdkfd_process_info *process_info,
+				bool userptr)
 {
 	struct ttm_validate_buffer *entry = &mem->validate_list;
 	struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	entry->shared = true;
 	entry->bo = &bo->tbo;
 	mutex_lock(&process_info->lock);
-	list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	if (userptr)
+		list_add_tail(&entry->head, &process_info->userptr_valid_list);
+	else
+		list_add_tail(&entry->head, &process_info->kfd_bo_list);
 	mutex_unlock(&process_info->lock);
 }
 
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
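
The shift expressions above encode both fractions without a division: (mem >> 1) - (mem >> 3) is 1/2 - 1/8 = 3/8 of system RAM, and mem - (mem >> 2) is 3/4. A quick stand-alone check (the 16 GiB figure is only illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mem = 16ULL << 30;	/* pretend 16 GiB of system RAM */

	/* (mem >> 1) - (mem >> 3) = 1/2 - 1/8 = 3/8 of RAM */
	uint64_t system_limit = (mem >> 1) - (mem >> 3);
	/* mem - (mem >> 2) = 1 - 1/4 = 3/4 of RAM */
	uint64_t userptr_limit = mem - (mem >> 2);

	printf("system limit %llu MiB, userptr limit %llu MiB\n",
	       (unsigned long long)(system_limit >> 20),	/* 6144 */
	       (unsigned long long)(userptr_limit >> 20));	/* 12288 */
	return 0;
}
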
+ * + * The BO must already be on the userptr_valid_list. Otherwise an + * eviction and restore may happen that leaves the new BO unmapped + * with the user mode queues running. + * + * Takes the process_info->lock to protect against concurrent restore + * workers. + * + * Returns 0 for success, negative errno for errors. + */ +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + uint64_t user_addr) +{ + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; + struct ttm_operation_ctx ctx = { true, false }; + int ret = 0; + + mutex_lock(&process_info->lock); + + ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); + if (ret) { + pr_err("%s: Failed to set userptr: %d\n", __func__, ret); + goto out; + } + + ret = amdgpu_mn_register(bo, user_addr); + if (ret) { + pr_err("%s: Failed to register MMU notifier: %d\n", + __func__, ret); + goto out; + } + + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + if (ret) { + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + goto free_out; + } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("%s: Failed to reserve BO\n", __func__); + goto release_out; + } + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + pr_err("%s: failed to validate BO\n", __func__); + amdgpu_bo_unreserve(bo); + +release_out: + if (ret) + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; +unregister_out: + if (ret) + amdgpu_mn_unregister(bo); +out: + mutex_unlock(&process_info->lock); + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to * avoid deadlocks. Some operations update multiple VMs at once. Track * all the reservation info in a context structure. 
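
init_user_pages() above unwinds with the kernel's usual goto ladder: a failure jumps to a label that releases only what was already acquired, in reverse order. A stripped-down user-space sketch of the shape (the two allocations stand in for the MMU notifier registration and the user_pages array):

#include <errno.h>
#include <stdlib.h>

static int setup(void **notifier, void **pages)
{
	int ret;

	*notifier = malloc(16);		/* mn_register() stand-in */
	if (!*notifier)
		return -ENOMEM;

	*pages = calloc(64, sizeof(void *));	/* pages array stand-in */
	if (!*pages) {
		ret = -ENOMEM;
		goto unregister_out;	/* undo only the first step */
	}

	return 0;	/* success: the caller now owns both */

unregister_out:
	free(*notifier);
	*notifier = NULL;
	return ret;
}

int main(void)
{
	void *n, *p;

	return setup(&n, &p) ? 1 : 0;
}
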
Optionally a sync @@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, } static int map_bo_to_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, + bool no_update_pte) { int ret; @@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return ret; } + if (no_update_pte) + return 0; + ret = update_gpuvm_pte(adev, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); @@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, mutex_init(&info->lock); INIT_LIST_HEAD(&info->vm_list_head); INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), @@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } + info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->restore_userptr_work, + amdgpu_amdkfd_restore_userptr_worker); + *process_info = info; *ef = dma_fence_get(&info->eviction_fence->base); } @@ -872,6 +1006,7 @@ reserve_pd_fail: dma_fence_put(*ef); *ef = NULL; *process_info = NULL; + put_pid(info->pid); create_evict_fence_fail: mutex_destroy(&info->lock); kfree(info); @@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, /* Release per-process resources when last compute VM is destroyed */ if (!process_info->n_vms) { WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->restore_userptr_work); + put_pid(process_info->pid); mutex_destroy(&process_info->lock); kfree(process_info); } @@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + uint64_t user_addr = 0; struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int byte_align; - u32 alloc_domain; + u32 domain, alloc_domain; u64 alloc_flags; uint32_t mapping_flags; int ret; @@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; } else if (flags & ALLOC_MEM_FLAGS_GTT) { - alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = 0; + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; } else { return -EINVAL; } @@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", va, size, domain_string(alloc_domain)); - ret = amdgpu_bo_create(adev, size, byte_align, - alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = byte_align; + bp.domain = alloc_domain; + bp.flags = alloc_flags; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); @@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->kfd_bo = *mem; (*mem)->bo = bo; + if (user_addr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; (*mem)->va = va; - (*mem)->domain = alloc_domain; + (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); + + if (user_addr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&avm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&avm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } if (offset) *offset = amdgpu_bo_mmap_offset(bo); return 0; +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); + /* Don't unreserve system mem limit twice */ + goto err_reserve_system_mem; err_bo_create: unreserve_system_mem_limit(adev, size, alloc_domain); err_reserve_system_mem: @@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( * be freed anyway */ + /* No more MMU notifiers */ + amdgpu_mn_unregister(mem->bo); + /* Make sure restore workers don't access the BO any more */ bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; unsigned long bo_size; - - /* Make sure restore is not running concurrently. - */ - mutex_lock(&mem->process_info->lock); - - mutex_lock(&mem->lock); + bool is_invalid_userptr = false; bo = mem->bo; - if (!bo) { pr_err("Invalid BO when mapping memory to GPU\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } + /* Make sure restore is not running concurrently. Since we + * don't map invalid userptr BOs, we rely on the next restore + * worker to do the mapping + */ + mutex_lock(&mem->process_info->lock); + + /* Lock mmap-sem. 
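
One point worth noting in the allocation path above: for userptr BOs the domain used when mapping for the GPU (GTT) differs from the domain the BO is created in (CPU), because the backing pages come from the user's address space. A condensed sketch of that selection (the flag and domain values are invented, not the real ABI):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

enum { DOM_VRAM = 1, DOM_GTT = 2, DOM_CPU = 4 };	/* invented */
enum { F_VRAM = 1, F_GTT = 2, F_USERPTR = 4 };		/* invented */

static int pick_domains(uint32_t flags, uint32_t *domain,
			uint32_t *alloc_domain)
{
	if (flags & F_VRAM) {
		*domain = *alloc_domain = DOM_VRAM;
	} else if (flags & F_GTT) {
		*domain = *alloc_domain = DOM_GTT;
	} else if (flags & F_USERPTR) {
		*domain = DOM_GTT;	 /* where the GPU maps it */
		*alloc_domain = DOM_CPU; /* where the pages live */
	} else {
		return -EINVAL;
	}
	return 0;
}

int main(void)
{
	uint32_t dom, alloc;

	if (!pick_domains(F_USERPTR, &dom, &alloc))
		printf("map domain %u, alloc domain %u\n",
		       (unsigned)dom, (unsigned)alloc);
	return 0;
}
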
If we find an invalid userptr BO, we can be
+	 * sure that the MMU notifier is no longer running
+	 * concurrently and the queues are actually stopped
+	 */
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		down_write(&current->mm->mmap_sem);
+		is_invalid_userptr = atomic_read(&mem->invalid);
+		up_write(&current->mm->mmap_sem);
+	}
+
+	mutex_lock(&mem->lock);
+
 	domain = mem->domain;
 	bo_size = bo->tbo.mem.size;
 
@@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	if (unlikely(ret))
 		goto out;
 
+	/* Userptr can be marked as "not invalid", but not actually be
+	 * validated yet (still in the system domain). In that case
+	 * the queues are still stopped and we can leave mapping for
+	 * the next restore worker
+	 */
+	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+		is_invalid_userptr = true;
+
 	if (check_if_add_bo_to_vm(avm, mem)) {
 		ret = add_bo_to_vm(adev, mem, avm, false,
 				&bo_va_entry);
@@ -1217,7 +1418,8 @@
 			goto add_bo_to_vm_failed;
 	}
 
-	if (mem->mapped_to_gpu_memory == 0) {
+	if (mem->mapped_to_gpu_memory == 0 &&
+	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 		/* Validate BO only once. The eviction fence gets added to BO
 		 * the first time it is mapped. Validate will wait for all
 		 * background evictions to complete.
@@ -1235,7 +1437,8 @@
 			 entry->va, entry->va + bo_size,
 			 entry);
 
-		ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+		ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+				      is_invalid_userptr);
 		if (ret) {
 			pr_err("Failed to map radeon bo to gpuvm\n");
 			goto map_bo_to_gpuvm_failed;
 		}
@@ -1384,7 +1587,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 		goto bo_reserve_failed;
 	}
 
-	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
 	if (ret) {
 		pr_err("Failed to pin bo. ret %d\n", ret);
 		goto pin_failed;
 	}
@@ -1418,6 +1621,337 @@ bo_reserve_failed:
 	return ret;
 }
 
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+				struct mm_struct *mm)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	int invalid, evicted_bos;
+	int r = 0;
+
+	invalid = atomic_inc_return(&mem->invalid);
+	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+	if (evicted_bos == 1) {
+		/* First eviction, stop the queues */
+		r = kgd2kfd->quiesce_mm(mm);
+		if (r)
+			pr_err("Failed to quiesce KFD\n");
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+	}
+
+	return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
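
Because amdgpu_amdkfd_evict_userptr() above may be called in RECLAIM_FS context, it touches nothing but atomic counters; the 0-to-1 transition of evicted_bos is what quiesces the queues and schedules the restore worker. The same pattern in user-space C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int evicted_bos;

static void evict_one(void)
{
	/* Equivalent of the kernel's atomic_inc_return() == 1 test */
	if (atomic_fetch_add(&evicted_bos, 1) + 1 == 1)
		puts("first eviction: quiesce queues, schedule restore");
}

int main(void)
{
	evict_one();	/* stops the queues */
	evict_one();	/* only counted */
	printf("evicted_bos = %d\n", atomic_load(&evicted_bos));
	return 0;
}
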
+ */ +static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + struct mm_struct *mm) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int invalid, ret; + + /* Move all invalidated BOs to the userptr_inval_list and + * release their user pages by migration to the CPU domain + */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_valid_list, + validate_list.head) { + if (!atomic_read(&mem->invalid)) + continue; /* BO is still valid */ + + bo = mem->bo; + + if (amdgpu_bo_reserve(bo, true)) + return -EAGAIN; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + amdgpu_bo_unreserve(bo); + if (ret) { + pr_err("%s: Failed to invalidate userptr BO\n", + __func__); + return -EAGAIN; + } + + list_move_tail(&mem->validate_list.head, + &process_info->userptr_inval_list); + } + + if (list_empty(&process_info->userptr_inval_list)) + return 0; /* All evicted userptr BOs were freed */ + + /* Go through userptr_inval_list and update any invalid user_pages */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + invalid = atomic_read(&mem->invalid); + if (!invalid) + /* BO hasn't been invalidated since the last + * revalidation attempt. Keep its BO list. + */ + continue; + + bo = mem->bo; + + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + + /* Get updated user pages */ + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + mem->user_pages); + if (ret) { + mem->user_pages[0] = NULL; + pr_info("%s: Failed to get user pages: %d\n", + __func__, ret); + /* Pretend it succeeded. It will fail later + * with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with + * stalled user mode queues. + */ + } + + /* Mark the BO as valid unless it was invalidated + * again concurrently + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; + } + + return 0; +} + +/* Validate invalid userptr BOs + * + * Validates BOs on the userptr_inval_list, and moves them back to the + * userptr_valid_list. Also updates GPUVM page tables with new page + * addresses and waits for the page table updates to complete. 
+ */ +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct amdgpu_bo_list_entry *pd_bo_list_entries; + struct list_head resv_list, duplicates; + struct ww_acquire_ctx ticket; + struct amdgpu_sync sync; + + struct amdgpu_vm *peer_vm; + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int i, ret; + + pd_bo_list_entries = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list_entries) { + pr_err("%s: Failed to allocate PD BO list entries\n", __func__); + return -ENOMEM; + } + + INIT_LIST_HEAD(&resv_list); + INIT_LIST_HEAD(&duplicates); + + /* Get all the page directory BOs that need to be reserved */ + i = 0; + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + &pd_bo_list_entries[i++]); + /* Add the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + list_add_tail(&mem->resv_list.head, &resv_list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + /* Reserve all BOs and page tables for validation */ + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); + WARN(!list_empty(&duplicates), "Duplicates should be empty"); + if (ret) + goto out; + + amdgpu_sync_create(&sync); + + /* Avoid triggering eviction fences when unmapping invalid + * userptr BOs (waits for all fences, doesn't use + * FENCE_OWNER_VM) + */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + process_info->eviction_fence, + NULL, NULL); + + ret = process_validate_vms(process_info); + if (ret) + goto unreserve_out; + + /* Validate BOs and update GPUVM page tables */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + struct kfd_bo_va_list *bo_va_entry; + + bo = mem->bo; + + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) { + pr_err("%s: failed to validate BO\n", __func__); + goto unreserve_out; + } + } + + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. Put this BO back on + * the userptr_valid_list. If we need to revalidate + * it, we need to start from scratch. + */ + kvfree(mem->user_pages); + mem->user_pages = NULL; + list_move_tail(&mem->validate_list.head, + &process_info->userptr_valid_list); + + /* Update mapping. If the BO was not validated + * (because we couldn't get user pages), this will + * clear the page table entries, which will result in + * VM faults if the GPU tries to access the invalid + * memory. 
+		 */
+		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+			if (!bo_va_entry->is_mapped)
+				continue;
+
+			ret = update_gpuvm_pte((struct amdgpu_device *)
+					       bo_va_entry->kgd_dev,
+					       bo_va_entry, &sync);
+			if (ret) {
+				pr_err("%s: update PTE failed\n", __func__);
+				/* make sure this gets validated again */
+				atomic_inc(&mem->invalid);
+				goto unreserve_out;
+			}
+		}
+	}
+
+	/* Update page directories */
+	ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_bo_fence(peer_vm->root.base.bo,
+				&process_info->eviction_fence->base, true);
+	ttm_eu_backoff_reservation(&ticket, &resv_list);
+	amdgpu_sync_wait(&sync, false);
+	amdgpu_sync_free(&sync);
+out:
+	kfree(pd_bo_list_entries);
+
+	return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info =
+		container_of(dwork, struct amdkfd_process_info,
+			     restore_userptr_work);
+	struct task_struct *usertask;
+	struct mm_struct *mm;
+	int evicted_bos;
+
+	evicted_bos = atomic_read(&process_info->evicted_bos);
+	if (!evicted_bos)
+		return;
+
+	/* Reference task and mm in case of concurrent process termination */
+	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+	if (!usertask)
+		return;
+	mm = get_task_mm(usertask);
+	if (!mm) {
+		put_task_struct(usertask);
+		return;
+	}
+
+	mutex_lock(&process_info->lock);
+
+	if (update_invalid_user_pages(process_info, mm))
+		goto unlock_out;
+	/* userptr_inval_list can be empty if all evicted userptr BOs
+	 * have been freed. In that case there is nothing to validate
+	 * and we can just restart the queues.
+	 */
+	if (!list_empty(&process_info->userptr_inval_list)) {
+		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+			goto unlock_out; /* Concurrent eviction, try again */
+
+		if (validate_invalid_user_pages(process_info))
+			goto unlock_out;
+	}
+	/* Final check for concurrent eviction and atomic update. If
+	 * another eviction happens after successful update, it will
+	 * be a first eviction that calls quiesce_mm. The eviction
+	 * reference counting inside KFD will handle this case.
+	 */
+	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+	    evicted_bos)
+		goto unlock_out;
+	evicted_bos = 0;
+	if (kgd2kfd->resume_mm(mm)) {
+		pr_err("%s: Failed to resume KFD\n", __func__);
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
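
The atomic_cmpxchg() above is what makes the restart race-free: the counter is reset to zero only if it still holds the value sampled when the worker started, so an eviction that raced in leaves the counter untouched and forces another pass. The same guard in user-space C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int evicted_bos;

/* Returns 0 if the queues may be restarted, -1 on a racing eviction */
static int try_finish_restore(int seen)
{
	int expected = seen;

	return atomic_compare_exchange_strong(&evicted_bos,
					      &expected, 0) ? 0 : -1;
}

int main(void)
{
	atomic_store(&evicted_bos, 3);
	printf("%d\n", try_finish_restore(3));	/* 0: restored */

	atomic_store(&evicted_bos, 4);
	printf("%d\n", try_finish_restore(3));	/* -1: raced */
	return 0;
}
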
+ */ + } +unlock_out: + mutex_unlock(&process_info->lock); + mmput(mm); + put_task_struct(usertask); + + /* If validation failed, reschedule another attempt */ + if (evicted_bos) + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index a0f48cb9b8f0..236915849cfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) return ret; } + +union gfx_info { + struct atom_gfx_info_v2_4 v24; +}; + +int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + uint8_t frev, crev; + uint16_t data_offset; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + gfx_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union gfx_info *gfx_info = (union gfx_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 4: + adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se; + adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs; + adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); + adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; + adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; + adev->gfx.config.gs_prim_buffer_depth = + le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = + gfx_info->v24.gc_double_offchip_lds_buffer; + adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu; + adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size); + return 0; + default: + return -EINVAL; + } + + } + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 7689c961c4ef..20f158fd3b76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1ae5ae8c45a4..b33f1680c9a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -32,7 +32,7 @@ struct amdgpu_atpx_functions { bool switch_start; bool switch_end; bool disp_connectors_mapping; - bool disp_detetion_ports; + bool 
disp_detection_ports;
 };
 
 struct amdgpu_atpx {
@@ -90,6 +90,12 @@ bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
 	return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
 }
 
+#if defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void) {
+	return amdgpu_atpx_priv.dhandle;
+}
+#endif
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -156,7 +162,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
 	f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;
 	f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;
 	f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED;
-	f->disp_detetion_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
+	f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
 }
 
 /**
@@ -550,7 +556,7 @@ static int amdgpu_atpx_init(void)
  * look up whether we are the integrated or discrete GPU (all asics).
  * Returns the client id.
  */
-static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 {
 	if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
 		return VGA_SWITCHEROO_IGD;
@@ -569,7 +575,6 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
 	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
-	{ 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0, 0, 0, 0, 0 },
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02b849be083b..3079ea8523c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -75,37 +75,56 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 {
 	struct amdgpu_bo *dobj = NULL;
 	struct amdgpu_bo *sobj = NULL;
+	struct amdgpu_bo_param bp;
 	uint64_t saddr, daddr;
 	int r, n;
 	int time;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = sdomain;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
 	n = AMDGPU_BENCHMARK_ITERATIONS;
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
-			     ttm_bo_type_kernel, NULL, &sobj);
+	r = amdgpu_bo_create(adev, &bp, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
 	r = amdgpu_bo_reserve(sobj, false);
 	if (unlikely(r != 0))
 		goto out_cleanup;
-	r = amdgpu_bo_pin(sobj, sdomain, &saddr);
+	r = amdgpu_bo_pin(sobj, sdomain);
+	if (r) {
+		amdgpu_bo_unreserve(sobj);
+		goto out_cleanup;
+	}
+	r = amdgpu_ttm_alloc_gart(&sobj->tbo);
 	amdgpu_bo_unreserve(sobj);
 	if (r) {
 		goto out_cleanup;
 	}
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
-			     ttm_bo_type_kernel, NULL, &dobj);
+	saddr = amdgpu_bo_gpu_offset(sobj);
+	bp.domain = ddomain;
+	r = amdgpu_bo_create(adev, &bp, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}
 	r = amdgpu_bo_reserve(dobj, false);
 	if (unlikely(r != 0))
 		goto out_cleanup;
-	r = amdgpu_bo_pin(dobj, ddomain, &daddr);
+	r = amdgpu_bo_pin(dobj, ddomain);
+	if (r) {
+		amdgpu_bo_unreserve(dobj);
+		goto out_cleanup;
+	}
+	r = amdgpu_ttm_alloc_gart(&dobj->tbo);
 	amdgpu_bo_unreserve(dobj);
 	if (r) {
 		goto out_cleanup;
 	}
+	daddr = amdgpu_bo_gpu_offset(dobj);
 
 	if (adev->mman.buffer_funcs) {
 		time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index
92be7f6de197..7679c068c89a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -55,15 +55,15 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref) kfree_rcu(list, rhead); } -static int amdgpu_bo_list_create(struct amdgpu_device *adev, +int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, unsigned num_entries, - int *id) + struct amdgpu_bo_list **list_out) { - int r; - struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo_list *list; + int r; + list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); if (!list) @@ -78,16 +78,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev, return r; } - /* idr alloc should be called only after initialization of bo list. */ - mutex_lock(&fpriv->bo_list_lock); - r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); - mutex_unlock(&fpriv->bo_list_lock); - if (r < 0) { - amdgpu_bo_list_free(list); - return r; - } - *id = r; - + *list_out = list; return 0; } @@ -263,55 +254,79 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list) kfree(list); } -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, + struct drm_amdgpu_bo_list_entry **info_param) { + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); - - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; - union drm_amdgpu_bo_list *args = data; - uint32_t handle = args->in.list_handle; - const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); - struct drm_amdgpu_bo_list_entry *info; - struct amdgpu_bo_list *list; - int r; - info = kvmalloc_array(args->in.bo_number, - sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); + info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL); if (!info) return -ENOMEM; /* copy the handle array from userspace to a kernel buffer */ r = -EFAULT; - if (likely(info_size == args->in.bo_info_size)) { - unsigned long bytes = args->in.bo_number * - args->in.bo_info_size; + if (likely(info_size == in->bo_info_size)) { + unsigned long bytes = in->bo_number * + in->bo_info_size; if (copy_from_user(info, uptr, bytes)) goto error_free; } else { - unsigned long bytes = min(args->in.bo_info_size, info_size); + unsigned long bytes = min(in->bo_info_size, info_size); unsigned i; - memset(info, 0, args->in.bo_number * info_size); - for (i = 0; i < args->in.bo_number; ++i) { + memset(info, 0, in->bo_number * info_size); + for (i = 0; i < in->bo_number; ++i) { if (copy_from_user(&info[i], uptr, bytes)) goto error_free; - uptr += args->in.bo_info_size; + uptr += in->bo_info_size; } } + *info_param = info; + return 0; + +error_free: + kvfree(info); + return r; +} + +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + union drm_amdgpu_bo_list *args = data; + uint32_t handle = args->in.list_handle; + struct drm_amdgpu_bo_list_entry *info = NULL; + struct amdgpu_bo_list *list; + int r; + + r = amdgpu_bo_create_list_entry_array(&args->in, &info); + if (r) + goto error_free; + switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, - &handle); + &list); if (r) goto error_free; + + 
mutex_lock(&fpriv->bo_list_lock); + r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); + mutex_unlock(&fpriv->bo_list_lock); + if (r < 0) { + amdgpu_bo_list_free(list); + return r; + } + + handle = r; break; case AMDGPU_BO_LIST_OP_DESTROY: @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, return 0; error_free: - kvfree(info); + if (info) + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 71a57b2f7f04..693ec5ea4950 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -23,7 +23,6 @@ */ #include <linux/list.h> #include <linux/slab.h> -#include <linux/pci.h> #include <drm/drmP.h> #include <linux/firmware.h> #include <drm/amdgpu_drm.h> @@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, WARN(1, "Invalid indirect register space"); } -static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, - enum cgs_resource_type resource_type, - uint64_t size, - uint64_t offset, - uint64_t *resource_base) -{ - CGS_FUNC_ADEV; - - if (resource_base == NULL) - return -EINVAL; - - switch (resource_type) { - case CGS_RESOURCE_TYPE_MMIO: - if (adev->rmmio_size == 0) - return -ENOENT; - if ((offset + size) > adev->rmmio_size) - return -EINVAL; - *resource_base = adev->rmmio_base; - return 0; - case CGS_RESOURCE_TYPE_DOORBELL: - if (adev->doorbell.size == 0) - return -ENOENT; - if ((offset + size) > adev->doorbell.size) - return -EINVAL; - *resource_base = adev->doorbell.base; - return 0; - case CGS_RESOURCE_TYPE_FB: - case CGS_RESOURCE_TYPE_IO: - case CGS_RESOURCE_TYPE_ROM: - default: - return -EINVAL; - } -} - -static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, - unsigned table, uint16_t *size, - uint8_t *frev, uint8_t *crev) -{ - CGS_FUNC_ADEV; - uint16_t data_start; - - if (amdgpu_atom_parse_data_header( - adev->mode_info.atom_context, table, size, - frev, crev, &data_start)) - return (uint8_t*)adev->mode_info.atom_context->bios + - data_start; - - return NULL; -} - -static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, - uint8_t *frev, uint8_t *crev) -{ - CGS_FUNC_ADEV; - - if (amdgpu_atom_parse_cmd_header( - adev->mode_info.atom_context, table, - frev, crev)) - return 0; - - return -EINVAL; -} - -static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, - void *args) -{ - CGS_FUNC_ADEV; - - return amdgpu_atom_execute_table( - adev->mode_info.atom_context, table, args); -} - -static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state) -{ - CGS_FUNC_ADEV; - int i, r = -1; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, - state); - break; - } - } - return r; -} - -static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_powergating_state state) -{ - CGS_FUNC_ADEV; - int i, r = -1; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, - state); - break; - } - } - return r; -} - 
- static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) { CGS_FUNC_ADEV; @@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) return result; } -static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) -{ - CGS_FUNC_ADEV; - if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return 0; - } - /* cannot release other firmware because they are not created by cgs */ - return -EINVAL; -} - static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, enum cgs_ucode_id type) { @@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, return fw_version; } -static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device, - bool en) -{ - CGS_FUNC_ADEV; - - if (adev->gfx.rlc.funcs->enter_safe_mode == NULL || - adev->gfx.rlc.funcs->exit_safe_mode == NULL) - return 0; - - if (en) - adev->gfx.rlc.funcs->enter_safe_mode(adev); - else - adev->gfx.rlc.funcs->exit_safe_mode(adev); - - return 0; -} - -static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device, - bool lock) -{ - CGS_FUNC_ADEV; - - if (lock) - mutex_lock(&adev->grbm_idx_mutex); - else - mutex_unlock(&adev->grbm_idx_mutex); -} - static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -470,17 +314,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0x81) || (adev->pdev->device == 0x665f)) { info->is_kicker = true; - strcpy(fw_name, "radeon/bonaire_k_smc.bin"); + strcpy(fw_name, "amdgpu/bonaire_k_smc.bin"); } else { - strcpy(fw_name, "radeon/bonaire_smc.bin"); + strcpy(fw_name, "amdgpu/bonaire_smc.bin"); } break; case CHIP_HAWAII: if (adev->pdev->revision == 0x80) { info->is_kicker = true; - strcpy(fw_name, "radeon/hawaii_k_smc.bin"); + strcpy(fw_name, "amdgpu/hawaii_k_smc.bin"); } else { - strcpy(fw_name, "radeon/hawaii_smc.bin"); + strcpy(fw_name, "amdgpu/hawaii_smc.bin"); } break; case CHIP_TOPAZ: @@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_POLARIS12: strcpy(fw_name, "amdgpu/polaris12_smc.bin"); break; + case CHIP_VEGAM: + strcpy(fw_name, "amdgpu/vegam_smc.bin"); + break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || @@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_VEGA12: strcpy(fw_name, "amdgpu/vega12_smc.bin"); break; + case CHIP_VEGA20: + strcpy(fw_name, "amdgpu/vega20_smc.bin"); + break; default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; @@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return 0; } -static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device) -{ - CGS_FUNC_ADEV; - return amdgpu_sriov_vf(adev); -} - -static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, - struct cgs_display_info *info) -{ - CGS_FUNC_ADEV; - struct cgs_mode_info *mode_info; - - if (info == NULL) - return -EINVAL; - - mode_info = info->mode_info; - if (mode_info) - /* if the displays are off, vblank time is max */ - mode_info->vblank_time_us = 0xffffffff; - - if (!amdgpu_device_has_dc_support(adev)) { - struct amdgpu_crtc *amdgpu_crtc; - struct drm_device *ddev = adev->ddev; - struct drm_crtc *crtc; - uint32_t line_time_us, vblank_lines; - - if 
(adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled) { - info->active_display_mask |= (1 << amdgpu_crtc->crtc_id); - info->display_count++; - } - if (mode_info != NULL && - crtc->enabled && amdgpu_crtc->enabled && - amdgpu_crtc->hw_mode.clock) { - line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) / - amdgpu_crtc->hw_mode.clock; - vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end - - amdgpu_crtc->hw_mode.crtc_vdisplay + - (amdgpu_crtc->v_border * 2); - mode_info->vblank_time_us = vblank_lines * line_time_us; - mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - /* we have issues with mclk switching with refresh rates - * over 120 hz on the non-DC code. - */ - if (mode_info->refresh_rate > 120) - mode_info->vblank_time_us = 0; - mode_info = NULL; - } - } - } - } else { - info->display_count = adev->pm.pm_display_cfg.num_display; - if (mode_info != NULL) { - mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time; - mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh; - } - } - return 0; -} - - -static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) -{ - CGS_FUNC_ADEV; - - adev->pm.dpm_enabled = enabled; - - return 0; -} - static const struct cgs_ops amdgpu_cgs_ops = { .read_register = amdgpu_cgs_read_register, .write_register = amdgpu_cgs_write_register, .read_ind_register = amdgpu_cgs_read_ind_register, .write_ind_register = amdgpu_cgs_write_ind_register, - .get_pci_resource = amdgpu_cgs_get_pci_resource, - .atom_get_data_table = amdgpu_cgs_atom_get_data_table, - .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs, - .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table, .get_firmware_info = amdgpu_cgs_get_firmware_info, - .rel_firmware = amdgpu_cgs_rel_firmware, - .set_powergating_state = amdgpu_cgs_set_powergating_state, - .set_clockgating_state = amdgpu_cgs_set_clockgating_state, - .get_active_displays_info = amdgpu_cgs_get_active_displays_info, - .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, - .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, - .enter_safe_mode = amdgpu_cgs_enter_safe_mode, - .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, }; struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 96501ff0e55b..c770d73352a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -212,30 +212,21 @@ static void amdgpu_connector_update_scratch_regs(struct drm_connector *connector, enum drm_connector_status status) { - struct drm_encoder *best_encoder = NULL; - struct drm_encoder *encoder = NULL; + struct drm_encoder *best_encoder; + struct drm_encoder *encoder; const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; bool connected; int i; best_encoder = connector_funcs->best_encoder(connector); - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - - encoder = drm_encoder_find(connector->dev, NULL, - connector->encoder_ids[i]); - if (!encoder) - continue; - + drm_connector_for_each_possible_encoder(connector, encoder, i) { if ((encoder == best_encoder) && (status == connector_status_connected)) connected = true; else connected = false; 
amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected); - } } @@ -246,17 +237,11 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, struct drm_encoder *encoder; int i; - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - encoder = drm_encoder_find(connector->dev, NULL, - connector->encoder_ids[i]); - if (!encoder) - continue; - + drm_connector_for_each_possible_encoder(connector, encoder, i) { if (encoder->encoder_type == encoder_type) return encoder; } + return NULL; } @@ -349,22 +334,24 @@ static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) int ret; if (amdgpu_connector->edid) { - drm_mode_connector_update_edid_property(connector, amdgpu_connector->edid); + drm_connector_update_edid_property(connector, amdgpu_connector->edid); ret = drm_add_edid_modes(connector, amdgpu_connector->edid); return ret; } - drm_mode_connector_update_edid_property(connector, NULL); + drm_connector_update_edid_property(connector, NULL); return 0; } static struct drm_encoder * amdgpu_connector_best_single_encoder(struct drm_connector *connector) { - int enc_id = connector->encoder_ids[0]; + struct drm_encoder *encoder; + int i; + + /* pick the first one */ + drm_connector_for_each_possible_encoder(connector, encoder, i) + return encoder; - /* pick the encoder ids */ - if (enc_id) - return drm_encoder_find(connector->dev, NULL, enc_id); return NULL; } @@ -691,7 +678,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector) return ret; } -static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); @@ -843,7 +830,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) return ret; } -static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -985,9 +972,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - struct drm_encoder *encoder = NULL; const struct drm_encoder_helper_funcs *encoder_funcs; - int i, r; + int r; enum drm_connector_status ret = connector_status_disconnected; bool dret = false, broken_edid = false; @@ -1077,14 +1063,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) /* find analog encoder */ if (amdgpu_connector->dac_load_detect) { - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - - encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]); - if (!encoder) - continue; + struct drm_encoder *encoder; + int i; + drm_connector_for_each_possible_encoder(connector, encoder, i) { if (encoder->encoder_type != DRM_MODE_ENCODER_DAC && encoder->encoder_type != DRM_MODE_ENCODER_TVDAC) continue; @@ -1132,18 +1114,11 @@ exit: static struct drm_encoder * amdgpu_connector_dvi_encoder(struct drm_connector *connector) { - int enc_id = connector->encoder_ids[0]; struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct drm_encoder *encoder; int i; - for 
(i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - - encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]); - if (!encoder) - continue; + drm_connector_for_each_possible_encoder(connector, encoder, i) { if (amdgpu_connector->use_digital == true) { if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS) return encoder; @@ -1158,8 +1133,9 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) /* then check use digital */ /* pick the first one */ - if (enc_id) - return drm_encoder_find(connector->dev, NULL, enc_id); + drm_connector_for_each_possible_encoder(connector, encoder, i) + return encoder; + return NULL; } @@ -1172,7 +1148,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) amdgpu_connector->use_digital = true; } -static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1296,15 +1272,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn struct amdgpu_encoder *amdgpu_encoder; int i; - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - - encoder = drm_encoder_find(connector->dev, NULL, - connector->encoder_ids[i]); - if (!encoder) - continue; - + drm_connector_for_each_possible_encoder(connector, encoder, i) { amdgpu_encoder = to_amdgpu_encoder(encoder); switch (amdgpu_encoder->encoder_id) { @@ -1326,14 +1294,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector) int i; bool found = false; - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - encoder = drm_encoder_find(connector->dev, NULL, - connector->encoder_ids[i]); - if (!encoder) - continue; - + drm_connector_for_each_possible_encoder(connector, encoder, i) { amdgpu_encoder = to_amdgpu_encoder(encoder); if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2) found = true; @@ -1448,7 +1409,7 @@ out: return ret; } -static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc34b50e6b29..7c5cc33d0cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -31,6 +31,7 @@ #include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_gmc.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return 0; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data) +{ + int r; + struct drm_amdgpu_bo_list_entry *info = NULL; + + r = amdgpu_bo_create_list_entry_array(data, &info); + if (r) + return r; + + r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, + &p->bo_list); + if (r) + goto error_free; + + kvfree(info); + return 0; + +error_free: + if (info) + kvfree(info); + + return r; +} + +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; unsigned size, num_ibs = 0; @@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) break; + case AMDGPU_CHUNK_ID_BO_HANDLES: + size = sizeof(struct drm_amdgpu_bo_list_in); + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { + ret = -EINVAL; + goto free_partial_kdata; + } + + ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); + if (ret) + goto free_partial_kdata; + + break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: @@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (p->uf_entry.robj) p->job->uf_addr = uf_offset; kfree(chunk_array); + + /* Use this opportunity to fill in task info for the vm */ + amdgpu_vm_set_task_info(vm); + return 0; free_all_kdata: @@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, return; } - total_vram = adev->gmc.real_vram_size - adev->vram_pin_size; + total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; @@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); /* Do the same for visible VRAM if half of it is free */ - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) { + if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) { u64 total_vis_vram = adev->gmc.visible_vram_size; u64 used_vis_vram = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * to move it. Don't move anything if the threshold is zero. 
*/ if (p->bytes_moved < p->bytes_moved_threshold) { - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { /* And don't move a CPU_ACCESS_REQUIRED BO to limited * visible VRAM if we've depleted our allowance to do @@ -381,9 +423,8 @@ retry: r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && + amdgpu_bo_in_cpu_visible_vram(bo)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -411,7 +452,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved, bytes_moved; bool update_bytes_moved_vis; uint32_t other; @@ -435,18 +475,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, continue; /* Good we can try to move this BO somewhere else */ - amdgpu_ttm_placement_from_domain(bo, other); update_bytes_moved_vis = - adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); + !amdgpu_gmc_vram_full_visible(&adev->gmc) && + amdgpu_bo_in_cpu_visible_vram(bo); + amdgpu_ttm_placement_from_domain(bo, other); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - bytes_moved = atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; - p->bytes_moved += bytes_moved; + p->bytes_moved += ctx.bytes_moved; if (update_bytes_moved_vis) - p->bytes_moved_vis += bytes_moved; + p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r)) break; @@ -528,15 +564,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct list_head duplicates; unsigned i, tries = 10; + struct amdgpu_bo *gds; + struct amdgpu_bo *gws; + struct amdgpu_bo *oa; int r; INIT_LIST_HEAD(&p->validated); - p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); + /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ + if (!p->bo_list) + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); + else + mutex_lock(&p->bo_list->lock); + if (p->bo_list) { amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev); + p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); } INIT_LIST_HEAD(&duplicates); @@ -658,31 +702,36 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); + if (p->bo_list) { - struct amdgpu_bo *gds = p->bo_list->gds_obj; - struct amdgpu_bo *gws = p->bo_list->gws_obj; - struct amdgpu_bo *oa = p->bo_list->oa_obj; struct amdgpu_vm *vm = &fpriv->vm; unsigned i; + gds = p->bo_list->gds_obj; + gws = p->bo_list->gws_obj; + oa = p->bo_list->oa_obj; for (i = 0; i < p->bo_list->num_entries; i++) { struct amdgpu_bo *bo = p->bo_list->array[i].robj; p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); } + } else { + gds = p->adev->gds.gds_gfx_bo; + gws = p->adev->gds.gws_gfx_bo; + oa = p->adev->gds.oa_gfx_bo; + } - 
if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds); - p->job->gds_size = amdgpu_bo_size(gds); - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws); - p->job->gws_size = amdgpu_bo_size(gws); - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa); - p->job->oa_size = amdgpu_bo_size(oa); - } + if (gds) { + p->job->gds_base = amdgpu_bo_gpu_offset(gds); + p->job->gds_size = amdgpu_bo_size(gds); + } + if (gws) { + p->job->gws_base = amdgpu_bo_gpu_offset(gws); + p->job->gws_size = amdgpu_bo_size(gws); + } + if (oa) { + p->job->oa_base = amdgpu_bo_gpu_offset(oa); + p->job->oa_size = amdgpu_bo_size(oa); } if (!r && p->uf_entry.robj) { @@ -863,11 +912,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_ring *ring = p->job->ring; + struct amdgpu_ring *ring = p->ring; int r; /* Only for UVD/VCE VM emulation */ - if (p->job->ring->funcs->parse_cs) { + if (p->ring->funcs->parse_cs) { unsigned i, j; for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { @@ -925,6 +974,10 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, r = amdgpu_bo_vm_update_pte(p); if (r) return r; + + r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); + if (r) + return r; } return amdgpu_cs_sync_rings(p); @@ -977,10 +1030,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } } - if (parser->job->ring && parser->job->ring != ring) + if (parser->ring && parser->ring != ring) return -EINVAL; - parser->job->ring = ring; + parser->ring = ring; r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, @@ -999,11 +1052,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, /* UVD & VCE fw doesn't support user fences */ if (parser->job->uf_addr && ( - parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD || - parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD || + parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1154,8 +1207,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { - struct amdgpu_ring *ring = p->job->ring; + struct amdgpu_ring *ring = p->ring; struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + enum drm_sched_priority priority; struct amdgpu_job *job; unsigned i; uint64_t seq; @@ -1186,7 +1240,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } job->owner = p->filp; - job->fence_ctx = entity->fence_context; p->fence = dma_fence_get(&job->base.s_fence->finished); r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); @@ -1204,11 +1257,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); + priority = job->base.s_priority; drm_sched_entity_push_job(&job->base, entity); + ring = to_amdgpu_ring(entity->sched); + amdgpu_ring_priority_get(ring, priority); + ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); amdgpu_mn_unlock(p->mn); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 
09d35051fdd6..83e3b320a793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -90,8 +90,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, if (ring == &adev->gfx.kiq.ring) continue; - r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, - rq, amdgpu_sched_jobs, &ctx->guilty); + r = drm_sched_entity_init(&ctx->rings[i].entity, + &rq, 1, &ctx->guilty); if (r) goto failed; } @@ -104,15 +104,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, failed: for (j = 0; j < i; j++) - drm_sched_entity_fini(&adev->rings[j]->sched, + drm_sched_entity_destroy(&adev->rings[j]->sched, &ctx->rings[j].entity); kfree(ctx->fences); ctx->fences = NULL; return r; } -static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) +static void amdgpu_ctx_fini(struct kref *ref) { + struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) kfree(ctx->fences); ctx->fences = NULL; - for (i = 0; i < adev->num_rings; i++) - drm_sched_entity_fini(&adev->rings[i]->sched, - &ctx->rings[i].entity); - amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); mutex_destroy(&ctx->lock); + + kfree(ctx); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + u32 i; ctx = container_of(ref, struct amdgpu_ctx, refcount); - amdgpu_ctx_fini(ctx); + for (i = 0; i < ctx->adev->num_rings; i++) { - kfree(ctx); + if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) + continue; + + drm_sched_entity_destroy(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + } + + amdgpu_ctx_fini(ref); } static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) @@ -419,9 +426,11 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) if (other) { signed long r; - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + r = dma_fence_wait(other, true); if (r < 0) { - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + if (r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; } } @@ -435,16 +444,74 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } +void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; + + idp = &mgr->ctx_handles; + + mutex_lock(&mgr->lock); + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) { + mutex_unlock(&mgr->lock); + return; + } + + for (i = 0; i < ctx->adev->num_rings; i++) { + + if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) + continue; + + max_wait = drm_sched_entity_flush(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity, max_wait); + } + } + mutex_unlock(&mgr->lock); +} + +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) { + + if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) + continue; + + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_fini(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } + } +} + void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr 
*mgr) { struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id; + amdgpu_ctx_mgr_entity_fini(mgr); + idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) { - if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) + if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) DRM_ERROR("ctx %p is still alive\n", ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 448d69fe3756..f5fb93795a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -28,8 +28,13 @@ #include <linux/debugfs.h> #include "amdgpu.h" -/* - * Debugfs +/** + * amdgpu_debugfs_add_files - Add simple debugfs entries + * + * @adev: Device to attach debugfs entries to + * @files: Array of function callbacks that respond to reads + * @nfiles: Number of callbacks to register + * */ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, const struct drm_info_list *files, @@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, #if defined(CONFIG_DEBUG_FS) - +/** + * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes + * + * @read: True if reading + * @f: open file handle + * @buf: User buffer to write/read to + * @size: Number of bytes to write/read + * @pos: Offset to seek to + * + * This debugfs entry attaches special meaning to the offset being sought. + * Various bits have different meanings: + * + * Bit 62: Indicates a GRBM bank switch is needed + * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is + * zero) + * Bits 24..33: The SE or ME selector if needed + * Bits 34..43: The SH (or SA) or PIPE selector if needed + * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed + * + * Bit 23: Indicates that the PM power gating lock should be held + * This is necessary to read registers that might be + * unreliable during a power gating transition. + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -164,19 +195,37 @@ end: return result; } - +/** + * amdgpu_debugfs_regs_read - Callback for reading MMIO registers + */ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); } +/** + * amdgpu_debugfs_regs_write - Callback for writing MMIO registers + */ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); } + +/** + * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that.
+ */ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return result; } +/** + * amdgpu_debugfs_regs_didt_read - Read from a DIDT register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_didt_write - Write to a DIDT register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user return result; } +/** + * amdgpu_debugfs_regs_smc_read - Read from a SMC register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_smc_write - Write to a SMC register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. 
+ */ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * return result; } +/** + * amdgpu_debugfs_gca_config_read - Read from gfx config data + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * This file is used to access configuration data in a somewhat + * stable fashion. The format is a series of DWORDs with the first + * indicating which revision it is. New content is appended to the + * end so that older software can still read the data. + */ + static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_sensor_read - Read from the powerplay sensors + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset is treated as the BYTE address of one of the sensors + * enumerated in amd/include/kgd_pp_interface.h under the + * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK + * you would use the offset 3 * 4 = 12. + */ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, if (size & 3 || *pos & 0x3) return -EINVAL; - if (amdgpu_dpm == 0) + if (!adev->pm.dpm_enabled) return -EINVAL; /* convert offset to sensor number */ @@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, return !r ? outsize : r; } +/** amdgpu_debugfs_wave_read - Read WAVE STATUS data + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset being sought changes which wave the status data + * will be returned for. The bits are used as follows: + * + * Bits 0..6: Byte offset into data + * Bits 7..14: SE selector + * Bits 15..22: SH/SA selector + * Bits 23..30: CU/{WGP+SIMD} selector + * Bits 31..36: WAVE ID selector + * Bits 37..44: SIMD ID selector + * + * The returned data begins with one DWORD of version information, + * followed by WAVE STATUS registers relevant to the GFX IP version + * being used. See gfx_v8_0_read_wave_data() for an example output. + */ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, return result; } +/** amdgpu_debugfs_gpr_read - Read wave gprs + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset being sought changes which wave the register data + * will be returned for. The bits are used as follows: + * + * Bits 0..11: Byte offset into data + * Bits 12..19: SE selector + * Bits 20..27: SH/SA selector + * Bits 28..35: CU/{WGP+SIMD} selector + * Bits 36..43: WAVE ID selector + * Bits 44..51: SIMD ID selector + * Bits 52..59: Thread selector + * Bits 60..61: Bank selector (VGPR=0, SGPR=1) + * + * The returned data comes from the SGPR or VGPR register bank for + * the selected operational unit.
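+ *
+ * For example (a hypothetical selection following the bit layout
+ * above): to read the SGPR bank of SE 1, SH 0, CU 2, wave 0, SIMD 0,
+ * seek to (1ULL << 60) | (2ULL << 28) | (1ULL << 12) before reading.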
+ */ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", }; +/** + * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide + * register access. + * + * @adev: The device to attach the debugfs entries to + */ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { struct drm_minor *minor = adev->ddev->primary; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 34af664b9f93..386a7b34d2f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -25,6 +25,7 @@ * Alex Deucher * Jerome Glisse */ +#include <linux/power_supply.h> #include <linux/kthread.h> #include <linux/console.h> #include <linux/slab.h> @@ -83,8 +84,10 @@ static const char *amdgpu_asic_name[] = { "POLARIS10", "POLARIS11", "POLARIS12", + "VEGAM", "VEGA10", "VEGA12", + "VEGA20", "RAVEN", "LAST", }; @@ -673,34 +676,34 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev, } /** - * amdgpu_device_gart_location - try to find GTT location + * amdgpu_device_gart_location - try to find GART location * * @adev: amdgpu device structure holding all necessary information * @mc: memory controller structure holding memory information * - * Function will place try to place GTT before or after VRAM. + * Function will try to place GART before or after VRAM. * - * If GTT size is bigger than space left then we ajust GTT size. + * If GART size is bigger than space left then we adjust GART size. * Thus the function will never fail. - * - * FIXME: when reducing GTT size align new size on power of 2. */ void amdgpu_device_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { u64 size_af, size_bf; + mc->gart_size += adev->pm.smu_prv_buffer_size; + size_af = adev->gmc.mc_mask - mc->vram_end; size_bf = mc->vram_start; if (size_bf > size_af) { if (mc->gart_size > size_bf) { - dev_warn(adev->dev, "limiting GTT\n"); + dev_warn(adev->dev, "limiting GART\n"); mc->gart_size = size_bf; } mc->gart_start = 0; } else { if (mc->gart_size > size_af) { - dev_warn(adev->dev, "limiting GTT\n"); + dev_warn(adev->dev, "limiting GART\n"); mc->gart_size = size_af; } /* VCE doesn't like it when BOs cross a 4GB segment, so align @@ -709,7 +712,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL); } mc->gart_end = mc->gart_start + mc->gart_size - 1; - dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", + dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n", mc->gart_size >> 20, mc->gart_start, mc->gart_end); } @@ -907,6 +910,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) } } +static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) +{ + struct sysinfo si; + bool is_os_64 = (sizeof(void *) == 8) ?
true : false; + uint64_t total_memory; + uint64_t dram_size_seven_GB = 0x1B8000000; + uint64_t dram_size_three_GB = 0xB8000000; + + if (amdgpu_smu_memory_pool_size == 0) + return; + + if (!is_os_64) { + DRM_WARN("Not 64-bit OS, feature not supported\n"); + goto def_value; + } + si_meminfo(&si); + total_memory = (uint64_t)si.totalram * si.mem_unit; + + if ((amdgpu_smu_memory_pool_size == 1) || + (amdgpu_smu_memory_pool_size == 2)) { + if (total_memory < dram_size_three_GB) + goto def_value1; + } else if ((amdgpu_smu_memory_pool_size == 4) || + (amdgpu_smu_memory_pool_size == 8)) { + if (total_memory < dram_size_seven_GB) + goto def_value1; + } else { + DRM_WARN("Smu memory pool size not supported\n"); + goto def_value; + } + adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; + + return; + +def_value1: + DRM_WARN("Not enough system memory\n"); +def_value: + adev->pm.smu_prv_buffer_size = 0; +} + /** * amdgpu_device_check_arguments - validate module params * @@ -948,6 +991,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vm_fragment_size = -1; } + amdgpu_device_check_smu_prv_buffer_size(adev); + amdgpu_device_check_vm_size(adev); amdgpu_device_check_block_size(adev); @@ -1031,7 +1076,7 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { /** * amdgpu_device_ip_set_clockgating_state - set the CG state * - * @adev: amdgpu_device pointer + * @dev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * @state: clockgating state (gate or ungate) * @@ -1039,10 +1084,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { * the hardware IP specified. * Returns the error code from the last instance. */ -int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { + struct amdgpu_device *adev = dev; int i, r = 0; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1064,7 +1110,7 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, /** * amdgpu_device_ip_set_powergating_state - set the PG state * - * @adev: amdgpu_device pointer + * @dev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * @state: powergating state (gate or ungate) * @@ -1072,10 +1118,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, * the hardware IP specified. * Returns the error code from the last instance. */ -int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state) { + struct amdgpu_device *adev = dev; int i, r = 0; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1174,7 +1221,7 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, * amdgpu_device_ip_get_ip_block - get a hw IP pointer * * @adev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * @type: Type of hardware IP (SMU, GFX, UVD, etc.) * * Returns a pointer to the hardware IP block structure * if it exists for the asic, otherwise NULL.
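For context, a minimal sketch of a call through the relaxed void * handle introduced in the hunks above (the handle variable here is hypothetical; the enums are the ones already used in this file):

	void *handle = adev;	/* e.g. an opaque pointer handed to powerplay */
	int r = amdgpu_device_ip_set_clockgating_state(handle,
						       AMD_IP_BLOCK_TYPE_GFX,
						       AMD_CG_STATE_GATE);
	if (r)
		DRM_ERROR("gating GFX clocks failed (%d)\n", r);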
@@ -1320,9 +1367,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_CARRIZO: case CHIP_STONEY: #ifdef CONFIG_DRM_AMDGPU_SI @@ -1339,6 +1387,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_KABINI: case CHIP_MULLINS: #endif + case CHIP_VEGA20: default: return 0; case CHIP_VEGA10: @@ -1428,9 +1477,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_CARRIZO: case CHIP_STONEY: if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) @@ -1472,6 +1522,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) #endif case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: if (adev->asic_type == CHIP_RAVEN) adev->family = AMDGPU_FAMILY_RV; @@ -1499,6 +1550,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return -EAGAIN; } + adev->powerplay.pp_feature = amdgpu_pp_feature_mask; + for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1660,6 +1713,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, @@ -1671,6 +1725,35 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) } } } + + return 0; +} + +static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev) +{ + int i = 0, r; + + if (amdgpu_emu_mode == 1) + return 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + /* skip CG for VCE/UVD, it's handled specially */ + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && + adev->ip_blocks[i].version->funcs->set_powergating_state) { + /* enable powergating to save power */ + r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, + AMD_PG_STATE_GATE); + if (r) { + DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + } return 0; } @@ -1704,8 +1787,11 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) } } - mod_delayed_work(system_wq, &adev->late_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); + amdgpu_device_ip_late_set_cg_state(adev); + amdgpu_device_ip_late_set_pg_state(adev); + + queue_delayed_work(system_wq, &adev->late_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); amdgpu_device_fill_reset_magic(adev); @@ -1742,6 +1828,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->name, r); return r; } + if (adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false); 
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); /* XXX handle errors */ if (r) { @@ -1759,6 +1847,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* ungate blocks before hw fini so that we can shut down the blocks safely */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, @@ -1829,7 +1918,11 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, late_init_work.work); - amdgpu_device_ip_late_set_cg_state(adev); + int r; + + r = amdgpu_ib_ring_tests(adev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); } /** @@ -1857,6 +1950,10 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r); } + /* call smu to disable gfx off feature first when suspending */ + if (adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false); + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2080,23 +2177,30 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) switch (asic_type) { #if defined(CONFIG_DRM_AMD_DC) case CHIP_BONAIRE: - case CHIP_HAWAII: case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: + /* + * We have systems in the wild with these ASICs that require + * LVDS and VGA support which is not supported with DC. + * + * Fall back to the non-DC driver here by default so as not to + * cause regressions. + */ + return amdgpu_dc > 0; + case CHIP_HAWAII: case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_TONGA: case CHIP_FIJI: -#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) - return amdgpu_dc != 0; -#endif case CHIP_VEGA10: case CHIP_VEGA12: -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) + case CHIP_VEGA20: +#ifdef CONFIG_X86 case CHIP_RAVEN: #endif return amdgpu_dc != 0; @@ -2125,7 +2229,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) * amdgpu_device_init - initialize the driver * * @adev: amdgpu_device pointer - * @pdev: drm dev pointer + * @ddev: drm dev pointer * @pdev: pci dev pointer * @flags: driver flags * @@ -2216,6 +2320,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_device_ip_late_init_func_handler); + adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; + /* Registers mapping */ /* TODO: block userspace mapping of io register */ if (adev->asic_type >= CHIP_BONAIRE) { @@ -2375,10 +2481,6 @@ fence_driver_init: goto failed; } - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - if (amdgpu_sriov_vf(adev)) amdgpu_virt_init_data_exchange(adev); @@ -2500,8 +2602,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /** * amdgpu_device_suspend - initiate device suspend * - * @pdev: drm dev pointer - * @state: suspend state + * @dev: drm dev pointer + * @suspend: suspend state + * @fbcon: notify the fbdev of suspend * * Puts the hw in the suspend state (all asics). * Returns 0 for success or an error on failure.
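For context, the suspend entry point documented above is normally driven from the driver's dev_pm_ops; a minimal sketch of that wrapper (assumed shape, the real one lives in amdgpu_drv.c outside this diff):

	static int amdgpu_pmops_suspend(struct device *dev)
	{
		struct drm_device *drm_dev = pci_get_drvdata(to_pci_dev(dev));

		/* suspend = true: program the hw for S3; fbcon = true: notify fbdev */
		return amdgpu_device_suspend(drm_dev, true, true);
	}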
@@ -2539,7 +2642,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb); + struct drm_framebuffer *fb = crtc->primary->fb; struct amdgpu_bo *robj; if (amdgpu_crtc->cursor_bo) { @@ -2551,10 +2654,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) } } - if (rfb == NULL || rfb->obj == NULL) { + if (fb == NULL || fb->obj[0] == NULL) { continue; } - robj = gem_to_amdgpu_bo(rfb->obj); + robj = gem_to_amdgpu_bo(fb->obj[0]); /* don't unpin kernel fb objects */ if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { r = amdgpu_bo_reserve(robj, true); @@ -2599,7 +2702,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) /** * amdgpu_device_resume - initiate device resume * - * @pdev: drm dev pointer + * @dev: drm dev pointer + * @resume: resume state + * @fbcon: notify the fbdev of resume * * Bring the hw back to operating state (all asics). * Returns 0 for success or an error on failure. @@ -2640,11 +2745,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } amdgpu_fence_driver_resume(adev); - if (resume) { - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - } r = amdgpu_device_ip_late_init(adev); if (r) @@ -2658,11 +2758,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); r = amdgpu_bo_reserve(aobj, true); if (r == 0) { - r = amdgpu_bo_pin(aobj, - AMDGPU_GEM_DOMAIN_VRAM, - &amdgpu_crtc->cursor_addr); + r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); if (r != 0) DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); amdgpu_bo_unreserve(aobj); } } @@ -2736,6 +2835,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return true; + if (amdgpu_asic_need_full_reset(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2792,6 +2894,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) { int i; + if (amdgpu_asic_need_full_reset(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -3061,6 +3166,7 @@ out: * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu device pointer + * @from_hypervisor: request from hypervisor * * do VF FLR and reinitialize the ASIC * return 0 means succeeded, otherwise failed @@ -3087,20 +3193,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); - amdgpu_virt_release_full_gpu(adev, true); if (r) goto error; amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); +error: + amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { atomic_inc(&adev->vram_lost_counter); r = amdgpu_device_handle_vram_lost(adev); } -error: - return r; } @@ -3109,7 +3214,7 @@ error: * * @adev: amdgpu device pointer * @job: which job triggered the hang - * @force forces reset regardless of amdgpu_gpu_recovery + * @force: forces reset regardless of amdgpu_gpu_recovery * * Attempt to reset the GPU if it
has hung (all asics). * Returns 0 for success or an error on failure. @@ -3117,7 +3222,6 @@ error: int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) { - struct drm_atomic_state *state = NULL; int i, r, resched; if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { @@ -3140,10 +3244,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); - /* store modesetting */ - if (amdgpu_device_has_dc_support(adev)) - state = drm_atomic_helper_suspend(adev->ddev); - /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -3153,7 +3253,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, kthread_park(ring->sched.thread); - if (job && job->ring->idx != i) + if (job && job->base.sched == &ring->sched) continue; drm_sched_hw_job_reset(&ring->sched, &job->base); @@ -3177,16 +3277,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * or all rings (in the case @job is NULL) * after above amdgpu_reset accomplished */ - if ((!job || job->ring->idx == i) && !r) + if ((!job || job->base.sched == &ring->sched) && !r) drm_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } - if (amdgpu_device_has_dc_support(adev)) { - if (drm_atomic_helper_resume(adev->ddev, state)) - dev_info(adev->dev, "drm resume failed:%d\n", r); - } else { + if (!amdgpu_device_has_dc_support(adev)) { drm_helper_resume_force_mode(adev->ddev); } @@ -3217,8 +3314,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { - u32 mask; - int ret; + struct pci_dev *pdev; + enum pci_bus_speed speed_cap; + enum pcie_link_width link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -3236,27 +3334,61 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } if (adev->pm.pcie_gen_mask == 0) { - ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask); - if (!ret) { - adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | + /* asic caps */ + pdev = adev->pdev; + speed_cap = pcie_get_speed_cap(pdev); + if (speed_cap == PCI_SPEED_UNKNOWN) { + adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); - - if (mask & DRM_PCIE_SPEED_25) - adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; - if (mask & DRM_PCIE_SPEED_50) - adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2; - if (mask & DRM_PCIE_SPEED_80) - adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3; } else { - adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; + if (speed_cap == PCIE_SPEED_16_0GT) + adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | + CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); + else if (speed_cap == PCIE_SPEED_8_0GT) + adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); + else if (speed_cap == PCIE_SPEED_5_0GT) + adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); + else + adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; + } + /* platform caps */ + pdev = adev->ddev->pdev->bus->self; + speed_cap = 
pcie_get_speed_cap(pdev); + if (speed_cap == PCI_SPEED_UNKNOWN) { + adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); + } else { + if (speed_cap == PCIE_SPEED_16_0GT) + adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); + else if (speed_cap == PCIE_SPEED_8_0GT) + adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); + else if (speed_cap == PCIE_SPEED_5_0GT) + adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); + else + adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; + } } if (adev->pm.pcie_mlw_mask == 0) { - ret = drm_pcie_get_max_link_width(adev->ddev, &mask); - if (!ret) { - switch (mask) { - case 32: + pdev = adev->ddev->pdev->bus->self; + link_width = pcie_get_width_cap(pdev); + if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { + adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; + } else { + switch (link_width) { + case PCIE_LNK_X32: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | @@ -3265,7 +3397,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; - case 16: + case PCIE_LNK_X16: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | @@ -3273,36 +3405,34 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; - case 12: + case PCIE_LNK_X12: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; - case 8: + case PCIE_LNK_X8: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; - case 4: + case PCIE_LNK_X4: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; - case 2: + case PCIE_LNK_X2: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; - case 1: + case PCIE_LNK_X1: adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; break; default: break; } - } else { - adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 93f700ab1bfb..6748cd7fc129 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> static void amdgpu_display_flip_callback(struct dma_fence *f, @@ -151,14 +152,11 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *old_amdgpu_fb; - struct amdgpu_framebuffer *new_amdgpu_fb; struct drm_gem_object *obj; struct 
amdgpu_flip_work *work; struct amdgpu_bo *new_abo; unsigned long flags; u64 tiling_flags; - u64 base; int i, r; work = kzalloc(sizeof *work, GFP_KERNEL); @@ -174,15 +172,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ - old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - obj = old_amdgpu_fb->obj; + obj = crtc->primary->fb->obj[0]; /* take a reference to the old object */ work->old_abo = gem_to_amdgpu_bo(obj); amdgpu_bo_ref(work->old_abo); - new_amdgpu_fb = to_amdgpu_framebuffer(fb); - obj = new_amdgpu_fb->obj; + obj = fb->obj[0]; new_abo = gem_to_amdgpu_bo(obj); /* pin the new buffer */ @@ -192,12 +188,18 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); + r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; } + r = amdgpu_ttm_alloc_gart(&new_abo->tbo); + if (unlikely(r != 0)) { + DRM_ERROR("%p bind failed\n", new_abo); + goto unpin; + } + r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl, &work->shared_count, &work->shared); @@ -209,7 +211,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags); amdgpu_bo_unreserve(new_abo); - work->base = base; + work->base = amdgpu_bo_gpu_offset(new_abo); work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) + amdgpu_get_vblank_counter_kms(dev, work->crtc_id); @@ -482,31 +484,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, return true; } -static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb) -{ - struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - - drm_gem_object_put_unlocked(amdgpu_fb->obj); - drm_framebuffer_cleanup(fb); - kfree(amdgpu_fb); -} - -static int amdgpu_display_user_framebuffer_create_handle( - struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - - return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle); -} - static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { - .destroy = amdgpu_display_user_framebuffer_destroy, - .create_handle = amdgpu_display_user_framebuffer_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; -uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) { uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -526,11 +509,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, struct drm_gem_object *obj) { int ret; - rfb->obj = obj; + rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) { - rfb->obj = NULL; + rfb->base.obj[0] = NULL; return ret; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 2b11d808f297..f66e3e3fef0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,7 +23,7 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ -uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); +uint32_t 
amdgpu_display_supported_domains(struct amdgpu_device *adev); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index e997ebbe43ea..1c4595562f8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -28,6 +28,7 @@ #include "amdgpu_i2c.h" #include "amdgpu_dpm.h" #include "atom.h" +#include "amd_pcie.h" void amdgpu_dpm_print_class_info(u32 class, u32 class2) { @@ -115,6 +116,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, pr_cont("\n"); } +void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev) +{ + struct drm_device *ddev = adev->ddev; + struct drm_crtc *crtc; + struct amdgpu_crtc *amdgpu_crtc; + + adev->pm.dpm.new_active_crtcs = 0; + adev->pm.dpm.new_active_crtc_count = 0; + if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { + amdgpu_crtc = to_amdgpu_crtc(crtc); + if (amdgpu_crtc->enabled) { + adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); + adev->pm.dpm.new_active_crtc_count++; + } + } + } +} + u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) { @@ -432,7 +453,7 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ATOM_PPLIB_PhaseSheddingLimits_Record *entry; adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries = - kzalloc(psl->ucNumEntries * + kcalloc(psl->ucNumEntries, sizeof(struct amdgpu_phase_shedding_limits_entry), GFP_KERNEL); if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) { @@ -916,9 +937,11 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev, case AMDGPU_PCIE_GEN3: return AMDGPU_PCIE_GEN3; default: - if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3)) + if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) && + (default_gen == AMDGPU_PCIE_GEN3)) return AMDGPU_PCIE_GEN3; - else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2)) + else if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) && + (default_gen == AMDGPU_PCIE_GEN2)) return AMDGPU_PCIE_GEN2; else return AMDGPU_PCIE_GEN1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 643d008410c6..ff24e1cc5b65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src { AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 }; -#define SCLK_DEEP_SLEEP_MASK 0x8 - struct amdgpu_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -289,12 +287,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_force_performance_level(adev, l) \ ((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l))) -#define amdgpu_dpm_powergate_uvd(adev, g) \ - ((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g))) - -#define amdgpu_dpm_powergate_vce(adev, g) \ - ((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g))) - #define amdgpu_dpm_get_current_power_state(adev) \ ((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)) @@ -349,11 +341,9 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) -#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \ - virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \ - 
((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \ - (adev)->powerplay.pp_handle, virtual_addr_low, \ - virtual_addr_hi, mc_addr_low, mc_addr_hi, size) +#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \ + ((adev)->powerplay.pp_funcs->set_powergating_by_smu(\ + (adev)->powerplay.pp_handle, block_type, gate)) #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ @@ -367,10 +357,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\ (adev)->powerplay.pp_handle, type, parameter, size)) -#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \ - ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \ - (adev)->powerplay.pp_handle)) - struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ @@ -410,7 +396,6 @@ struct amdgpu_dpm { u32 tdp_adjustment; u16 load_line_slope; bool power_control; - bool ac_power; /* special states active */ bool thermal_active; bool uvd_active; @@ -445,6 +430,9 @@ struct amdgpu_pm { uint32_t pcie_gen_mask; uint32_t pcie_mlw_mask; struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ + uint32_t smu_prv_buffer_size; + struct amdgpu_bo *smu_prv_buffer; + bool ac_power; }; #define R600_SSTU_DFLT 0 @@ -482,6 +470,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, struct amdgpu_ps *rps); u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); +void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); bool amdgpu_is_uvd_state(u32 class, u32 class2); void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, u32 *p, u32 *u); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b19482b36b8..8843a06360fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1,10 +1,3 @@ -/** - * \file amdgpu_drv.c - * AMD Amdgpu driver - * - * \author Gareth Hughes <gareth@valinux.com> - */ - /* * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. @@ -75,9 +68,11 @@ * - 3.23.0 - Add query for VRAM lost counter * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). + * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
*/ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 25 +#define KMS_DRIVER_MINOR 27 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -109,11 +104,8 @@ int amdgpu_vram_page_split = 512; int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support = 0; int amdgpu_dc = -1; -int amdgpu_dc_log = 0; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; -int amdgpu_no_evict = 0; -int amdgpu_direct_gma_size = 0; uint amdgpu_pcie_gen_cap = 0; uint amdgpu_pcie_lane_cap = 0; uint amdgpu_cg_mask = 0xffffffff; @@ -121,7 +113,8 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0xffffbfff; +/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ +uint amdgpu_pp_feature_mask = 0xfffd3fff; int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -132,164 +125,370 @@ int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ int amdgpu_emu_mode = 0; +uint amdgpu_smu_memory_pool_size = 0; +/** + * DOC: vramlimit (int) + * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). + */ MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); +/** + * DOC: vis_vramlimit (int) + * Restrict the amount of CPU visible VRAM in MiB for testing. The default is 0 (Use full CPU visible VRAM). + */ MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes"); module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); +/** + * DOC: gartsize (uint) + * Restrict the size of GART in MiB (32, 64, etc.) for testing. The default is -1 (The size depends on asic). + */ MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); module_param_named(gartsize, amdgpu_gart_size, uint, 0600); +/** + * DOC: gttsize (int) + * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM, + * otherwise 3/4 RAM size). + */ MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); module_param_named(gttsize, amdgpu_gtt_size, int, 0600); +/** + * DOC: moverate (int) + * Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s). + */ MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)"); module_param_named(moverate, amdgpu_moverate, int, 0600); +/** + * DOC: benchmark (int) + * Run benchmarks. The default is 0 (Skip benchmarks). + */ MODULE_PARM_DESC(benchmark, "Run benchmark"); module_param_named(benchmark, amdgpu_benchmarking, int, 0444); +/** + * DOC: test (int) + * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (Skip test; set 1 to run it). + */ MODULE_PARM_DESC(test, "Run tests"); module_param_named(test, amdgpu_testing, int, 0444); +/** + * DOC: audio (int) + * Set HDMI/DP audio. Only affects non-DC display handling. The default is -1 (Enabled); set 0 to disable it. + */ MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)"); module_param_named(audio, amdgpu_audio, int, 0444); +/** + * DOC: disp_priority (int) + * Set display priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto).
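+ * For example, booting with amdgpu.disp_priority=2 forces high display priority, which may help with underflow or flicker under memory pressure on older non-DC ASICs (illustrative usage; the exact effect is ASIC-dependent).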
+ */ MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)"); module_param_named(disp_priority, amdgpu_disp_priority, int, 0444); +/** + * DOC: hw_i2c (int) + * To enable hw i2c engine. Only affects non-DC display handling. The default is 0 (Disabled). + */ MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)"); module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444); +/** + * DOC: pcie_gen2 (int) + * To disable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled). + */ MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)"); module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); +/** + * DOC: msi (int) + * To disable Message Signaled Interrupts (MSI) functionality (1 = enable, 0 = disable). The default is -1 (auto, enabled). + */ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); +/** + * DOC: lockup_timeout (int) + * Set GPU scheduler timeout value in ms. Value 0 is invalid and will be adjusted to 10000. + * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. + */ MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); +/** + * DOC: dpm (int) + * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto). + */ MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(dpm, amdgpu_dpm, int, 0444); +/** + * DOC: fw_load_type (int) + * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto). + */ MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)"); module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444); +/** + * DOC: aspm (int) + * To disable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled). + */ MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(aspm, amdgpu_aspm, int, 0444); +/** + * DOC: runpm (int) + * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down + * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. + */ MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); +/** + * DOC: ip_block_mask (uint) + * Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.). + * Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have + * some IPs or may include multiple instances of an IP so the ordering varies from asic to asic. See the driver output in + * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device). + */ MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); +/** + * DOC: bapm (int) + * Bidirectional Application Power Management (BAPM) is used to dynamically share TDP between CPU and GPU. Set value 0 to disable it.
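+ * For example, amdgpu.bapm=0 on the kernel command line turns TDP sharing off, which can help when diagnosing stability issues on some APUs (illustrative usage).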
+ * The default is -1 (auto, enabled). + */ MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(bapm, amdgpu_bapm, int, 0444); +/** + * DOC: deep_color (int) + * Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled). + */ MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))"); module_param_named(deep_color, amdgpu_deep_color, int, 0444); +/** + * DOC: vm_size (int) + * Override the size of the GPU's per client virtual address space in GiB. The default is -1 (automatic for each asic). + */ MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)"); module_param_named(vm_size, amdgpu_vm_size, int, 0444); +/** + * DOC: vm_fragment_size (int) + * Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic). + */ MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)"); module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444); +/** + * DOC: vm_block_size (int) + * Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic). + */ MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); +/** + * DOC: vm_fault_stop (int) + * Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop). + */ MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)"); module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); +/** + * DOC: vm_debug (int) + * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled). + */ MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); +/** + * DOC: vm_update_mode (int) + * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default + * is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never). + */ MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both"); module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); +/** + * DOC: vram_page_split (int) + * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512. + */ MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); +/** + * DOC: exp_hw_support (int) + * Enable experimental hw support (1 = enable). The default is 0 (disabled). + */ MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); +/** + * DOC: dc (int) + * Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic).
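+ * For example, amdgpu.dc=1 forces the Display Core stack on, while amdgpu.dc=0 falls back to the legacy display code on ASICs that still support it (illustrative usage).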
+ */ MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))"); module_param_named(dc, amdgpu_dc, int, 0444); -MODULE_PARM_DESC(dc_log, "Display Core Log Level (0 = minimal (default), 1 = chatty"); -module_param_named(dc_log, amdgpu_dc_log, int, 0444); - +/** + * DOC: sched_jobs (int) + * Override the max number of jobs supported in the sw queue. The default is 32. + */ MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)"); module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); +/** + * DOC: sched_hw_submission (int) + * Override the max number of HW submissions. The default is 2. + */ MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); +/** + * DOC: ppfeaturemask (uint) + * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. + * The default is the current set of stable power features. + */ MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default)"); module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); -MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))"); -module_param_named(no_evict, amdgpu_no_evict, int, 0444); - -MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)"); -module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444); - +/** + * DOC: pcie_gen_cap (uint) + * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. + * The default is 0 (automatic for each asic). + */ MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))"); module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); +/** + * DOC: pcie_lane_cap (uint) + * Override PCIE lanes capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. + * The default is 0 (automatic for each asic). + */ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); +/** + * DOC: cg_mask (uint) + * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + */ MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)"); module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); +/** + * DOC: pg_mask (uint) + * Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + */ MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)"); module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444); +/** + * DOC: sdma_phase_quantum (uint) + * Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32. + */ MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))"); module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444); +/** + * DOC: disable_cu (charp) + * Set to disable CUs (specified like se.sh.cu,...). The default is NULL.
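+ * For example, amdgpu.disable_cu=1.0.3 disables CU 3 in SH 0 of SE 1; multiple CUs can be listed, separated by commas (illustrative values).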
+ */ MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)"); module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444); +/** + * DOC: virtual_display (charp) + * Set to enable virtual display feature. This feature provides virtual display hardware on headless boards + * or in virtualized environments. It will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x. It's the pci address of + * the device, plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the pci + * device at 26:00.0. The default is NULL. + */ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); +/** + * DOC: ngg (int) + * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled). + */ MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); module_param_named(ngg, amdgpu_ngg, int, 0444); +/** + * DOC: prim_buf_per_se (int) + * Override the size of Primitive Buffer per Shader Engine in bytes. The default is 0 (depending on gfx). + */ MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); +/** + * DOC: pos_buf_per_se (int) + * Override the size of Position Buffer per Shader Engine in bytes. The default is 0 (depending on gfx). + */ MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); +/** + * DOC: cntl_sb_buf_per_se (int) + * Override the size of Control Sideband per Shader Engine in bytes. The default is 0 (depending on gfx). + */ MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); +/** + * DOC: param_buf_per_se (int) + * Override the size of Off-Chip Parameter Cache per Shader Engine in bytes. The default is 0 (depending on gfx). + */ MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); +/** + * DOC: job_hang_limit (int) + * Set how long to allow a job to hang before dropping it. The default is 0. + */ MODULE_PARM_DESC(job_hang_limit, "how long to allow a job to hang before dropping it (default 0)"); module_param_named(job_hang_limit, amdgpu_job_hang_limit, int, 0444); +/** + * DOC: lbpw (int) + * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled). + */ MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); +/** + * DOC: gpu_recovery (int) + * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). + */ MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +/** + * DOC: emu_mode (int) + * Set value 1 to enable emulation mode.
This is only needed when running on an emulator. The default is 0 (disabled). + */ MODULE_PARM_DESC(emu_mode, "Emulation mode (1 = enable, 0 = disable)"); module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); +/** + * DOC: si_support (int) + * Select the driver for SI asics. This parameter only takes effect when CONFIG_DRM_AMDGPU_SI is set. For SI asics, when the radeon driver is enabled, + * set value 0 to use the radeon driver or value 1 to use the amdgpu driver. The default is to use the radeon driver when it is available, + * otherwise the amdgpu driver. + */ #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -303,6 +502,12 @@ MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)") module_param_named(si_support, amdgpu_si_support, int, 0444); #endif +/** + * DOC: cik_support (int) + * Select the driver for CIK asics. This parameter only takes effect when CONFIG_DRM_AMDGPU_CIK is set. For CIK asics, when the radeon driver is enabled, + * set value 0 to use the radeon driver or value 1 to use the amdgpu driver. The default is to use the radeon driver when it is available, + * otherwise the amdgpu driver. + */ #ifdef CONFIG_DRM_AMDGPU_CIK #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -316,6 +521,16 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled) module_param_named(cik_support, amdgpu_cik_support, int, 0444); #endif +/** + * DOC: smu_memory_pool_size (uint) + * Reserve GTT for SMU debug usage; set value 0 to disable it. The actual size is value * 256 MiB. + * E.g. 0x1 = 256 MiB, 0x2 = 512 MiB, 0x4 = 1 GiB, 0x8 = 2 GiB. The default is 0 (disabled). + */ +MODULE_PARM_DESC(smu_memory_pool_size, + "reserve gtt for smu debug usage, 0 = disable, " + "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); +module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -534,6 +749,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + /* VEGAM */ + {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, + {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, @@ -550,6 +768,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, + /* Vega 20 */ + {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, @@ -647,7 +872,7 @@ retry_init: err_pci: pci_disable_device(pdev); err_free: - drm_dev_unref(dev); +
drm_dev_put(dev); return ret; } @@ -657,7 +882,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); drm_dev_unregister(dev); - drm_dev_unref(dev); + drm_dev_put(dev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } @@ -838,9 +1063,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .runtime_idle = amdgpu_pmops_runtime_idle, }; +static int amdgpu_flush(struct file *f, fl_owner_t id) +{ + struct drm_file *file_priv = f->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr); + + return 0; +} + + static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, + .flush = amdgpu_flush, .release = drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = amdgpu_mmap, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 94138abe093b..ae8fac34f7a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -46,7 +46,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev) list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { amdgpu_encoder = to_amdgpu_encoder(encoder); if (amdgpu_encoder->devices & amdgpu_connector->devices) { - drm_mode_connector_attach_encoder(connector, encoder); + drm_connector_attach_encoder(connector, encoder); if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector); adev->mode_info.bl_encoder = amdgpu_encoder; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 12063019751b..d44b76455e89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_framebuffer_domains(adev); + domain = amdgpu_display_supported_domains(adev); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; @@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, - true, NULL, &gobj); + ttm_bo_type_kernel, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); return -ENOMEM; @@ -168,11 +168,19 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, } - ret = amdgpu_bo_pin(abo, domain, NULL); + ret = amdgpu_bo_pin(abo, domain); if (ret) { amdgpu_bo_unreserve(abo); goto out_unref; } + + ret = amdgpu_ttm_alloc_gart(&abo->tbo); + if (ret) { + amdgpu_bo_unreserve(abo); + dev_err(adev->dev, "%p bind failed\n", abo); + goto out_unref; + } + ret = amdgpu_bo_kmap(abo, NULL); amdgpu_bo_unreserve(abo); if (ret) { @@ -292,9 +300,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb drm_fb_helper_unregister_fbi(&rfbdev->helper); - if (rfb->obj) { - amdgpufb_destroy_pinned_object(rfb->obj); - rfb->obj = NULL; + if (rfb->base.obj[0]) { + amdgpufb_destroy_pinned_object(rfb->base.obj[0]); + rfb->base.obj[0] = NULL; drm_framebuffer_unregister_private(&rfb->base); drm_framebuffer_cleanup(&rfb->base); } @@ -377,7 +385,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev) if (!adev->mode_info.rfbdev) return 0; - robj = 
gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj); + robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); size += amdgpu_bo_size(robj); return size; } @@ -386,7 +394,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) { if (!adev->mode_info.rfbdev) return false; - if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj)) + if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) return true; return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97449e06a242..7056925eb386 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + unsigned flags) { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; @@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) adev->fence_context + ring->idx, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - seq, AMDGPU_FENCE_FLAG_INT); + seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; /* This function can't be called concurrently anyway, otherwise @@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; uint64_t index; - if (ring != &adev->uvd.ring) { + if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) { ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); } else { /* put fence directly behind firmware */ index = ALIGN(adev->uvd.fw->size, 8); - ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; - ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; + ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index; + ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; } amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); amdgpu_irq_get(adev, irq_src, irq_type); @@ -645,7 +646,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, .enable_signaling = amdgpu_fence_enable_signaling, - .wait = dma_fence_default_wait, .release = amdgpu_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index cf0f186c6092..a54d5655a191 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) int r; if (adev->gart.robj == NULL) { - r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, - &adev->gart.robj); + struct amdgpu_bo_param bp; + + memset(&bp, 0, sizeof(bp)); + bp.size = adev->gart.table_size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); if (r) { 
return r; } @@ -138,14 +143,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) { - uint64_t gpu_addr; int r; r = amdgpu_bo_reserve(adev->gart.robj, false); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin(adev->gart.robj, - AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr); + r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM); if (r) { amdgpu_bo_unreserve(adev->gart.robj); return r; @@ -154,7 +157,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) if (r) amdgpu_bo_unpin(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.robj); - adev->gart.table_addr = gpu_addr; + adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj); return r; } @@ -229,7 +232,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } t = offset / AMDGPU_GPU_PAGE_SIZE; - p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); + p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; for (i = 0; i < pages; i++, p++) { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS adev->gart.pages[p] = NULL; @@ -238,7 +241,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, if (!adev->gart.ptr) continue; - for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) { amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; @@ -277,7 +280,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, for (i = 0; i < pages; i++) { page_base = dma_addr[i]; - for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) { amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; } @@ -314,7 +317,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS t = offset / AMDGPU_GPU_PAGE_SIZE; - p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); + p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; for (i = 0; i < pages; i++, p++) adev->gart.pages[p] = pagelist ? 
pagelist[i] : NULL; #endif @@ -364,7 +367,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev) #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS /* Allocate pages table */ - adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages); + adev->gart.pages = vzalloc(array_size(sizeof(void *), + adev->gart.num_cpu_pages)); if (adev->gart.pages == NULL) return -ENOMEM; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 456295c00291..9f9e9dc87da1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -37,6 +37,8 @@ struct amdgpu_bo; #define AMDGPU_GPU_PAGE_SHIFT 12 #define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) +#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) + struct amdgpu_gart { u64 table_addr; struct amdgpu_bo *robj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 46b9ea4e6103..bcbdcf997d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -30,6 +30,7 @@ #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#include "amdgpu_display.h" void amdgpu_gem_object_free(struct drm_gem_object *gobj) { @@ -48,17 +49,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, struct drm_gem_object **obj) { struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int r; + memset(&bp, 0, sizeof(bp)); *obj = NULL; /* At least align on page size */ if (alignment < PAGE_SIZE) { alignment = PAGE_SIZE; } + bp.size = size; + bp.byte_align = alignment; + bp.type = type; + bp.resv = resv; + bp.preferred_domain = initial_domain; retry: - r = amdgpu_bo_create(adev, size, alignment, initial_domain, - flags, type, resv, &bo); + bp.flags = flags; + bp.domain = initial_domain; + r = amdgpu_bo_create(adev, &bp, &bo); if (r) { if (r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { @@ -221,17 +230,19 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* reject invalid gem domains */ - if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) + if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { + if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { + /* if gds bo is created from user space, it must be + * passed to bo list + */ + DRM_ERROR("GDS bo cannot be per-vm-bo\n"); + return -EINVAL; + } flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) size = size << AMDGPU_GDS_SHIFT; @@ -254,7 +265,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, r = amdgpu_gem_object_create(adev, size, args->in.alignment, (u32)(0xffffffff & args->in.domains), - flags, false, resv, &gobj); + flags, ttm_bo_type_device, resv, &gobj); if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { if (!r) { struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); @@ -306,7 +317,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, - 0, 0, NULL, &gobj); + 0, ttm_bo_type_device, NULL, &gobj); if (r) return r; @@ -499,7 +510,6 @@ out: * @adev: 
amdgpu_device pointer * @vm: vm to update * @bo_va: bo_va to update - * @list: validation list * @operation: map, unmap or clear * * Update the bo_va directly after setting its address. Errors are not @@ -508,7 +518,6 @@ out: static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va, - struct list_head *list, uint32_t operation) { int r; @@ -601,7 +610,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; abo = gem_to_amdgpu_bo(gobj); tv.bo = &abo->tbo; - tv.shared = false; + tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); list_add(&tv.head, &list); } else { gobj = NULL; @@ -662,7 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list, + amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); error_backoff: @@ -746,17 +755,18 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; + u32 domain; int r; args->pitch = amdgpu_align_pitch(adev, args->width, DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); - - r = amdgpu_gem_object_create(adev, args->size, 0, - AMDGPU_GEM_DOMAIN_VRAM, + domain = amdgpu_bo_get_preferred_pin_domain(adev, + amdgpu_display_supported_domains(adev)); + r = amdgpu_gem_object_create(adev, args->size, 0, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - false, NULL, &gobj); + ttm_bo_type_device, NULL, &gobj); if (r) return -ENOMEM; @@ -771,16 +781,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, } #if defined(CONFIG_DEBUG_FS) + +#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \ + if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \ + seq_printf((m), " " #flag); \ + } + static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) { struct drm_gem_object *gobj = ptr; struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); struct seq_file *m = data; + struct dma_buf_attachment *attachment; + struct dma_buf *dma_buf; unsigned domain; const char *placement; unsigned pin_count; - uint64_t offset; domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); switch (domain) { @@ -798,13 +815,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) seq_printf(m, "\t0x%08x: %12ld byte %s", id, amdgpu_bo_size(bo), placement); - offset = READ_ONCE(bo->tbo.mem.start); - if (offset != AMDGPU_BO_INVALID_OFFSET) - seq_printf(m, " @ 0x%010Lx", offset); - pin_count = READ_ONCE(bo->pin_count); if (pin_count) seq_printf(m, " pin count %d", pin_count); + + dma_buf = READ_ONCE(bo->gem_base.dma_buf); + attachment = READ_ONCE(bo->gem_base.import_attach); + + if (attachment) + seq_printf(m, " imported from %p", dma_buf); + else if (dma_buf) + seq_printf(m, " exported as %p", dma_buf); + + amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); + amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS); + amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED); + amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID); + amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC); + seq_printf(m, "\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 893c2490b783..6cb4948233cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -109,4 +109,19 @@ struct amdgpu_gmc { const struct amdgpu_gmc_funcs *gmc_funcs; }; +/** + * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR + * + * @adev: amdgpu_device pointer + * + * Returns: + * True if full VRAM is visible through the BAR + */ +static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) +{ + WARN_ON(gmc->real_vram_size < gmc->visible_vram_size); + + return (gmc->real_vram_size == gmc->visible_vram_size); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 311589e02d17..5518e623fed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; + unsigned fence_flags = 0; unsigned i; int r = 0; @@ -138,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* ring tests don't use a job */ if (job) { vm = job->vm; - fence_ctx = job->fence_ctx; + fence_ctx = job->base.s_fence->scheduled.context; } else { vm = NULL; fence_ctx = 0; @@ -227,7 +228,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, #endif amdgpu_asic_invalidate_hdp(adev, ring); - r = amdgpu_fence_emit(ring, f); + if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) + fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + /* wrap the last IB with fence */ + if (job && job->uf_addr) { + amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, + fence_flags | AMDGPU_FENCE_FLAG_64BIT); + } + + r = amdgpu_fence_emit(ring, f, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -239,12 +249,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ring->funcs->insert_end) ring->funcs->insert_end(ring); - /* wrap the last IB with fence */ - if (job && job->uf_addr) { - amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, - AMDGPU_FENCE_FLAG_64BIT); - } - if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); @@ -349,7 +353,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) ring->funcs->type == AMDGPU_RING_TYPE_VCE || ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC || ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) tmo = tmo_mm; else tmo = tmo_gfx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index a1c78f90eadf..3a072a7a39f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -578,11 +578,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } - - adev->vm_manager.fence_context = - dma_fence_context_alloc(AMDGPU_MAX_RINGS); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - adev->vm_manager.seqno[i] = 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 3a5ca462abf0..1abf5b5bac9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -25,6 +25,23 @@ * Alex Deucher * Jerome Glisse 
*/ + +/** + * DOC: Interrupt Handling + * + * Interrupts generated within GPU hardware raise interrupt requests that are + * passed to the amdgpu IRQ handler, which is responsible for detecting the source and + * type of the interrupt and dispatching the matching handlers. If handling an + * interrupt requires calling kernel functions that may sleep, processing is + * dispatched to work handlers. + * + * If MSI functionality is not disabled by a module parameter, then MSI + * support will be enabled. + * + * For GPU interrupt sources that may be driven by another driver, IRQ domain + * support is used (with mapping between virtual and hardware IRQs). + */ + #include <linux/irq.h> #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> @@ -43,19 +60,21 @@ #define AMDGPU_WAIT_IDLE_TIMEOUT 200 -/* - * Handle hotplug events outside the interrupt handler proper. - */ /** - * amdgpu_hotplug_work_func - display hotplug work handler + * amdgpu_hotplug_work_func - work handler for display hotplug event * - * @work: work struct + * @work: work struct pointer * - * This is the hot plug event work handler (all asics). - * The work gets scheduled from the irq handler if there - * was a hot plug interrupt. It walks the connector table - * and calls the hotplug handler for each one, then sends - * a drm hotplug event to alert userspace. + * This is the hotplug event work handler (all ASICs). + * The work gets scheduled from the IRQ handler if there + * was a hotplug interrupt. It walks through the connector table + * and calls the hotplug handler for each connector. After this, it sends + * a DRM hotplug event to alert userspace. + * + * This design approach is required in order to defer hotplug event handling + * from the IRQ handler to a work handler because the hotplug handler has to use + * mutexes, which cannot be locked in an IRQ handler (since &mutex_lock may + * sleep). + */ static void amdgpu_hotplug_work_func(struct work_struct *work) { @@ -74,13 +93,12 @@ } /** - * amdgpu_irq_reset_work_func - execute gpu reset + * amdgpu_irq_reset_work_func - execute GPU reset * - * @work: work struct + * @work: work struct pointer * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. + * Execute scheduled GPU reset (Cayman+). + * This function is called when the IRQ handler thinks we need a GPU reset. */ static void amdgpu_irq_reset_work_func(struct work_struct *work) { @@ -91,7 +109,13 @@ amdgpu_device_gpu_recover(adev, NULL, false); } -/* Disable *all* interrupts */ +/** + * amdgpu_irq_disable_all - disable *all* interrupts + * + * @adev: amdgpu device pointer + * + * Disable all types of interrupts from all sources. + */ void amdgpu_irq_disable_all(struct amdgpu_device *adev) { unsigned long irqflags; @@ -123,11 +147,15 @@ } /** - * amdgpu_irq_handler - irq handler + * amdgpu_irq_handler - IRQ handler + * + * @irq: IRQ number (unused) + * @arg: pointer to DRM device - * @int irq, void *arg: args + * IRQ handler for the amdgpu driver (all ASICs). * - * This is the irq handler for the amdgpu driver (all asics).
+ * Returns: + * result of handling the IRQ, as defined by &irqreturn_t */ irqreturn_t amdgpu_irq_handler(int irq, void *arg) { @@ -142,18 +170,18 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) } /** - * amdgpu_msi_ok - asic specific msi checks + * amdgpu_msi_ok - check whether MSI functionality is enabled * - * @adev: amdgpu device pointer + * @adev: amdgpu device pointer (unused) + * + * Checks whether MSI functionality has been disabled via module parameter + * (all ASICs). * - * Handles asic specific MSI checks to determine if - * MSIs should be enabled on a particular chip (all asics). - * Returns true if MSIs should be enabled, false if MSIs - * should not be enabled. + * Returns: + * *true* if MSIs are allowed to be enabled or *false* otherwise */ static bool amdgpu_msi_ok(struct amdgpu_device *adev) { - /* force MSI on */ if (amdgpu_msi == 1) return true; else if (amdgpu_msi == 0) @@ -163,12 +191,15 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) } /** - * amdgpu_irq_init - init driver interrupt info + * amdgpu_irq_init - initialize interrupt handling * * @adev: amdgpu device pointer * - * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics). - * Returns 0 for success, error for failure. + * Sets up work functions for hotplug and reset interrupts, enables MSI + * functionality, initializes vblank, hotplug and reset interrupt handling. + * + * Returns: + * 0 on success or error code on failure */ int amdgpu_irq_init(struct amdgpu_device *adev) { @@ -176,7 +207,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) spin_lock_init(&adev->irq.lock); - /* enable msi */ + /* Enable MSI if not disabled by module parameter */ adev->irq.msi_enabled = false; if (amdgpu_msi_ok(adev)) { @@ -189,7 +220,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) - /* Disable vblank irqs aggressively for power-saving */ + /* Disable vblank IRQs aggressively for power-saving */ /* XXX: can this be enabled for DC? */ adev->ddev->vblank_disable_immediate = true; @@ -197,7 +228,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (r) return r; - /* pre DCE11 */ + /* Pre-DCE11 */ INIT_WORK(&adev->hotplug_work, amdgpu_hotplug_work_func); } @@ -220,11 +251,13 @@ int amdgpu_irq_init(struct amdgpu_device *adev) } /** - * amdgpu_irq_fini - tear down driver interrupt info + * amdgpu_irq_fini - shut down interrupt handling * * @adev: amdgpu device pointer * - * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics). + * Tears down work functions for hotplug and reset interrupts, disables MSI + * functionality, shuts down vblank, hotplug and reset interrupt handling, + * turns off interrupts from all sources (all ASICs). */ void amdgpu_irq_fini(struct amdgpu_device *adev) { @@ -264,12 +297,17 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) } /** - * amdgpu_irq_add_id - register irq source + * amdgpu_irq_add_id - register IRQ source * * @adev: amdgpu device pointer - * @src_id: source id for this source - * @source: irq source + * @client_id: client id + * @src_id: source id + * @source: IRQ source pointer + * + * Registers IRQ source on a client. 
* + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned client_id, unsigned src_id, @@ -312,12 +350,12 @@ } /** - * amdgpu_irq_dispatch - dispatch irq to IP blocks + * amdgpu_irq_dispatch - dispatch IRQ to IP blocks * * @adev: amdgpu device pointer - * @entry: interrupt vector + * @entry: interrupt vector pointer * - * Dispatches the irq to the different IP blocks + * Dispatches IRQ to IP blocks. */ void amdgpu_irq_dispatch(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) @@ -361,13 +399,13 @@ } /** - * amdgpu_irq_update - update hw interrupt state + * amdgpu_irq_update - update hardware interrupt state * * @adev: amdgpu device pointer - * @src: interrupt src you want to enable - * @type: type of interrupt you want to update + * @src: interrupt source pointer + * @type: type of interrupt * - * Updates the interrupt state for a specific src (all asics). + * Updates interrupt state for the specific source (all ASICs). */ int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -378,7 +416,7 @@ spin_lock_irqsave(&adev->irq.lock, irqflags); - /* we need to determine after taking the lock, otherwise + /* We need to determine after taking the lock, otherwise we might disable just enabled interrupts again */ if (amdgpu_irq_enabled(adev, src, type)) state = AMDGPU_IRQ_STATE_ENABLE; @@ -390,6 +428,14 @@ return r; } +/** + * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources + * + * @adev: amdgpu device pointer + * + * Updates state of all types of interrupts on all sources on resume after + * reset. + */ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; @@ -413,10 +459,13 @@ * amdgpu_irq_get - enable interrupt * * @adev: amdgpu device pointer - * @src: interrupt src you want to enable - * @type: type of interrupt you want to enable + * @src: interrupt source pointer + * @type: type of interrupt * - * Enables the interrupt type for a specific src (all asics). + * Enables specified type of interrupt on the specified source (all ASICs). + * + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -440,10 +489,13 @@ * amdgpu_irq_put - disable interrupt * * @adev: amdgpu device pointer - * @src: interrupt src you want to disable - * @type: type of interrupt you want to disable + * @src: interrupt source pointer + * @type: type of interrupt + * + * Disables specified type of interrupt on the specified source (all ASICs). * - * Disables the interrupt type for a specific src (all asics).
+ * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -464,12 +516,17 @@ } /** - * amdgpu_irq_enabled - test if irq is enabled or not + * amdgpu_irq_enabled - check whether interrupt is enabled or not * * @adev: amdgpu device pointer - * @idx: interrupt src you want to test + * @src: interrupt source pointer + * @type: type of interrupt * - * Tests if the given interrupt source is enabled or not + * Checks whether the given type of interrupt is enabled on the given source. + * + * Returns: + * *true* if interrupt is enabled, *false* if interrupt is disabled or on + * invalid parameters */ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -486,7 +543,7 @@ return !!atomic_read(&src->enabled_types[type]); } -/* gen irq */ +/* XXX: Generic IRQ handling */ static void amdgpu_irq_mask(struct irq_data *irqd) { /* XXX */ @@ -497,12 +554,26 @@ /* XXX */ } +/* amdgpu hardware interrupt chip descriptor */ static struct irq_chip amdgpu_irq_chip = { .name = "amdgpu-ih", .irq_mask = amdgpu_irq_mask, .irq_unmask = amdgpu_irq_unmask, }; +/** + * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers + * + * @d: amdgpu IRQ domain pointer (unused) + * @irq: virtual IRQ number + * @hwirq: hardware irq number + * + * Current implementation assigns a simple interrupt handler to the given virtual + * IRQ. + * + * Returns: + * 0 on success or error code otherwise + */ static int amdgpu_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { @@ -514,17 +585,21 @@ return 0; } +/* Implementation of methods for amdgpu IRQ domain */ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = { .map = amdgpu_irqdomain_map, }; /** - * amdgpu_irq_add_domain - create a linear irq domain + * amdgpu_irq_add_domain - create a linear IRQ domain * * @adev: amdgpu device pointer * - * Create an irq domain for GPU interrupt sources + * Creates an IRQ domain for GPU interrupt sources * that may be driven by another driver (e.g., ACP). + * + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_add_domain(struct amdgpu_device *adev) { @@ -539,11 +614,11 @@ } /** - * amdgpu_irq_remove_domain - remove the irq domain + * amdgpu_irq_remove_domain - remove the IRQ domain * * @adev: amdgpu device pointer * - * Remove the irq domain for GPU interrupt sources + * Removes the IRQ domain for GPU interrupt sources * that may be driven by another driver (e.g., ACP). */ void amdgpu_irq_remove_domain(struct amdgpu_device *adev) @@ -555,16 +630,17 @@ } /** - * amdgpu_irq_create_mapping - create a mapping between a domain irq and a - * Linux irq + * amdgpu_irq_create_mapping - create a mapping between a domain IRQ and a Linux IRQ * * @adev: amdgpu device pointer * @src_id: IH source id * - * Create a mapping between a domain irq (GPU IH src id) and a Linux irq + * Creates a mapping between a domain IRQ (GPU IH src id) and a Linux IRQ. * Use this for components that generate a GPU interrupt, but are driven * by a different driver (e.g., ACP). - * Returns the Linux irq.
+ * + * Returns: + * Linux IRQ */ unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2bd56760c744..5a2c26a85984 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -30,14 +30,14 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) { - struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); + struct amdgpu_job *job = to_amdgpu_job(s_job); - DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n", - job->base.sched->name, - atomic_read(&job->ring->fence_drv.last_seq), - job->ring->fence_drv.sync_seq); + DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); - amdgpu_device_gpu_recover(job->adev, job, false); + amdgpu_device_gpu_recover(ring->adev, job, false); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -54,7 +54,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, if (!*job) return -ENOMEM; - (*job)->adev = adev; + /* + * Initialize the scheduler to at least some ring so that we always + * have a pointer to adev. + */ + (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; @@ -86,6 +90,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, void amdgpu_job_free_resources(struct amdgpu_job *job) { + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; unsigned i; @@ -93,14 +98,15 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) f = job->base.s_fence ? 
&job->base.s_fence->finished : job->fence; for (i = 0; i < job->num_ibs; ++i) - amdgpu_ib_free(job->adev, &job->ibs[i], f); + amdgpu_ib_free(ring->adev, &job->ibs[i], f); } static void amdgpu_job_free_cb(struct drm_sched_job *s_job) { - struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); + struct amdgpu_job *job = to_amdgpu_job(s_job); - amdgpu_ring_priority_put(job->ring, s_job->s_priority); + amdgpu_ring_priority_put(ring, s_job->s_priority); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); @@ -117,50 +123,68 @@ void amdgpu_job_free(struct amdgpu_job *job) kfree(job); } -int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, - struct drm_sched_entity *entity, void *owner, - struct dma_fence **f) +int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, + void *owner, struct dma_fence **f) { + enum drm_sched_priority priority; + struct amdgpu_ring *ring; int r; - job->ring = ring; if (!f) return -EINVAL; - r = drm_sched_job_init(&job->base, &ring->sched, entity, owner); + r = drm_sched_job_init(&job->base, entity->sched, entity, owner); if (r) return r; job->owner = owner; - job->fence_ctx = entity->fence_context; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, job->base.s_priority); + priority = job->base.s_priority; drm_sched_entity_push_job(&job->base, entity); + ring = to_amdgpu_ring(entity->sched); + amdgpu_ring_priority_get(ring, priority); + + return 0; +} + +int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, + struct dma_fence **fence) +{ + int r; + + job->base.sched = &ring->sched; + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); + job->fence = dma_fence_get(*fence); + if (r) + return r; + + amdgpu_job_free(job); return 0; } static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity) { + struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; + struct dma_fence *fence; bool explicit = false; int r; - struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit); + fence = amdgpu_sync_get_fence(&job->sync, &explicit); if (fence && explicit) { if (drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false); + r = amdgpu_sync_fence(ring->adev, &job->sched_sync, + fence, false); if (r) - DRM_ERROR("Error adding fence to sync (%d)\n", r); + DRM_ERROR("Error adding fence (%d)\n", r); } } while (fence == NULL && vm && !job->vmid) { - struct amdgpu_ring *ring = job->ring; - r = amdgpu_vmid_grab(vm, ring, &job->sync, &job->base.s_fence->finished, job); @@ -175,30 +199,25 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) { + struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); struct dma_fence *fence = NULL, *finished; - struct amdgpu_device *adev; struct amdgpu_job *job; int r; - if (!sched_job) { - DRM_ERROR("job is null\n"); - return NULL; - } job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; - adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) + if 
(job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter)) dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ if (finished->error < 0) { DRM_INFO("Skip scheduling IBs!\n"); } else { - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h new file mode 100644 index 000000000000..57cfe78a262b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -0,0 +1,74 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_JOB_H__ +#define __AMDGPU_JOB_H__ + +/* bit set means command submit involves a preamble IB */ +#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) +/* bit set means preamble IB is first presented in belonging context */ +#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) +/* bit set means context switch occurred */ +#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) + +#define to_amdgpu_job(sched_job) \ + container_of((sched_job), struct amdgpu_job, base) + +struct amdgpu_fence; + +struct amdgpu_job { + struct drm_sched_job base; + struct amdgpu_vm *vm; + struct amdgpu_sync sync; + struct amdgpu_sync sched_sync; + struct amdgpu_ib *ibs; + struct dma_fence *fence; /* the hw fence */ + uint32_t preamble_status; + uint32_t num_ibs; + void *owner; + bool vm_needs_flush; + uint64_t vm_pd_addr; + unsigned vmid; + unsigned pasid; + uint32_t gds_base, gds_size; + uint32_t gws_base, gws_size; + uint32_t oa_base, oa_size; + uint32_t vram_lost_counter; + + /* user fence handling */ + uint64_t uf_addr; + uint64_t uf_sequence; + +}; + +int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, + struct amdgpu_job **job, struct amdgpu_vm *vm); +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, + struct amdgpu_job **job); + +void amdgpu_job_free_resources(struct amdgpu_job *job); +void amdgpu_job_free(struct amdgpu_job *job); +int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, + void *owner, struct dma_fence **f); +int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, + struct dma_fence **fence); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4b7824d30e73..207f238649b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -31,6 +31,7 @@ #include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" +#include "atom.h" #include <linux/vga_switcheroo.h> #include <linux/slab.h> @@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->gfx.rlc_fw_version; fw_info->feature = adev->gfx.rlc_feature_version; break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL: + fw_info->ver = adev->gfx.rlc_srlc_fw_version; + fw_info->feature = adev->gfx.rlc_srlc_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM: + fw_info->ver = adev->gfx.rlc_srlg_fw_version; + fw_info->feature = adev->gfx.rlc_srlg_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM: + fw_info->ver = adev->gfx.rlc_srls_fw_version; + fw_info->feature = adev->gfx.rlc_srls_feature_version; + break; case AMDGPU_INFO_FW_GFX_MEC: if (query_fw->index == 0) { fw_info->ver = adev->gfx.mec_fw_version; @@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_crtc *crtc; uint32_t ui32 = 0; uint64_t ui64 = 0; - int i, found; + int i, j, found; int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) return -EINVAL; + /* Ensure IB tests are run on ring */ + flush_delayed_work(&adev->late_init_work); + switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: ui32 = adev->accel_working; @@ -313,56 +329,66 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) ring_mask |= ((adev->gfx.gfx_ring[i].ready ?
1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 8; + ib_start_alignment = 32; + ib_size_alignment = 32; break; case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) ring_mask |= ((adev->gfx.compute_ring[i].ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 8; + ib_start_alignment = 32; + ib_size_alignment = 32; break; case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_UVD: type = AMD_IP_BLOCK_TYPE_UVD; - ring_mask = adev->uvd.ring.ready ? 1 : 0; - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 16; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) + ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i); + ib_start_alignment = 64; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_start_alignment = 4; ib_size_alignment = 1; break; case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; - for (i = 0; i < adev->uvd.num_enc_rings; i++) - ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 1; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) + for (j = 0; j < adev->uvd.num_enc_rings; j++) + ring_mask |= + ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << + (j + i * adev->uvd.num_enc_rings)); + ib_start_alignment = 64; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_enc_rings; i++) ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_start_alignment = 64; ib_size_alignment = 1; break; + case AMDGPU_HW_IP_VCN_JPEG: + type = AMD_IP_BLOCK_TYPE_VCN; + ring_mask = adev->vcn.ring_jpeg.ready ? 
1 : 0; + ib_start_alignment = 16; + ib_size_alignment = 16; + break; default: return -EINVAL; } @@ -407,6 +433,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_VCN_DEC: case AMDGPU_HW_IP_VCN_ENC: + case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; break; default: @@ -474,13 +501,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_VRAM_GTT: { struct drm_amdgpu_info_vram_gtt vram_gtt; - vram_gtt.vram_size = adev->gmc.real_vram_size; - vram_gtt.vram_size -= adev->vram_pin_size; - vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size; - vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); + vram_gtt.vram_size = adev->gmc.real_vram_size - + atomic64_read(&adev->vram_pin_size); + vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; - vram_gtt.gtt_size -= adev->gart_pin_size; + vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); return copy_to_user(out, &vram_gtt, min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0; } @@ -489,17 +516,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file memset(&mem, 0, sizeof(mem)); mem.vram.total_heap_size = adev->gmc.real_vram_size; - mem.vram.usable_heap_size = - adev->gmc.real_vram_size - adev->vram_pin_size; + mem.vram.usable_heap_size = adev->gmc.real_vram_size - + atomic64_read(&adev->vram_pin_size); mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = adev->gmc.visible_vram_size; - mem.cpu_accessible_vram.usable_heap_size = - adev->gmc.visible_vram_size - - (adev->vram_pin_size - adev->invisible_pin_size); + mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = @@ -507,8 +533,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size; mem.gtt.total_heap_size *= PAGE_SIZE; - mem.gtt.usable_heap_size = mem.gtt.total_heap_size - - adev->gart_pin_size; + mem.gtt.usable_heap_size = mem.gtt.total_heap_size - + atomic64_read(&adev->gart_pin_size); mem.gtt.heap_usage = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; @@ -701,10 +727,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } } case AMDGPU_INFO_SENSOR: { - struct pp_gpu_power query = {0}; - int query_size = sizeof(query); - - if (amdgpu_dpm == 0) + if (!adev->pm.dpm_enabled) return -ENOENT; switch (info->sensor_info.type) { @@ -746,10 +769,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file /* get average GPU power */ if (amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, - (void *)&query, &query_size)) { + (void *)&ui32, &ui32_size)) { return -EINVAL; } - ui32 = query.average_gpu_power >> 8; + ui32 >>= 8; break; case AMDGPU_INFO_SENSOR_VDDNB: /* get VDDNB in millivolts */ @@ -914,8 +937,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_get_sync(dev->dev); - 
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); - if (adev->asic_type != CHIP_RAVEN) { amdgpu_uvd_free_handles(adev, file_priv); amdgpu_vce_free_handles(adev, file_priv); @@ -935,6 +956,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); amdgpu_vm_fini(adev, &fpriv->vm); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + if (pasid) amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); amdgpu_bo_unref(&pd); @@ -1088,6 +1111,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_info_firmware fw_info; struct drm_amdgpu_query_fw query_fw; + struct atom_context *ctx = adev->mode_info.atom_context; int ret, i; /* VCE */ @@ -1146,6 +1170,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* RLC SAVE RESTORE LIST CNTL */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST GPM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST SRM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* MEC */ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; query_fw.index = 0; @@ -1210,6 +1258,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index bd67f4cb8e6c..a365ea2383d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -28,6 +28,21 @@ * Christian König <christian.koenig@amd.com> */ +/** + * DOC: MMU Notifier + * + * For coherent userptr handling, the driver registers an MMU notifier that + * informs it about updates to the page tables of a process. + * + * When somebody tries to invalidate the page tables we block the update until + * all operations on the pages in question are completed, then those pages are + * marked as accessed and also dirty if it wasn't a read-only access. + * + * New command submissions using the userptrs in question are delayed until all + * page table invalidations are completed and we once more see a coherent process + * address space.
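For readers unfamiliar with the mechanism the DOC block describes, here is a minimal, self-contained sketch of the same registration pattern. It is not part of this patch; the my_drv_* names are purely illustrative and the 4.18-era notifier callback signatures are assumed:

/* Hypothetical sketch of the MMU-notifier pattern described above. */
#include <linux/mmu_notifier.h>
#include <linux/sched/mm.h>

struct my_drv_mn {
	struct mmu_notifier mn;	/* embedded so container_of() recovers us */
};

static void my_drv_mn_invalidate_start(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end)
{
	/* container_of(mn, struct my_drv_mn, mn) recovers the driver
	 * object here.  Block until GPU work touching [start, end) has
	 * finished, then mark the affected pages accessed/dirty, as the
	 * DOC text above describes. */
}

static const struct mmu_notifier_ops my_drv_mn_ops = {
	.invalidate_range_start = my_drv_mn_invalidate_start,
};

static int my_drv_mn_attach(struct my_drv_mn *m)
{
	m->mn.ops = &my_drv_mn_ops;
	return mmu_notifier_register(&m->mn, current->mm);
}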
+ */ + #include <linux/firmware.h> #include <linux/module.h> #include <linux/mmu_notifier.h> @@ -36,12 +51,30 @@ #include <drm/drm.h> #include "amdgpu.h" +#include "amdgpu_amdkfd.h" +/** + * struct amdgpu_mn + * + * @adev: amdgpu device pointer + * @mm: process address space + * @mn: MMU notifier structure + * @type: type of MMU notifier + * @work: destruction work item + * @node: hash table node to find structure by adev and mn + * @lock: rw semaphore protecting the notifier nodes + * @objects: interval tree containing amdgpu_mn_nodes + * @read_lock: mutex for recursive locking of @lock + * @recursion: depth of recursion + * + * Data for each amdgpu device and process address space. + */ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; struct mmu_notifier mn; + enum amdgpu_mn_type type; /* only used on destruction */ struct work_struct work; @@ -56,13 +89,21 @@ struct amdgpu_mn { atomic_t recursion; }; +/** + * struct amdgpu_mn_node + * + * @it: interval node defining start-last of the affected address range + * @bos: list of all BOs in the affected address range + * + * Manages all BOs which are affected by a certain range of address space. + */ struct amdgpu_mn_node { struct interval_tree_node it; struct list_head bos; }; /** - * amdgpu_mn_destroy - destroy the rmn + * amdgpu_mn_destroy - destroy the MMU notifier * * @work: previously scheduled work item * * @@ -70,47 +111,50 @@ struct amdgpu_mn_node { */ static void amdgpu_mn_destroy(struct work_struct *work) { - struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = rmn->adev; + struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); + struct amdgpu_device *adev = amn->adev; struct amdgpu_mn_node *node, *next_node; struct amdgpu_bo *bo, *next_bo; mutex_lock(&adev->mn_lock); - down_write(&rmn->lock); - hash_del(&rmn->node); + down_write(&amn->lock); + hash_del(&amn->node); rbtree_postorder_for_each_entry_safe(node, next_node, - &rmn->objects.rb_root, it.rb) { + &amn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); } kfree(node); } - up_write(&rmn->lock); + up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); - kfree(rmn); + mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + kfree(amn); } /** * amdgpu_mn_release - callback to notify about mm destruction * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * * Schedule a work item to lazily destroy our notifier.
*/ static void amdgpu_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); - INIT_WORK(&rmn->work, amdgpu_mn_destroy); - schedule_work(&rmn->work); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + + INIT_WORK(&amn->work, amdgpu_mn_destroy); + schedule_work(&amn->work); } /** - * amdgpu_mn_lock - take the write side lock for this mn + * amdgpu_mn_lock - take the write side lock for this notifier + * + * @mn: our notifier */ void amdgpu_mn_lock(struct amdgpu_mn *mn) { @@ -119,7 +163,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_unlock - drop the write side lock for this mn + * amdgpu_mn_unlock - drop the write side lock for this notifier + * + * @mn: our notifier */ void amdgpu_mn_unlock(struct amdgpu_mn *mn) { @@ -128,40 +174,38 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_read_lock - take the rmn read lock - * - * @rmn: our notifier + * amdgpu_mn_read_lock - take the read side lock for this notifier * - * Take the rmn read side lock. + * @amn: our notifier */ -static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) { - mutex_lock(&rmn->read_lock); - if (atomic_inc_return(&rmn->recursion) == 1) - down_read_non_owner(&rmn->lock); - mutex_unlock(&rmn->read_lock); + mutex_lock(&amn->read_lock); + if (atomic_inc_return(&amn->recursion) == 1) + down_read_non_owner(&amn->lock); + mutex_unlock(&amn->read_lock); } /** - * amdgpu_mn_read_unlock - drop the rmn read lock + * amdgpu_mn_read_unlock - drop the read side lock for this notifier * - * @rmn: our notifier - * - * Drop the rmn read side lock. + * @amn: our notifier */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&rmn->recursion) == 0) - up_read_non_owner(&rmn->lock); + if (atomic_dec_return(&amn->recursion) == 0) + up_read_non_owner(&amn->lock); } /** * amdgpu_mn_invalidate_node - unmap all BOs of a node * * @node: the node with the BOs to unmap + * @start: start of address range affected + * @end: end of address range affected * - * We block for all BOs and unmap them by move them - * into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. */ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, unsigned long start, @@ -185,30 +229,30 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start - callback to notify about mm change + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * - * We block for all BOs between start and end to be idle and - * unmap them by move them into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. 
*/ -static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(rmn); + amdgpu_mn_read_lock(amn); - it = interval_tree_iter_first(&rmn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; @@ -220,10 +264,53 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, } /** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @mm: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restored in amdgpu_mn_invalidate_range_end_hsa. + */ +static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + amdgpu_mn_read_lock(amn); + + it = interval_tree_iter_first(&amn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start, end)) + amdgpu_amdkfd_evict_userptr(mem, mm); + } + } +} + +/** * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * * @@ -234,28 +321,44 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - amdgpu_mn_read_unlock(rmn); + amdgpu_mn_read_unlock(amn); } -static const struct mmu_notifier_ops amdgpu_mn_ops = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, +static const struct mmu_notifier_ops amdgpu_mn_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, + [AMDGPU_MN_TYPE_HSA] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, }; +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type.
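As a one-line illustration of why that key construction is collision-free, consider the following helper; it is not in the patch and only restates the argument the comment makes:

/* Sketch: slab-allocated mm_structs are at least pointer-aligned, so
 * the lowest address bits are zero and can encode the notifier type
 * (AMDGPU_MN_TYPE_GFX = 0, AMDGPU_MN_TYPE_HSA = 1) without ever
 * letting two distinct (mm, type) pairs produce the same key. */
static inline unsigned long example_mn_key(struct mm_struct *mm,
					   enum amdgpu_mn_type type)
{
	BUILD_BUG_ON(AMDGPU_MN_TYPE_HSA >= sizeof(void *));
	return (unsigned long)mm + type;
}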
+ */ +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) + /** * amdgpu_mn_get - create notifier context * * @adev: amdgpu device pointer + * @type: type of MMU notifier context * * Creates a notifier context for current->mm. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { struct mm_struct *mm = current->mm; - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; + unsigned long key = AMDGPU_MN_KEY(mm, type); int r; mutex_lock(&adev->mn_lock); @@ -264,40 +367,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) + hash_for_each_possible(adev->mn_hash, amn, node, key) + if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) goto release_locks; - rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) { - rmn = ERR_PTR(-ENOMEM); + amn = kzalloc(sizeof(*amn), GFP_KERNEL); + if (!amn) { + amn = ERR_PTR(-ENOMEM); goto release_locks; } - rmn->adev = adev; - rmn->mm = mm; - rmn->mn.ops = &amdgpu_mn_ops; - init_rwsem(&rmn->lock); - rmn->objects = RB_ROOT_CACHED; - mutex_init(&rmn->read_lock); - atomic_set(&rmn->recursion, 0); + amn->adev = adev; + amn->mm = mm; + init_rwsem(&amn->lock); + amn->type = type; + amn->mn.ops = &amdgpu_mn_ops[type]; + amn->objects = RB_ROOT_CACHED; + mutex_init(&amn->read_lock); + atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&rmn->mn, mm); + r = __mmu_notifier_register(&amn->mn, mm); if (r) - goto free_rmn; + goto free_amn; - hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); + hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); mutex_unlock(&adev->mn_lock); - return rmn; + return amn; -free_rmn: +free_amn: up_write(&mm->mmap_sem); mutex_unlock(&adev->mn_lock); - kfree(rmn); + kfree(amn); return ERR_PTR(r); } @@ -315,37 +419,40 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { unsigned long end = addr + amdgpu_bo_size(bo) - 1; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *rmn; - struct amdgpu_mn_node *node = NULL; + enum amdgpu_mn_type type = + bo->kfd_bo ? 
AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; + struct amdgpu_mn *amn; + struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev); - if (IS_ERR(rmn)) - return PTR_ERR(rmn); + amn = amdgpu_mn_get(adev, type); + if (IS_ERR(amn)) + return PTR_ERR(amn); + + new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; INIT_LIST_HEAD(&bos); - down_write(&rmn->lock); + down_write(&amn->lock); - while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { + while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { kfree(node); node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &rmn->objects); + interval_tree_remove(&node->it, &amn->objects); addr = min(it->start, addr); end = max(it->last, end); list_splice(&node->bos, &bos); } - if (!node) { - node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); - if (!node) { - up_write(&rmn->lock); - return -ENOMEM; - } - } + if (!node) + node = new_node; + else + kfree(new_node); - bo->mn = rmn; + bo->mn = amn; node->it.start = addr; node->it.last = end; @@ -353,9 +460,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&bos, &node->bos); list_add(&bo->mn_list, &node->bos); - interval_tree_insert(&node->it, &rmn->objects); + interval_tree_insert(&node->it, &amn->objects); - up_write(&rmn->lock); + up_write(&amn->lock); return 0; } @@ -370,18 +477,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) void amdgpu_mn_unregister(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; struct list_head *head; mutex_lock(&adev->mn_lock); - rmn = bo->mn; - if (rmn == NULL) { + amn = bo->mn; + if (amn == NULL) { mutex_unlock(&adev->mn_lock); return; } - down_write(&rmn->lock); + down_write(&amn->lock); /* save the next list entry for later */ head = bo->mn_list.next; @@ -391,12 +498,13 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) if (list_empty(head)) { struct amdgpu_mn_node *node; + node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &rmn->objects); + interval_tree_remove(&node->it, &amn->objects); kfree(node); } - up_write(&rmn->lock); + up_write(&amn->lock); mutex_unlock(&adev->mn_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d0095a3793b8..eb0f432f78fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -29,16 +29,23 @@ */ struct amdgpu_mn; +enum amdgpu_mn_type { + AMDGPU_MN_TYPE_GFX, + AMDGPU_MN_TYPE_HSA, +}; + #if defined(CONFIG_MMU_NOTIFIER) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d6416ee52e32..b9e9e8b02fb7 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -308,7 +308,6 @@ struct amdgpu_display_funcs { struct amdgpu_framebuffer { struct drm_framebuffer base; - struct drm_gem_object *obj; /* caching for later use */ uint64_t address; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6d08cde8443c..b12526ce1a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -38,6 +38,19 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +/** + * DOC: amdgpu_object + * + * This defines the interfaces to operate on an &amdgpu_bo buffer object, which + * represents memory used by the driver (VRAM, system memory, etc.). The driver + * provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces + * to create/destroy/set buffer objects, which are then managed by the kernel TTM + * memory manager. + * The interfaces are also used internally by kernel clients, including gfx, + * uvd, etc., for kernel-managed allocations used by the GPU. + * + */ + static bool amdgpu_need_backup(struct amdgpu_device *adev) { if (adev->flags & AMD_IS_APU) @@ -50,11 +63,35 @@ static bool amdgpu_need_backup(struct amdgpu_device *adev) return true; } +/** + * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting + * + * @bo: &amdgpu_bo buffer object + * + * This function is called when a BO stops being pinned, and updates the + * &amdgpu_device pin_size values accordingly. + */ +static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + + if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { + atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size); + atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo), + &adev->visible_pin_size); + } else if (bo->tbo.mem.mem_type == TTM_PL_TT) { + atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); + } +} + static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + if (WARN_ON_ONCE(bo->pin_count > 0)) + amdgpu_bo_subtract_pin_size(bo); + if (bo->kfd_bo) amdgpu_amdkfd_unreserve_system_memory_limit(bo); @@ -73,6 +110,16 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) kfree(bo); } +/** + * amdgpu_ttm_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo + * @bo: buffer object to be checked + * + * Uses destroy function associated with the object to determine if this is + * an &amdgpu_bo. + * + * Returns: + * true if the object belongs to &amdgpu_bo, false if not. + */ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_ttm_bo_destroy) @@ -80,6 +127,14 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) return false; } +/** + * amdgpu_ttm_placement_from_domain - set buffer's placement + * @abo: &amdgpu_bo buffer object whose placement is to be set + * @domain: requested domain + * + * Sets buffer's placement according to requested domain and the buffer's + * flags. + */ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) { struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -184,21 +239,29 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * - * Returns 0 on success, negative error code otherwise. + * Returns: + * 0 on success, negative error code otherwise.
*/ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr) { + struct amdgpu_bo_param bp; bool free = false; int r; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = align; + bp.domain = domain; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + if (!*bo_ptr) { - r = amdgpu_bo_create(adev, size, align, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, bo_ptr); + r = amdgpu_bo_create(adev, &bp, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -213,22 +276,33 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, goto error_free; } - r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr); + r = amdgpu_bo_pin(*bo_ptr, domain); if (r) { dev_err(adev->dev, "(%d) kernel bo pin failed\n", r); goto error_unreserve; } + r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo); + if (r) { + dev_err(adev->dev, "%p bind failed\n", *bo_ptr); + goto error_unpin; + } + + if (gpu_addr) + *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr); + if (cpu_addr) { r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); if (r) { dev_err(adev->dev, "(%d) kernel bo map failed\n", r); - goto error_unreserve; + goto error_unpin; } } return 0; +error_unpin: + amdgpu_bo_unpin(*bo_ptr); error_unreserve: amdgpu_bo_unreserve(*bo_ptr); @@ -254,7 +328,8 @@ error_free: * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * - * Returns 0 on success, negative error code otherwise. + * Returns: + * 0 on success, negative error code otherwise. */ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, @@ -278,6 +353,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, * amdgpu_bo_free_kernel - free BO for kernel use * * @bo: amdgpu BO to free + * @gpu_addr: pointer to where the BO's GPU memory space address was stored + * @cpu_addr: pointer to where the BO's CPU memory space address was stored * * unmaps and unpins a BO for kernel internal use.
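To see the helpers documented above from the caller's side, here is a hedged usage sketch; it is not code from this patch, "adev" is assumed to be a valid device pointer, and error handling is abbreviated:

/* Sketch: amdgpu_bo_create_kernel() leaves the BO pinned, GART-bound
 * and kmapped, so both addresses are immediately usable. */
struct amdgpu_bo *bo = NULL;
u64 gpu_addr;
void *cpu_addr;
int r;

r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
			    AMDGPU_GEM_DOMAIN_VRAM,
			    &bo, &gpu_addr, &cpu_addr);
if (r)
	return r;

memset(cpu_addr, 0, PAGE_SIZE);	/* CPU view of the buffer */
/* ... program gpu_addr into hardware ... */

amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_addr);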
*/ @@ -341,27 +418,25 @@ fail: return false; } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +static int amdgpu_bo_do_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { - .interruptible = (type != ttm_bo_type_kernel), + .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = false, - .resv = resv, + .resv = bp->resv, .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; struct amdgpu_bo *bo; - unsigned long page_align; + unsigned long page_align, size = bp->size; size_t acc_size; int r; - page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; + page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; size = ALIGN(size, PAGE_SIZE); - if (!amdgpu_bo_validate_size(adev, size, domain)) + if (!amdgpu_bo_validate_size(adev, size, bp->domain)) return -ENOMEM; *bo_ptr = NULL; @@ -375,18 +450,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA); + bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : + bp->domain; bo->allowed_domains = bo->preferred_domains; - if (type != ttm_bo_type_kernel && + if (bp->type != ttm_bo_type_kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; - bo->flags = flags; + bo->flags = bp->flags; #ifdef CONFIG_X86_32 /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit @@ -417,15 +488,17 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, #endif bo->tbo.bdev = &adev->mman.bdev; - amdgpu_ttm_placement_from_domain(bo, domain); + amdgpu_ttm_placement_from_domain(bo, bp->domain); + if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 1; - r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type, &bo->placement, page_align, &ctx, acc_size, - NULL, resv, &amdgpu_ttm_bo_destroy); + NULL, bp->resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) return r; - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, @@ -433,10 +506,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); - if (type == ttm_bo_type_kernel) - bo->tbo.priority = 1; - - if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && + if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; @@ -449,20 +519,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, bo->tbo.moving = dma_fence_get(fence); dma_fence_put(fence); } - if (!resv) + if (!bp->resv) amdgpu_bo_unreserve(bo); *bo_ptr = bo; trace_amdgpu_bo_create(bo); /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ - if (type == ttm_bo_type_device) + if (bp->type == ttm_bo_type_device) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; return 
0; fail_unreserve: - if (!resv) + if (!bp->resv) ww_mutex_unlock(&bo->tbo.resv->lock); amdgpu_bo_unref(&bo); return r; @@ -472,16 +542,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, unsigned long size, int byte_align, struct amdgpu_bo *bo) { + struct amdgpu_bo_param bp; int r; if (bo->shadow) return 0; - r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW, - ttm_bo_type_kernel, - bo->tbo.resv, &bo->shadow); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = byte_align; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.resv; + + r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -492,28 +568,40 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +/** + * amdgpu_bo_create - create an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @bp: parameters to be used for the buffer object + * @bo_ptr: pointer to the buffer object pointer + * + * Creates an &amdgpu_bo buffer object; and if requested, also creates a + * shadow object. + * Shadow object is used to backup the original buffer object, and is always + * in GTT. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { - uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; + u64 flags = bp->flags; int r; - r = amdgpu_bo_do_create(adev, size, byte_align, domain, - parent_flags, type, resv, bo_ptr); + bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; + r = amdgpu_bo_do_create(adev, bp, bo_ptr); if (r) return r; if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { - if (!resv) + if (!bp->resv) WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, NULL)); - r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); + r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); - if (!resv) + if (!bp->resv) reservation_object_unlock((*bo_ptr)->tbo.resv); if (r) @@ -523,6 +611,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, return r; } +/** + * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @ring: amdgpu_ring for the engine handling the buffer operations + * @bo: &amdgpu_bo buffer to be backed up + * @resv: reservation object with embedded fence + * @fence: dma_fence associated with the operation + * @direct: whether to submit the job directly + * + * Copies an &amdgpu_bo buffer object to its shadow object. + * Not used for now. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, @@ -555,6 +658,18 @@ err: return r; } +/** + * amdgpu_bo_validate - validate an &amdgpu_bo buffer object + * @bo: pointer to the buffer object + * + * Sets placement according to domain; and changes placement and caching + * policy of the buffer object according to the placement. + * This is used for validating shadow bos. 
It calls ttm_bo_validate() to + * make sure the buffer is resident where it needs to be. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_validate(struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { false, false }; @@ -577,6 +692,22 @@ retry: return r; } +/** + * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @ring: amdgpu_ring for the engine handling the buffer operations + * @bo: &amdgpu_bo buffer to be restored + * @resv: reservation object with embedded fence + * @fence: dma_fence associated with the operation + * @direct: whether to submit the job directly + * + * Copies a buffer object's shadow content back to the object. + * This is used for recovering a buffer from its shadow in case of a gpu + * reset where vram context may be lost. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, @@ -609,6 +740,17 @@ err: return r; } +/** + * amdgpu_bo_kmap - map an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be mapped + * @ptr: kernel virtual address to be returned + * + * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls + * amdgpu_bo_kptr() to get the kernel virtual address. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) { void *kptr; @@ -639,6 +781,15 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } +/** + * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object + * @bo: &amdgpu_bo buffer object + * + * Calls ttm_kmap_obj_virtual() to get the kernel virtual address + * + * Returns: + * the virtual address of a buffer object area. + */ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) { bool is_iomem; @@ -646,12 +797,27 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); } +/** + * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be unmapped + * + * Unmaps a kernel map set up by amdgpu_bo_kmap(). + */ void amdgpu_bo_kunmap(struct amdgpu_bo *bo) { if (bo->kmap.bo) ttm_bo_kunmap(&bo->kmap); } +/** + * amdgpu_bo_ref - reference an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object + * + * References the contained &ttm_buffer_object. + * + * Returns: + * a refcounted pointer to the &amdgpu_bo buffer object. + */ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) { if (bo == NULL) @@ -661,6 +827,12 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) return bo; } +/** + * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object + * + * Unreferences the contained &ttm_buffer_object and clear the pointer + */ void amdgpu_bo_unref(struct amdgpu_bo **bo) { struct ttm_buffer_object *tbo; @@ -674,9 +846,30 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo) *bo = NULL; } +/** + * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be pinned + * @domain: domain to be pinned to + * @min_offset: the start of requested address range + * @max_offset: the end of requested address range + * + * Pins the buffer object according to requested domain and address range. If + * the memory is unbound gart memory, binds the pages into gart table. Adjusts + * pin_count and pin_size accordingly. 
+ * + * Pinning means to lock pages in memory along with keeping them at a fixed + * offset. It is required when a buffer can not be moved, for example, when + * a display buffer is being scanned out. + * + * Compared with amdgpu_bo_pin(), this function gives more flexibility on + * where to pin a buffer if there are specific restrictions on where a buffer + * must be located. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset, - u64 *gpu_addr) + u64 min_offset, u64 max_offset) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { false, false }; @@ -689,8 +882,17 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) - return -EINVAL; + if (bo->prime_shared_count) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + domain = AMDGPU_GEM_DOMAIN_GTT; + else + return -EINVAL; + } + + /* This assumes only APU display buffers are pinned with (VRAM|GTT). + * See function amdgpu_display_supported_domains() + */ + domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); if (bo->pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; @@ -699,8 +901,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; bo->pin_count++; - if (gpu_addr) - *gpu_addr = amdgpu_bo_gpu_offset(bo); if (max_offset != 0) { u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset; @@ -736,34 +936,48 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, goto error; } - r = amdgpu_ttm_alloc_gart(&bo->tbo); - if (unlikely(r)) { - dev_err(adev->dev, "%p bind failed\n", bo); - goto error; - } - bo->pin_count = 1; - if (gpu_addr != NULL) - *gpu_addr = amdgpu_bo_gpu_offset(bo); domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); if (domain == AMDGPU_GEM_DOMAIN_VRAM) { - adev->vram_pin_size += amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) - adev->invisible_pin_size += amdgpu_bo_size(bo); + atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size); + atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo), + &adev->visible_pin_size); } else if (domain == AMDGPU_GEM_DOMAIN_GTT) { - adev->gart_pin_size += amdgpu_bo_size(bo); + atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size); } error: return r; } -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr) +/** + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be pinned + * @domain: domain to be pinned to + * + * A simple wrapper to amdgpu_bo_pin_restricted(). + * Provides a simpler API for buffers that do not have any strict restrictions + * on where a buffer must be located. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) { - return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr); + return amdgpu_bo_pin_restricted(bo, domain, 0, 0); } +/** + * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be unpinned + * + * Decreases the pin_count, and clears the flags if pin_count reaches 0. + * Changes placement and pin size accordingly. + * + * Returns: + * 0 for success or a negative error code on failure. 
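Because amdgpu_bo_pin() no longer hands the GPU address back through an out-parameter, callers now follow a pin-then-query pattern. A hedged sketch of that flow (error handling trimmed; "bo" is assumed to be an already-created, unpinned BO):

/* Sketch of the reworked pin flow introduced by this patch. */
u64 gpu_addr = 0;
int r;

r = amdgpu_bo_reserve(bo, false);
if (r)
	return r;

r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
if (!r)
	gpu_addr = amdgpu_bo_gpu_offset(bo); /* valid while pinned */

amdgpu_bo_unreserve(bo);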
+ */ int amdgpu_bo_unpin(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -777,28 +991,30 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) bo->pin_count--; if (bo->pin_count) return 0; + + amdgpu_bo_subtract_pin_size(bo); + for (i = 0; i < bo->placement.num_placement; i++) { bo->placements[i].lpfn = 0; bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (unlikely(r)) { + if (unlikely(r)) dev_err(adev->dev, "%p validate failed for unpin\n", bo); - goto error; - } - - if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { - adev->vram_pin_size -= amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) - adev->invisible_pin_size -= amdgpu_bo_size(bo); - } else if (bo->tbo.mem.mem_type == TTM_PL_TT) { - adev->gart_pin_size -= amdgpu_bo_size(bo); - } -error: return r; } +/** + * amdgpu_bo_evict_vram - evict VRAM buffers + * @adev: amdgpu device object + * + * Evicts all VRAM buffers on the lru list of the memory type. + * Mainly used for evicting vram at suspend time. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) { /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ @@ -821,6 +1037,15 @@ static const char *amdgpu_vram_names[] = { "DDR4", }; +/** + * amdgpu_bo_init - initialize memory manager + * @adev: amdgpu device object + * + * Calls amdgpu_ttm_init() to initialize amdgpu memory manager. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_init(struct amdgpu_device *adev) { /* reserve PAT memory space to WC for VRAM */ @@ -838,6 +1063,29 @@ int amdgpu_bo_init(struct amdgpu_device *adev) return amdgpu_ttm_init(adev); } +/** + * amdgpu_bo_late_init - late init + * @adev: amdgpu device object + * + * Calls amdgpu_ttm_late_init() to free resources used earlier during + * initialization. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_late_init(struct amdgpu_device *adev) +{ + amdgpu_ttm_late_init(adev); + + return 0; +} + +/** + * amdgpu_bo_fini - tear down memory manager + * @adev: amdgpu device object + * + * Reverses amdgpu_bo_init() to tear down memory manager. + */ void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); @@ -845,12 +1093,33 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); } +/** + * amdgpu_bo_fbdev_mmap - mmap fbdev memory + * @bo: &amdgpu_bo buffer object + * @vma: vma as input from the fbdev mmap method + * + * Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma) { return ttm_fbdev_mmap(vma, &bo->tbo); } +/** + * amdgpu_bo_set_tiling_flags - set tiling flags + * @bo: &amdgpu_bo buffer object + * @tiling_flags: new flags + * + * Sets buffer object's tiling flags with the new one. Used by GEM ioctl or + * kernel driver to set the tiling flags on a buffer. + * + * Returns: + * 0 for success or a negative error code on failure. 
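Since the getter below asserts the reservation lock via lockdep_assert_held(), correct callers bracket both tiling-flag accessors with reserve/unreserve. An illustrative sketch; the AMDGPU_TILING_SET() value is an assumption of a typical caller, not taken from this patch:

/* Sketch: tiling flags may only be touched while the BO is reserved. */
u64 tiling = 0;
int r;

r = amdgpu_bo_reserve(bo, false);
if (r)
	return r;

r = amdgpu_bo_set_tiling_flags(bo, AMDGPU_TILING_SET(SWIZZLE_MODE, 1));
if (!r)
	amdgpu_bo_get_tiling_flags(bo, &tiling);

amdgpu_bo_unreserve(bo);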
+ */ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -863,6 +1132,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) return 0; } +/** + * amdgpu_bo_get_tiling_flags - get tiling flags + * @bo: &amdgpu_bo buffer object + * @tiling_flags: returned flags + * + * Gets buffer object's tiling flags. Used by GEM ioctl or kernel driver to + * get the tiling flags on a buffer. + */ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { lockdep_assert_held(&bo->tbo.resv->lock.base); @@ -871,6 +1148,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) *tiling_flags = bo->tiling_flags; } +/** + * amdgpu_bo_set_metadata - set metadata + * @bo: &amdgpu_bo buffer object + * @metadata: new metadata + * @metadata_size: size of the new metadata + * @flags: flags of the new metadata + * + * Sets buffer object's metadata, its size and flags. + * Used via GEM ioctl. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, uint32_t metadata_size, uint64_t flags) { @@ -900,6 +1190,21 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, return 0; } +/** + * amdgpu_bo_get_metadata - get metadata + * @bo: &amdgpu_bo buffer object + * @buffer: returned metadata + * @buffer_size: size of the buffer + * @metadata_size: size of the returned metadata + * @flags: flags of the returned metadata + * + * Gets buffer object's metadata, its size and flags. buffer_size shall not be + * less than metadata_size. + * Used via GEM ioctl. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags) @@ -923,6 +1228,16 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, return 0; } +/** + * amdgpu_bo_move_notify - notification about a memory move + * @bo: pointer to a buffer object + * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new information of the buffer object + * + * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs + * bookkeeping. + * TTM driver callback which is called when ttm moves a buffer. + */ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_mem) @@ -951,6 +1266,17 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } +/** + * amdgpu_bo_fault_reserve_notify - notification about a memory fault + * @bo: pointer to a buffer object + * + * Notifies the driver we are taking a fault on this BO and have reserved it, + * also performs bookkeeping. + * TTM driver callback for dealing with vm faults. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); @@ -1024,10 +1350,11 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, * amdgpu_bo_gpu_offset - return GPU offset of bo * @bo: amdgpu object for which we query the offset * - * Returns current GPU offset of the object. - * * Note: object should either be pinned or reserved when calling this * function, it might be useful to add a check for this for debugging. + * + * Returns: + * current GPU offset of the object.
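Stepping back, the amdgpu_bo_create() rework earlier in this patch replaces a long argument list with struct amdgpu_bo_param. A hedged sketch of the new caller idiom, mirroring the conversions shown in the hunks above (field values are illustrative):

/* Sketch: fill an amdgpu_bo_param and create the BO in one call. */
struct amdgpu_bo_param bp;
struct amdgpu_bo *bo;
int r;

memset(&bp, 0, sizeof(bp));
bp.size = PAGE_SIZE;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;	/* no reservation object shared with another BO */

r = amdgpu_bo_create(adev, &bp, &bo);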
*/ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { @@ -1042,3 +1369,22 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) return bo->tbo.offset; } + +/** + * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout + * @adev: amdgpu device object + * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>` + * + * Returns: + * Which of the allowed domains is preferred for pinning the BO for scanout. + */ +uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, + uint32_t domain) +{ + if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + domain = AMDGPU_GEM_DOMAIN_VRAM; + if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) + domain = AMDGPU_GEM_DOMAIN_GTT; + } + return domain; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 546f77cb7882..9c3e29a04eb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -33,6 +33,16 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX +struct amdgpu_bo_param { + unsigned long size; + int byte_align; + u32 domain; + u32 preferred_domain; + u64 flags; + enum ttm_bo_type type; + struct reservation_object *resv; +}; + /* bo virtual addresses in a vm */ struct amdgpu_bo_va_mapping { struct amdgpu_bo_va *bo_va; @@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } /** + * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM + */ +static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; + struct drm_mm_node *node = bo->tbo.mem.mm_node; + unsigned long pages_left; + + if (bo->tbo.mem.mem_type != TTM_PL_VRAM) + return false; + + for (pages_left = bo->tbo.mem.num_pages; pages_left; + pages_left -= node->size, node++) + if (node->start < fpfn) + return true; + + return false; +} + +/** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) @@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } -int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +int amdgpu_bo_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, @@ -223,13 +252,13 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr); +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset, - u64 *gpu_addr); + u64 min_offset, u64 max_offset); int amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); +int amdgpu_bo_late_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma); @@ -259,7 +288,8 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct reservation_object 
*resv, struct dma_fence **fence, bool direct); - +uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, + uint32_t domain); /* * sub allocation diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 361975cf45a9..15a1192c1ec5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -68,15 +68,49 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) { mutex_lock(&adev->pm.mutex); if (power_supply_is_system_supplied() > 0) - adev->pm.dpm.ac_power = true; + adev->pm.ac_power = true; else - adev->pm.dpm.ac_power = false; + adev->pm.ac_power = false; if (adev->powerplay.pp_funcs->enable_bapm) - amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power); + amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power); mutex_unlock(&adev->pm.mutex); } } +/** + * DOC: power_dpm_state + * + * The power_dpm_state file is a legacy interface and is only provided for + * backwards compatibility. The amdgpu driver provides a sysfs API for adjusting + * certain power related parameters. The file power_dpm_state is used for this. + * It accepts the following arguments: + * + * - battery + * + * - balanced + * + * - performance + * + * battery + * + * On older GPUs, the vbios provided a special power state for battery + * operation. Selecting battery switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + * balanced + * + * On older GPUs, the vbios provided a special power state for balanced + * operation. Selecting balanced switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + * performance + * + * On older GPUs, the vbios provided a special power state for performance + * operation. Selecting performance switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + */ + static ssize_t amdgpu_get_dpm_state(struct device *dev, struct device_attribute *attr, char *buf) @@ -131,6 +165,66 @@ fail: return count; } + +/** + * DOC: power_dpm_force_performance_level + * + * The amdgpu driver provides a sysfs API for adjusting certain power + * related parameters. The file power_dpm_force_performance_level is + * used for this. It accepts the following arguments: + * + * - auto + * + * - low + * + * - high + * + * - manual + * + * - profile_standard + * + * - profile_min_sclk + * + * - profile_min_mclk + * + * - profile_peak + * + * auto + * + * When auto is selected, the driver will attempt to dynamically select + * the optimal power profile for current conditions in the driver. + * + * low + * + * When low is selected, the clocks are forced to the lowest power state. + * + * high + * + * When high is selected, the clocks are forced to the highest power state. + * + * manual + * + * When manual is selected, the user can manually adjust which power states + * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk, + * and pp_dpm_pcie files and adjust the power state transition heuristics + * via the pp_power_profile_mode sysfs file. + * + * profile_standard + * profile_min_sclk + * profile_min_mclk + * profile_peak + * + * When the profiling modes are selected, clock and power gating are + * disabled and the clocks are set for different profiling cases. 
This + * mode is recommended for profiling specific workloads where you do + * not want clock or power gating or clock fluctuation to interfere + * with your results. profile_standard sets the clocks to a fixed clock + * level which varies from asic to asic. profile_min_sclk forces the sclk + * to the lowest level. profile_min_mclk forces the mclk to the lowest level. + * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels. + * + */ + static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct device_attribute *attr, char *buf) @@ -324,6 +418,17 @@ fail: return count; } +/** + * DOC: pp_table + * + * The amdgpu driver provides a sysfs API for uploading new powerplay + * tables. The file pp_table is used for this. Reading the file + * will dump the current power play table. Writing to the file + * will attempt to upload a new powerplay table and re-initialize + * powerplay using that new table. + * + */ + static ssize_t amdgpu_get_pp_table(struct device *dev, struct device_attribute *attr, char *buf) @@ -360,6 +465,32 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return count; } +/** + * DOC: pp_od_clk_voltage + * + * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages + * in each power level within a power state. The file pp_od_clk_voltage is used + * for this. + * + * Reading the file will display: + * + * - a list of engine clock levels and voltages labeled OD_SCLK + * + * - a list of memory clock levels and voltages labeled OD_MCLK + * + * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE + * + * To manually adjust these settings, first select manual using + * power_dpm_force_performance_level. Enter a new value for each + * level by writing a string that contains "s/m level clock voltage" to + * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz + * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at + * 810 mV. When you have edited all of the states as needed, write + * "c" (commit) to the file to commit your changes. If you want to reset to the + * default power levels, write "r" (reset) to the file to reset them. + * + */ + static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, struct device_attribute *attr, const char *buf, @@ -437,6 +568,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); + size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); return size; } else { return snprintf(buf, PAGE_SIZE, "\n"); @@ -444,6 +576,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, } +/** + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie + * + * The amdgpu driver provides a sysfs API for adjusting what power levels + * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, + * and pp_dpm_pcie are used for this. + * + * Reading back the files will show you the available power levels within + * the power state and the clock information for those levels. + * + * To manually adjust these states, first select manual using + * power_dpm_force_performance_level. Then write a space-separated list of + * the level indices to enable to the file. E.g., "echo 4 5 6 > pp_dpm_sclk" + * will enable sclk levels 4, 5, and 6.
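+ *
+ * The pp_dpm_* files live in the GPU's sysfs device directory, e.g.
+ * /sys/class/drm/card0/device/pp_dpm_sclk (editor's example; the card
+ * index varies from system to system).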
+ */ + static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct device_attribute *attr, char *buf) @@ -457,6 +606,42 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, return snprintf(buf, PAGE_SIZE, "\n"); } +/* + * Worst case: 32 bits individually specified, in octal at 12 characters + * per line (+1 for \n). + */ +#define AMDGPU_MASK_BUF_MAX (32 * 13) + +static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask) +{ + int ret; + long level; + char *sub_str = NULL; + char *tmp; + char buf_cpy[AMDGPU_MASK_BUF_MAX + 1]; + const char delimiter[3] = {' ', '\n', '\0'}; + size_t bytes; + + *mask = 0; + + bytes = min(count, sizeof(buf_cpy) - 1); + memcpy(buf_cpy, buf, bytes); + buf_cpy[bytes] = '\0'; + tmp = buf_cpy; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); + if (ret) + return -EINVAL; + *mask |= 1 << level; + } else + break; + } + + return 0; +} + static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, struct device_attribute *attr, const char *buf, @@ -465,28 +650,15 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; int ret; - long level; - uint32_t i, mask = 0; - char sub_str[2]; - - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + uint32_t mask = 0; - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; - } + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); -fail: return count; } @@ -511,27 +683,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; int ret; - long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; - } if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); -fail: return count; } @@ -556,27 +716,15 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; int ret; - long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; - } if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); -fail: return count; } @@ -668,6 +816,26 @@ fail: return count; } +/** + * DOC: pp_power_profile_mode + * + * The amdgpu driver provides a sysfs API for adjusting the heuristics + * related to switching between power levels in a power state. The file + * pp_power_profile_mode is used for this. 
+ * + * Reading this file outputs a list of all of the predefined power profiles + * and the relevant heuristics settings for that profile. + * + * To select a profile or create a custom profile, first select manual using + * power_dpm_force_performance_level. Writing the number of a predefined + * profile to pp_power_profile_mode will enable those heuristics. To + * create a custom set of heuristics, write a string of numbers to the file + * starting with the number of the custom profile along with a setting + * for each heuristic parameter. Due to differences across asic families + * the heuristic parameters vary from family to family. + * + */ + static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, struct device_attribute *attr, char *buf) @@ -734,6 +902,36 @@ fail: return -EINVAL; } +/** + * DOC: gpu_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the GPU + * is as a percentage. The file gpu_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. + */ +static ssize_t amdgpu_get_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int r, value, size = sizeof(value); + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* read the IP busy sensor */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, + (void *)&value, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -767,6 +965,8 @@ static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR, amdgpu_get_pp_power_profile_mode, amdgpu_set_pp_power_profile_mode); static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_get_pp_od_clk_voltage, amdgpu_set_pp_od_clk_voltage); +static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, + amdgpu_get_busy_percent, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -1020,8 +1220,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; - struct pp_gpu_power query = {0}; - int r, size = sizeof(query); + u32 query = 0; + int r, size = sizeof(u32); unsigned uw; /* Can't get power when the card is off */ @@ -1041,7 +1241,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, return r; /* convert to microwatts */ - uw = (query.average_gpu_power >> 8) * 1000000; + uw = (query >> 8) * 1000000 + (query & 0xff) * 1000; return snprintf(buf, PAGE_SIZE, "%u\n", uw); } @@ -1109,6 +1309,62 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return count; } + +/** + * DOC: hwmon + * + * The amdgpu driver exposes the following sensor interfaces: + * + * - GPU temperature (via the on-die sensor) + * + * - GPU voltage + * + * - Northbridge voltage (APUs only) + * + * - GPU power + * + * - GPU fan + * + * hwmon interfaces for GPU temperature: + * + * - temp1_input: the on die GPU temperature in millidegrees Celsius + * + * - temp1_crit: temperature critical max value in millidegrees Celsius + * + * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * + * hwmon interfaces for GPU voltage: +
* + * - in0_input: the voltage on the GPU in millivolts + * + * - in1_input: the voltage on the Northbridge in millivolts + * + * hwmon interfaces for GPU power: + * + * - power1_average: average power used by the GPU in microWatts + * + * - power1_cap_min: minimum cap supported in microWatts + * + * - power1_cap_max: maximum cap supported in microWatts + * + * - power1_cap: selected power cap in microWatts + * + * hwmon interfaces for GPU fan: + * + * - pwm1: pulse width modulation fan level (0-255) + * + * - pwm1_enable: pulse width modulation fan control method (0: no fan speed control, 1: manual fan speed control using pwm interface, 2: automatic fan speed control) + * + * - pwm1_min: pulse width modulation fan control minimum level (0) + * + * - pwm1_max: pulse width modulation fan control maximum level (255) + * + * - fan1_input: fan speed in RPM + * + * You can use hwmon tools like sensors to view this information on your system. + * + */ + static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); @@ -1153,19 +1409,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* handle non-powerplay limitations */ - if (!adev->powerplay.pp_handle) { - /* Skip fan attributes if fan is not present */ - if (adev->pm.no_fan && - (attr == &sensor_dev_attr_pwm1.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; - /* requires powerplay */ - if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) - return 0; - } + + /* Skip fan attributes if fan is not present */ + if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr)) + return 0; /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && @@ -1462,10 +1713,10 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { - if (adev->powerplay.pp_funcs->powergate_uvd) { + if (adev->powerplay.pp_funcs->set_powergating_by_smu) { /* enable/disable UVD */ mutex_lock(&adev->pm.mutex); - amdgpu_dpm_powergate_uvd(adev, !enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); mutex_unlock(&adev->pm.mutex); } else { if (enable) { @@ -1484,10 +1735,10 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { - if (adev->powerplay.pp_funcs->powergate_vce) { + if (adev->powerplay.pp_funcs->set_powergating_by_smu) { /* enable/disable VCE */ mutex_lock(&adev->pm.mutex); - amdgpu_dpm_powergate_vce(adev, !enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); mutex_unlock(&adev->pm.mutex); } else { if (enable) { @@ -1619,6 +1870,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "pp_od_clk_voltage\n"); return ret; } + ret = device_create_file(adev->dev, + &dev_attr_gpu_busy_percent); + if (ret) { + DRM_ERROR("failed to create device file " + 
"gpu_busy_level\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -1654,13 +1912,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) &dev_attr_pp_power_profile_mode); device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); + device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) { - struct drm_device *ddev = adev->ddev; - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; int i = 0; if (!adev->pm.dpm_enabled) @@ -1675,30 +1931,35 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) amdgpu_fence_wait_empty(ring); } + mutex_lock(&adev->pm.mutex); + /* update battery/ac status */ + if (power_supply_is_system_supplied() > 0) + adev->pm.ac_power = true; + else + adev->pm.ac_power = false; + mutex_unlock(&adev->pm.mutex); + if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (!amdgpu_device_has_dc_support(adev)) { + mutex_lock(&adev->pm.mutex); + amdgpu_dpm_get_active_displays(adev); + adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; + adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); + adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); + /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ + if (adev->pm.pm_display_cfg.vrefresh > 120) + adev->pm.pm_display_cfg.min_vblank_time = 0; + if (adev->powerplay.pp_funcs->display_configuration_change) + adev->powerplay.pp_funcs->display_configuration_change( + adev->powerplay.pp_handle, + &adev->pm.pm_display_cfg); + mutex_unlock(&adev->pm.mutex); + } amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); } else { mutex_lock(&adev->pm.mutex); - adev->pm.dpm.new_active_crtcs = 0; - adev->pm.dpm.new_active_crtc_count = 0; - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (amdgpu_crtc->enabled) { - adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); - adev->pm.dpm.new_active_crtc_count++; - } - } - } - /* update battery/ac status */ - if (power_supply_is_system_supplied() > 0) - adev->pm.dpm.ac_power = true; - else - adev->pm.dpm.ac_power = false; - + amdgpu_dpm_get_active_displays(adev); amdgpu_dpm_change_power_state_locked(adev); - mutex_unlock(&adev->pm.mutex); } } @@ -1711,7 +1972,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { uint32_t value; - struct pp_gpu_power query = {0}; + uint32_t query = 0; int size; /* sanity check PP is enabled */ @@ -1734,17 +1995,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "\t%u mV (VDDGFX)\n", value); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDNB)\n", value); - size = sizeof(query); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) { - seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8, - query.vddc_power & 0xff); - seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8, - query.vddci_power & 0xff); - seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8, - query.max_gpu_power & 0xff); - seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8, - query.average_gpu_power & 0xff); - } + size = 
sizeof(uint32_t); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) + seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff); size = sizeof(value); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 4b584cb75bf4..3ed02f472003 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -23,6 +23,14 @@ * * Authors: Alex Deucher */ + +/** + * DOC: PRIME Buffer Sharing + * + * The following callback implementations are used for :ref:`sharing GEM buffer + * objects between different devices via PRIME <prime_buffer_sharing>`. + */ + #include <drm/drmP.h> #include "amdgpu.h" @@ -32,6 +40,14 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops; +/** + * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table + * implementation + * @obj: GEM buffer object + * + * Returns: + * A scatter/gather table for the pinned pages of the buffer object's memory. + */ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -40,6 +56,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages); } +/** + * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation + * @obj: GEM buffer object + * + * Sets up an in-kernel virtual mapping of the buffer object's memory. + * + * Returns: + * The virtual address of the mapping or an error pointer. + */ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -53,6 +78,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) return bo->dma_buf_vmap.virtual; } +/** + * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation + * @obj: GEM buffer object + * @vaddr: virtual address (unused) + * + * Tears down the in-kernel virtual mapping of the buffer object's memory. + */ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -60,6 +92,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) ttm_bo_kunmap(&bo->dma_buf_vmap); } +/** + * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation + * @obj: GEM buffer object + * @vma: virtual memory area + * + * Sets up a userspace mapping of the buffer object's memory in the given + * virtual memory area. + * + * Returns: + * 0 on success or negative error code. + */ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -94,6 +137,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma return ret; } +/** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Import shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM buffer object of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. 
+ */ struct drm_gem_object * amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, @@ -102,12 +158,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct reservation_object *resv = attach->dmabuf->resv; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int ret; + memset(&bp, 0, sizeof(bp)); + bp.size = attach->dmabuf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg, - resv, &bo); + ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) goto error; @@ -126,8 +188,19 @@ error: return ERR_PTR(ret); } +/** + * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation + * @dma_buf: shared DMA buffer + * @attach: DMA-buf attachment + * + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. + * + * Returns: + * 0 on success or negative error code. + */ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, - struct device *target_dev, struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; @@ -135,7 +208,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; - r = drm_gem_map_attach(dma_buf, target_dev, attach); + r = drm_gem_map_attach(dma_buf, attach); if (r) return r; @@ -159,7 +232,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, } /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); if (r) goto error_unreserve; @@ -175,6 +248,14 @@ error_detach: return r; } +/** + * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation + * @dma_buf: shared DMA buffer + * @attach: DMA-buf attachment + * + * This is called when a shared DMA buffer no longer needs to be accessible by + * the other device. For now, simply unpins the buffer from GTT. + */ static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { @@ -196,6 +277,13 @@ error: drm_gem_map_detach(dma_buf, attach); } +/** + * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation + * @obj: GEM buffer object + * + * Returns: + * The buffer object's reservation object. + */ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -203,13 +291,25 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) return bo->tbo.resv; } +/** + * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * @dma_buf: shared DMA buffer + * @direction: direction of DMA transfer + * + * This is called before CPU access to the shared DMA buffer's memory. If it's + * a read access, the buffer is moved to the GTT domain if possible, for optimal + * CPU read performance. + * + * Returns: + * 0 on success or negative error code. 
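+ *
+ * Editor's note: userspace normally reaches this callback through the
+ * generic DMA-buf sync ioctl, e.g. (standard uapi from linux/dma-buf.h,
+ * not part of this patch):
+ *
+ *	struct dma_buf_sync sync = {
+ *		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ,
+ *	};
+ *	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);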
+ */ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_framebuffer_domains(adev); + u32 domain = amdgpu_display_supported_domains(adev); int ret; bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); @@ -239,14 +339,24 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = { .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_gem_begin_cpu_access, .map = drm_gem_dmabuf_kmap, - .map_atomic = drm_gem_dmabuf_kmap_atomic, .unmap = drm_gem_dmabuf_kunmap, - .unmap_atomic = drm_gem_dmabuf_kunmap_atomic, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, }; +/** + * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation + * @dev: DRM device + * @gobj: GEM buffer object + * @flags: flags like DRM_CLOEXEC and DRM_RDWR + * + * The main work is done by the &drm_gem_prime_export helper, which in turn + * uses &amdgpu_gem_prime_res_obj. + * + * Returns: + * Shared DMA buffer representing the GEM buffer object from the given device. + */ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags) @@ -267,6 +377,17 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, return buf; } +/** + * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation + * @dev: DRM device + * @dma_buf: Shared DMA buffer + * + * The main work is done by the &drm_gem_prime_import helper, which in turn + * uses &amdgpu_gem_prime_import_sg_table. + * + * Returns: + * GEM buffer object representing the shared DMA buffer for the given device. 
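+ *
+ * Editor's note: these PRIME callbacks are hooked up in the driver's
+ * &drm_driver structure, roughly as follows (an abbreviated sketch of the
+ * amdgpu_drv.c wiring, not part of this patch):
+ *
+ *	.gem_prime_export = amdgpu_gem_prime_export,
+ *	.gem_prime_import = amdgpu_gem_prime_import,
+ *	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
+ *	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
+ *	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
+ *	.gem_prime_mmap = amdgpu_gem_prime_mmap,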
+ */ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c7d43e064fc7..9f1a5bd39ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -52,6 +52,7 @@ static int psp_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: psp_v3_1_set_psp_funcs(psp); break; case CHIP_RAVEN: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 262c1267249e..ea9850c9224d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, u32 ring, struct amdgpu_ring **out_ring) { + u32 instance; + switch (mapper->hw_ip) { case AMDGPU_HW_IP_GFX: *out_ring = &adev->gfx.gfx_ring[ring]; @@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, *out_ring = &adev->sdma.instance[ring].ring; break; case AMDGPU_HW_IP_UVD: - *out_ring = &adev->uvd.ring; + instance = ring; + *out_ring = &adev->uvd.inst[instance].ring; break; case AMDGPU_HW_IP_VCE: *out_ring = &adev->vce.ring[ring]; break; case AMDGPU_HW_IP_UVD_ENC: - *out_ring = &adev->uvd.ring_enc[ring]; + instance = ring / adev->uvd.num_enc_rings; + *out_ring = + &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings]; break; case AMDGPU_HW_IP_VCN_DEC: *out_ring = &adev->vcn.ring_dec; @@ -91,6 +96,9 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: *out_ring = &adev->vcn.ring_enc[ring]; break; + case AMDGPU_HW_IP_VCN_JPEG: + *out_ring = &adev->vcn.ring_jpeg; + break; default: *out_ring = NULL; DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); @@ -240,13 +248,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, ip_num_rings = adev->sdma.num_instances; break; case AMDGPU_HW_IP_UVD: - ip_num_rings = 1; + ip_num_rings = adev->uvd.num_uvd_inst; break; case AMDGPU_HW_IP_VCE: ip_num_rings = adev->vce.num_rings; break; case AMDGPU_HW_IP_UVD_ENC: - ip_num_rings = adev->uvd.num_enc_rings; + ip_num_rings = + adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst; break; case AMDGPU_HW_IP_VCN_DEC: ip_num_rings = 1; @@ -254,6 +263,9 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: ip_num_rings = adev->vcn.num_enc_rings; break; + case AMDGPU_HW_IP_VCN_JPEG: + ip_num_rings = 1; + break; default: DRM_DEBUG("unknown ip type: %d\n", hw_ip); return -EINVAL; @@ -281,6 +293,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, case AMDGPU_HW_IP_UVD_ENC: case AMDGPU_HW_IP_VCN_DEC: case AMDGPU_HW_IP_VCN_ENC: + case AMDGPU_HW_IP_VCN_JPEG: r = amdgpu_identity_map(adev, mapper, ring, out_ring); break; case AMDGPU_HW_IP_DMA: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d5f526f38e50..93794a85f83d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -211,7 +211,8 @@ void amdgpu_ring_priority_get(struct amdgpu_ring *ring, if (!ring->funcs->set_priority) return; - atomic_inc(&ring->num_jobs[priority]); + if (atomic_inc_return(&ring->num_jobs[priority]) <= 0) + return; mutex_lock(&ring->priority_mutex); if (priority <= ring->priority) @@ -304,7 +305,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, 0xffffffffffffffff : ring->buf_mask; 
/* Allocate ring buffer */ if (ring->ring_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, &ring->gpu_addr, @@ -362,6 +363,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; + ring->me = 0; ring->adev->rings[ring->idx] = NULL; } @@ -459,6 +461,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) spin_unlock(&adev->ring_lru_list_lock); } +/** + * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper + * + * @adev: amdgpu_device pointer + * @reg0: register to write + * @reg1: register to wait on + * @ref: reference value to write/wait on + * @mask: mask to wait on + * + * Helper for rings that don't support write and wait in a + * single oneshot packet. + */ +void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1a5911882657..5018c0b6bf1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,7 +29,7 @@ #include <drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 18 +#define AMDGPU_MAX_RINGS 21 #define AMDGPU_MAX_GFX_RINGS 1 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 @@ -42,6 +42,9 @@ #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) +#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) + +#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX, @@ -52,7 +55,8 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_KIQ, AMDGPU_RING_TYPE_UVD_ENC, AMDGPU_RING_TYPE_VCN_DEC, - AMDGPU_RING_TYPE_VCN_ENC + AMDGPU_RING_TYPE_VCN_ENC, + AMDGPU_RING_TYPE_VCN_JPEG }; struct amdgpu_device; @@ -90,7 +94,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, + unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -110,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; bool support_64bit_ptrs; unsigned vmhub; + unsigned extra_dw; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); @@ -154,6 +160,9 @@ struct amdgpu_ring_funcs { void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); + void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, @@ -228,6 +237,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, int num_blacklist, bool lru_pipe_order, struct amdgpu_ring **ring); void 
amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); +void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask); + static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 2dbe87591f81..8904e62dca7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT /* * Copyright 2009 VMware, Inc. * @@ -33,6 +34,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *vram_obj = NULL; struct amdgpu_bo **gtt_obj = NULL; + struct amdgpu_bo_param bp; uint64_t gart_addr, vram_addr; unsigned n, size; int i, r; @@ -52,15 +54,21 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) n -= adev->irq.ih.ring_size; n /= size; - gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL); + gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL); if (!gtt_obj) { DRM_ERROR("Failed to allocate %d pointers\n", n); r = 1; goto out_cleanup; } - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, - ttm_bo_type_kernel, NULL, &vram_obj); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = 0; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -68,20 +76,20 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_reserve(vram_obj, false); if (unlikely(r != 0)) goto out_unref; - r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr); + r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM); if (r) { DRM_ERROR("Failed to pin VRAM object\n"); goto out_unres; } + vram_addr = amdgpu_bo_gpu_offset(vram_obj); for (i = 0; i < n; i++) { void *gtt_map, *vram_map; void **gart_start, **gart_end; void **vram_start, **vram_end; struct dma_fence *fence = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, 0, - ttm_bo_type_kernel, NULL, gtt_obj + i); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + r = amdgpu_bo_create(adev, &bp, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; @@ -90,11 +98,17 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_reserve(gtt_obj[i], false); if (unlikely(r != 0)) goto out_lclean_unref; - r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr); + r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT); if (r) { DRM_ERROR("Failed to pin GTT object %d\n", i); goto out_lclean_unres; } + r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo); + if (r) { + DRM_ERROR("%p bind failed\n", gtt_obj[i]); + goto out_lclean_unpin; + } + gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]); r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 532263ab6e16..76920035eb22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs, TP_fast_assign( __entry->bo_list = p->bo_list; - __entry->ring = p->job->ring->idx; + __entry->ring = p->ring->idx; __entry->dw = p->job->ibs[i].length_dw; __entry->fences =
amdgpu_fence_count_emitted( - p->job->ring); + p->ring); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, @@ -178,7 +178,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __entry->ring_name = job->ring->name; + __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -203,7 +203,7 @@ TRACE_EVENT(amdgpu_sched_run_job, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __entry->ring_name = job->ring->name; + __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, ), TP_fast_assign( - __entry->bo = bo_va->base.bo; + __entry->bo = bo_va ? bo_va->base.bo : NULL; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 205da3ff9cd0..13977ea6a097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -63,24 +63,52 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); /* * Global memory. */ + +/** + * amdgpu_ttm_mem_global_init - Initialize and acquire reference to + * memory object + * + * @ref: Object for initialization. + * + * This is called by drm_global_item_ref() when an object is being + * initialized. + */ static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) { return ttm_mem_global_init(ref->object); } +/** + * amdgpu_ttm_mem_global_release - Drop reference to a memory object + * + * @ref: Object being removed + * + * This is called by drm_global_item_unref() when an object is being + * released. + */ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) { ttm_mem_global_release(ref->object); } +/** + * amdgpu_ttm_global_init - Initialize global TTM memory reference + * structures. + * + * @adev: AMDGPU device for which the global structures need to be + * registered. + * + * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init() + * during bring up. 
+ */ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; int r; + /* ensure reference is false in case init fails */ adev->mman.mem_global_referenced = false; + global_ref = &adev->mman.mem_global_ref; global_ref->global_type = DRM_GLOBAL_TTM_MEM; global_ref->size = sizeof(struct ttm_mem_global); @@ -108,21 +136,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); - ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, amdgpu_sched_jobs, NULL); - if (r) { - DRM_ERROR("Failed setting up TTM BO move run queue.\n"); - goto error_entity; - } - adev->mman.mem_global_referenced = true; return 0; -error_entity: - drm_global_item_unref(&adev->mman.bo_global_ref.ref); error_bo: drm_global_item_unref(&adev->mman.mem_global_ref); error_mem: @@ -132,8 +149,6 @@ error_mem: static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { - drm_sched_entity_fini(adev->mman.entity.sched, - &adev->mman.entity); mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); drm_global_item_unref(&adev->mman.mem_global_ref); @@ -146,6 +161,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) return 0; } +/** + * amdgpu_init_mem_type - Initialize a memory manager for a specific + * type of memory request. + * + * @bdev: The TTM BO device object (contains a reference to + * amdgpu_device) + * @type: The type of memory requested + * @man: + * + * This is called by ttm_bo_init_mm() when a buffer object is being + * initialized. 
+ */ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, struct ttm_mem_type_manager *man) { @@ -161,6 +188,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->default_caching = TTM_PL_FLAG_CACHED; break; case TTM_PL_TT: + /* GTT memory */ man->func = &amdgpu_gtt_mgr_func; man->gpu_offset = adev->gmc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; @@ -193,6 +221,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, return 0; } +/** + * amdgpu_evict_flags - Compute placement flags + * + * @bo: The buffer object to evict + * @placement: Possible destination(s) for evicted BO + * + * Fill in placement data when ttm_bo_evict() is called + */ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { @@ -204,12 +240,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; + /* Don't handle scatter gather BOs */ if (bo->type == ttm_bo_type_sg) { placement->num_placement = 0; placement->num_busy_placement = 0; return; } + /* Object isn't an AMDGPU object so ignore */ if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; placement->busy_placement = &placements; @@ -217,26 +255,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (!adev->mman.buffer_funcs_enabled) { + /* Move to system memory */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); - } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { - unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; - struct drm_mm_node *node = bo->mem.mm_node; - unsigned long pages_left; - - for (pages_left = bo->mem.num_pages; - pages_left; - pages_left -= node->size, node++) { - if (node->start < fpfn) - break; - } - - if (!pages_left) - goto gtt; + } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && + !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && + amdgpu_bo_in_cpu_visible_vram(abo)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -245,12 +273,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - abo->placements[0].fpfn = fpfn; + abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; abo->placement.busy_placement = &abo->placements[1]; abo->placement.num_busy_placement = 1; } else { -gtt: + /* Move to GTT memory */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); } break; @@ -261,6 +289,15 @@ gtt: *placement = abo->placement; } +/** + * amdgpu_verify_access - Verify access for a mmap call + * + * @bo: The buffer object to map + * @filp: The file pointer from the process performing the mmap + * + * This is called by ttm_bo_mmap() to verify whether a process + * has the right to mmap a BO to their process space. 
+ */ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); @@ -278,6 +315,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) filp->private_data); } +/** + * amdgpu_move_null - Register memory for a buffer object + * + * @bo: The bo to assign the memory to + * @new_mem: The memory to be assigned. + * + * Assign the memory from new_mem to the memory of the buffer object + * bo. + */ static void amdgpu_move_null(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem) { @@ -288,6 +334,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo, new_mem->mm_node = NULL; } +/** + * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT + * buffer. + */ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, struct drm_mm_node *mm_node, struct ttm_mem_reg *mem) @@ -302,9 +352,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, } /** - * amdgpu_find_mm_node - Helper function finds the drm_mm_node - * corresponding to @offset. It also modifies the offset to be - * within the drm_mm_node returned + * amdgpu_find_mm_node - Helper function finds the drm_mm_node + * corresponding to @offset. It also modifies + * the offset to be within the drm_mm_node + * returned */ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, unsigned long *offset) @@ -443,7 +494,12 @@ error: return r; } - +/** + * amdgpu_move_blit - Copy an entire buffer to another buffer + * + * This is a helper called by amdgpu_bo_move() and + * amdgpu_move_vram_ram() to help move buffers to and from VRAM. + */ static int amdgpu_move_blit(struct ttm_buffer_object *bo, bool evict, bool no_wait_gpu, struct ttm_mem_reg *new_mem, @@ -478,6 +534,11 @@ error: return r; } +/** + * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer + * + * Called by amdgpu_bo_move(). + */ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) @@ -490,6 +551,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, int r; adev = amdgpu_ttm_adev(bo->bdev); + + /* create space/pages for new_mem in GTT space */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; placement.num_placement = 1; @@ -504,25 +567,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, return r; } + /* set caching flags */ r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); if (unlikely(r)) { goto out_cleanup; } + /* Bind the memory to the GTT space */ r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); if (unlikely(r)) { goto out_cleanup; } + + /* blit VRAM to GTT */ r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } + + /* move BO (in tmp_mem) to new_mem */ r = ttm_bo_move_ttm(bo, ctx, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; } +/** + * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM + * + * Called by amdgpu_bo_move(). 
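+ * The buffer is first bound into a temporary GTT placement, so that it has
+ * a GART address the copy engine can use, and is then blitted into VRAM;
+ * this mirrors amdgpu_move_vram_ram() in reverse.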
+ */ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) @@ -535,6 +609,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, int r; adev = amdgpu_ttm_adev(bo->bdev); + + /* make space in GTT for old_mem buffer */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; placement.num_placement = 1; @@ -548,10 +624,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, if (unlikely(r)) { return r; } + + /* move/bind old memory to GTT space */ r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } + + /* copy to VRAM */ r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); if (unlikely(r)) { goto out_cleanup; @@ -561,6 +641,11 @@ out_cleanup: return r; } +/** + * amdgpu_bo_move - Move a buffer object to a new memory location + * + * Called by ttm_bo_handle_move_mem() + */ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) @@ -626,6 +711,11 @@ memcpy: return 0; } +/** + * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault + * + * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() + */ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; @@ -695,7 +785,7 @@ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; - struct mm_struct *usermm; + struct task_struct *usertask; uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; @@ -703,17 +793,29 @@ struct amdgpu_ttm_tt { uint32_t last_set_pages; }; +/** + * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to + * by a USERPTR pointer to memory + * + * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). + * This provides a wrapper around the get_user_pages() call to provide + * device accessible pages that back user memory. 
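+ * Note that the mm is taken from gtt->usertask rather than from current,
+ * so pages can also be pinned on behalf of another task via
+ * get_user_pages_remote(); see the body below.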
+ */ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct mm_struct *mm = gtt->usertask->mm; unsigned int flags = 0; unsigned pinned = 0; int r; + if (!mm) /* Happens during process shutdown */ + return -ESRCH; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) flags |= FOLL_WRITE; - down_read(&current->mm->mmap_sem); + down_read(&mm->mmap_sem); if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory @@ -721,13 +823,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; - vma = find_vma(gtt->usermm, gtt->userptr); + vma = find_vma(mm, gtt->userptr); if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); return -EPERM; } } + /* loop enough times using contiguous pages of memory */ do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; @@ -739,7 +842,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) list_add(&guptask.list, &gtt->guptasks); spin_unlock(&gtt->guptasklock); - r = get_user_pages(userptr, num_pages, flags, p, NULL); + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); spin_lock(&gtt->guptasklock); list_del(&guptask.list); @@ -752,15 +860,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) } while (pinned < ttm->num_pages); - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); return 0; release_pages: release_pages(pages, pinned); - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); return r; } +/** + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages + * as necessary. + * + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. + */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -775,6 +891,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) } } +/** + * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty + * + * Called while unpinning userptr pages + */ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -793,7 +914,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) } } -/* prepare the sg table with the user pages */ +/** + * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the + * user pages + * + * Called by amdgpu_ttm_backend_bind() + */ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); @@ -805,17 +931,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + /* Allocate an SG array and squash pages into it */ r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, ttm->num_pages << PAGE_SHIFT, GFP_KERNEL); if (r) goto release_sg; + /* Map SG to device */ r = -ENOMEM; nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); if (nents != ttm->sg->nents) goto release_sg; + /* convert SG to linear array of pages and dma addresses */ drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -826,6 +955,9 @@ release_sg: return r; } +/** + * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages + */ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); @@ -839,14 +971,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) if (!ttm->sg->sgl) return; - /* free the sg table and pages again */ + /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + /* mark the pages as dirty */ amdgpu_ttm_tt_mark_user_pages(ttm); sg_free_table(ttm->sg); } +int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, + struct ttm_buffer_object *tbo, + uint64_t flags) +{ + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); + struct ttm_tt *ttm = tbo->ttm; + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; + + if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { + uint64_t page_idx = 1; + + r = amdgpu_gart_bind(adev, gtt->offset, page_idx, + ttm->pages, gtt->ttm.dma_address, flags); + if (r) + goto gart_bind_fail; + + /* Patch mtype of the second part BO */ + flags &= ~AMDGPU_PTE_MTYPE_MASK; + flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + + r = amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + ttm->num_pages - page_idx, + &ttm->pages[page_idx], + &(gtt->ttm.dma_address[page_idx]), flags); + } else { + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, + ttm->pages, gtt->ttm.dma_address, flags); + } + +gart_bind_fail: + if (r) + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + ttm->num_pages, gtt->offset); + + return r; +} + +/** + * amdgpu_ttm_backend_bind - Bind GTT memory + * + * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). + * This handles binding GTT memory to the device address space. 
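+ *
+ * Stripped of the MQD_GFX9 split handled by amdgpu_ttm_gart_bind() above,
+ * the bind reduces to roughly the following (a sketch, error paths elided):
+ *
+ *	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+ *	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
+ *	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ *			     ttm->pages, gtt->ttm.dma_address, flags);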
+ */ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -877,7 +1055,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return 0; } + /* compute PTE flags relevant to this BO memory */ flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); + + /* bind pages into GART page tables */ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); @@ -888,6 +1069,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return r; } +/** + * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object + */ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); @@ -903,6 +1087,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) return 0; + /* allocate GTT space */ tmp = bo->mem; tmp.mm_node = NULL; placement.num_placement = 1; @@ -918,10 +1103,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) if (unlikely(r)) return r; + /* compute PTE flags for this buffer object */ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); + + /* Bind pages */ gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, - bo->ttm->pages, gtt->ttm.dma_address, flags); + r = amdgpu_ttm_gart_bind(adev, bo, flags); if (unlikely(r)) { ttm_bo_mem_put(bo, &tmp); return r; @@ -935,31 +1122,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; } +/** + * amdgpu_ttm_recover_gart - Rebind GTT pages + * + * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to + * rebind GTT pages during a GPU reset. + */ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; uint64_t flags; int r; - if (!gtt) + if (!tbo->ttm) return 0; - flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem); - r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, - gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); - if (r) - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); + flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem); + r = amdgpu_ttm_gart_bind(adev, tbo, flags); + return r; } +/** + * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages + * + * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and + * ttm_tt_destroy(). + */ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; + /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); @@ -978,6 +1174,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + if (gtt->usertask) + put_task_struct(gtt->usertask); + ttm_dma_tt_fini(&gtt->ttm); kfree(gtt); } @@ -988,6 +1187,13 @@ static struct ttm_backend_func amdgpu_backend_func = { .destroy = &amdgpu_ttm_backend_destroy, }; +/** + * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO + * + * @bo: The buffer object to create a GTT ttm_tt object around + * + * Called by ttm_tt_create().
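+ *
+ * The returned object embeds struct ttm_tt as its first member (via
+ * struct ttm_dma_tt), which is what makes the `(void *)ttm` downcast
+ * used throughout this file safe; it is equivalent to:
+ *
+ *	struct amdgpu_ttm_tt *gtt =
+ *		container_of(ttm, struct amdgpu_ttm_tt, ttm.ttm);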
+ */ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { @@ -1001,6 +1207,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; + + /* allocate space for the uninitialized page entries */ if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) { kfree(gtt); return NULL; @@ -1008,6 +1216,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return &gtt->ttm.ttm; } +/** + * amdgpu_ttm_tt_populate - Map GTT pages visible to the device + * + * Map the pages of a ttm_tt object to an address space visible + * to the underlying device. + */ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { @@ -1015,6 +1229,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, struct amdgpu_ttm_tt *gtt = (void *)ttm; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) @@ -1039,9 +1254,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, } #endif + /* fall back to generic helper to populate the page array + * and map them to the device */ return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx); } +/** + * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays + * + * Unmaps pages of a ttm_tt object from the device address space and + * unpopulates the page array backing it. + */ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) { struct amdgpu_device *adev; @@ -1067,9 +1290,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) } #endif + /* fall back to generic helper to unmap and unpopulate array */ ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm); } +/** + * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt + * for the current task + * + * @ttm: The ttm_tt object to bind this userptr object to + * @addr: The address in the current task's VM space to use + * @flags: Requirements of userptr object. + * + * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages + * to the current task + */ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags) { @@ -1079,8 +1314,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return -EINVAL; gtt->userptr = addr; - gtt->usermm = current->mm; gtt->userflags = flags; + + if (gtt->usertask) + put_task_struct(gtt->usertask); + gtt->usertask = current->group_leader; + get_task_struct(gtt->usertask); + spin_lock_init(&gtt->guptasklock); INIT_LIST_HEAD(&gtt->guptasks); atomic_set(&gtt->mmu_invalidations, 0); @@ -1089,6 +1329,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return 0; } +/** + * amdgpu_ttm_tt_get_usermm - Return the mm_struct associated with a + * userptr ttm_tt object + */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1096,9 +1339,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) if (gtt == NULL) return NULL; - return gtt->usermm; + if (gtt->usertask == NULL) + return NULL; + + return gtt->usertask->mm; } +/** + * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies + * within an address range for the + * current task.
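+ *
+ * The check below is the usual interval-overlap test; for example a four
+ * page userptr mapping at 0x1000 (size 0x4000) overlaps the invalidation
+ * range [0x4000, 0x8000] because 0x1000 <= 0x8000 and
+ * 0x1000 + 0x4000 > 0x4000.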
+ * + */ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { @@ -1109,10 +1361,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt == NULL || !gtt->userptr) return false; + /* Return false if no part of the ttm_tt object lies within + * the range + */ size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; if (gtt->userptr > end || gtt->userptr + size <= start) return false; + /* Search the lists of tasks that hold this mapping and see + * if current is one of them. If it is, return false. + */ spin_lock(&gtt->guptasklock); list_for_each_entry(entry, &gtt->guptasks, list) { if (entry->task == current) { @@ -1127,6 +1385,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, return true; } +/** + * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been + * invalidated? + */ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated) { @@ -1137,6 +1399,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, return prev_invalidated != *last_invalidated; } +/** + * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this + * ttm_tt object been invalidated + * since the last time they've + * been set? + */ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1147,6 +1415,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; } +/** + * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? + */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1157,6 +1428,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); } +/** + * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object + * + * @ttm: The ttm_tt object to compute the flags for + * @mem: The memory region backing this ttm_tt object + */ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem) { @@ -1181,6 +1458,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } +/** + * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict + * a buffer object. + * + * Return true if eviction is sensible. Called by + * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space() + * which tries to evict buffer objects until it can find space + * for a new object and by ttm_bo_force_list_clean() which is + * used to clean out a memory space. + */ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { @@ -1227,6 +1514,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +/** + * amdgpu_ttm_access_memory - Read or write memory that backs a + * buffer object. + * + * @bo: The buffer object to read/write + * @offset: Offset into buffer object + * @buf: Secondary buffer to write/read from + * @len: Length in bytes of access + * @write: true if writing + * + * This is used to access VRAM that backs a buffer object via MMIO + * access for debugging purposes.
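+ *
+ * For instance, a debugger poking a BO through ptrace would ultimately
+ * land here via TTM; an illustrative (hypothetical) direct call reading
+ * the first dword of an amdgpu_bo `abo` would look like:
+ *
+ *	u32 val;
+ *	int n = amdgpu_ttm_access_memory(&abo->tbo, 0, &val,
+ *					 sizeof(val), false);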
+ */ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) @@ -1329,6 +1629,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_param bp; int r = 0; int i; u64 vram_size = adev->gmc.visible_vram_size; @@ -1336,17 +1637,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) u64 size = adev->fw_vram_usage.size; struct amdgpu_bo *bo; + memset(&bp, 0, sizeof(bp)); + bp.size = adev->fw_vram_usage.size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; if (adev->fw_vram_usage.size > 0 && adev->fw_vram_usage.size <= vram_size) { - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, + r = amdgpu_bo_create(adev, &bp, &adev->fw_vram_usage.reserved_bo); if (r) goto error_create; @@ -1375,7 +1680,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) AMDGPU_GEM_DOMAIN_VRAM, adev->fw_vram_usage.start_offset, (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size), NULL); + adev->fw_vram_usage.size)); if (r) goto error_pin; r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, @@ -1398,13 +1703,22 @@ error_create: adev->fw_vram_usage.reserved_bo = NULL; return r; } - +/** + * amdgpu_ttm_init - Init the memory management (ttm) as well as + * various gtt/vram related fields. + * + * This initializes all of the memory space pools that the TTM layer + * will need such as the GTT space (system memory mapped to the device), + * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which + * can be mapped per VMID. + */ int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; int r; u64 vis_vram_limit; + /* initialize global references for vram/gtt */ r = amdgpu_ttm_global_init(adev); if (r) { return r; @@ -1425,6 +1739,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* We opt to avoid OOM on system pages allocations */ adev->mman.bdev.no_retry = true; + /* Initialize VRAM pool with all of VRAM divided into pages */ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->gmc.real_vram_size >> PAGE_SHIFT); if (r) { @@ -1454,15 +1769,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->stolen_vga_memory, - NULL, NULL); - if (r) - return r; + /* allocate memory as required for VGA + * This is used for VGA emulation and pre-OS scanout buffers to + * avoid display artifacts while transitioning between pre-OS + * and driver. 
*/ + if (adev->gmc.stolen_size) { + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->stolen_vga_memory, + NULL, NULL); + if (r) + return r; + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); + /* Compute GTT size, either based on 3/4 of the system RAM size + * or whatever the user passed on module init */ if (amdgpu_gtt_size == -1) { struct sysinfo si; @@ -1473,6 +1796,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } else gtt_size = (uint64_t)amdgpu_gtt_size << 20; + + /* Initialize GTT memory pool */ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing GTT heap.\n"); @@ -1481,6 +1806,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned)(gtt_size / (1024 * 1024))); + /* Initialize various on-chip memory pools */ adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; @@ -1520,6 +1846,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } } + /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { DRM_ERROR("Failed to init debugfs\n"); @@ -1528,13 +1855,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_ttm_late_init - Handle any late initialization for + * amdgpu_ttm + */ +void amdgpu_ttm_late_init(struct amdgpu_device *adev) +{ + /* return the VGA stolen memory (if any) back to VRAM */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); +} + +/** + * amdgpu_ttm_fini - De-initialize the TTM memory pools + */ void amdgpu_ttm_fini(struct amdgpu_device *adev) { if (!adev->mman.initialized) return; amdgpu_ttm_debugfs_fini(adev); - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); @@ -1567,10 +1906,29 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) { struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; uint64_t size; + int r; - if (!adev->mman.initialized || adev->in_gpu_reset) + if (!adev->mman.initialized || adev->in_gpu_reset || + adev->mman.buffer_funcs_enabled == enable) return; + if (enable) { + struct amdgpu_ring *ring; + struct drm_sched_rq *rq; + + ring = adev->mman.buffer_funcs_ring; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", + r); + return; + } + } else { + drm_sched_entity_destroy(adev->mman.entity.sched, + &adev->mman.entity); + } + /* this just adjusts TTM's idea of the VRAM size, which sets lpfn to the correct value */ if (enable) size = adev->gmc.real_vram_size; @@ -1648,7 +2006,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, if (r) goto error_free; - r = amdgpu_job_submit(job, ring, &adev->mman.entity, + r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); if (r) goto error_free; @@ -1717,24 +2075,19 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - if (direct_submit) { - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, - NULL,
fence); - job->fence = dma_fence_get(*fence); - if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &adev->mman.entity, + if (direct_submit) + r = amdgpu_job_submit_direct(job, ring, fence); + else + r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, fence); - if (r) - goto error_free; - } + if (r) + goto error_free; return r; error_free: amdgpu_job_free(job); + DRM_ERROR("Error scheduling IBs (%d)\n", r); return r; } @@ -1817,7 +2170,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - r = amdgpu_job_submit(job, ring, &adev->mman.entity, + r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, fence); if (r) goto error_free; @@ -1856,6 +2209,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { #endif }; +/** + * amdgpu_ttm_vram_read - Linear read access to VRAM + * + * Accesses VRAM via MMIO for debugging purposes. + */ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1895,6 +2253,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_ttm_vram_write - Linear write access to VRAM + * + * Accesses VRAM via MMIO for debugging purposes. + */ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -1943,6 +2306,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS +/** + * amdgpu_ttm_gtt_read - Linear read access to GTT memory + */ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1990,6 +2356,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif +/** + * amdgpu_iomem_read - Virtual read access to GPU mapped memory + * + * This function is used to read memory that has been mapped to the + * GPU and the known addresses are not physical addresses but instead + * bus addresses (e.g., what you'd put in an IB or ring buffer). + */ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1998,6 +2371,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, ssize_t result = 0; int r; + /* retrieve the IOMMU domain if any for this device */ dom = iommu_get_domain_for_dev(adev->dev); while (size) { @@ -2010,6 +2384,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, bytes = bytes < size ? bytes : size; + /* Translate the bus address to a physical address. If + * the domain is NULL it means there is no IOMMU active + * and the address translation is the identity + */ addr = dom ? iommu_iova_to_phys(dom, addr) : addr; pfn = addr >> PAGE_SHIFT; @@ -2034,6 +2412,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_iomem_write - Virtual write access to GPU mapped memory + * + * This function is used to write memory that has been mapped to the + * GPU and the known addresses are not physical addresses but instead + * bus addresses (e.g., what you'd put in an IB or ring buffer). 
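+ *
+ * Both the read and write paths resolve each page roughly like this
+ * sketch (identity translation when no IOMMU domain is attached):
+ *
+ *	dom = iommu_get_domain_for_dev(adev->dev);
+ *	phys = dom ? iommu_iova_to_phys(dom, addr) : addr;
+ *	pfn = phys >> PAGE_SHIFT;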
+ */ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6ea7de863041..8b3cc6687769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -73,10 +73,12 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man); +u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo); uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); int amdgpu_ttm_init(struct amdgpu_device *adev); +void amdgpu_ttm_late_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 5916cc25e28b..f55f72a37ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); DRM_DEBUG("reg_list_separate_size_bytes: %u\n", le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); - DRM_DEBUG("reg_list_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); + DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); + if (version_minor == 1) { + const struct rlc_firmware_header_v2_1 *v2_1 = + container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n", + le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length)); + DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver)); + DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver)); + DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes)); + DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes)); + DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver)); + DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver)); + DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes)); + DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes)); + DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver)); + DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_feature_ver)); + DRM_DEBUG("save_restore_list_srm_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_size_bytes)); + DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes)); + } } else { DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); } @@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case 
CHIP_VEGAM: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else @@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_VEGA20: + return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); } @@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && - ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(cp_hdr->jt_offset) * 4), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm, + ucode->ucode_size); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 30b5500dc152..08e38579af24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 { uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */ }; +/* version_major=2, version_minor=1 */ +struct rlc_firmware_header_v2_1 { + struct rlc_firmware_header_v2_0 v2_0; + uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */ + uint32_t save_restore_list_cntl_ucode_ver; + uint32_t save_restore_list_cntl_feature_ver; + uint32_t save_restore_list_cntl_size_bytes; + uint32_t save_restore_list_cntl_offset_bytes; + uint32_t save_restore_list_gpm_ucode_ver; + uint32_t save_restore_list_gpm_feature_ver; + uint32_t save_restore_list_gpm_size_bytes; + uint32_t save_restore_list_gpm_offset_bytes; + uint32_t save_restore_list_srm_ucode_ver; + uint32_t save_restore_list_srm_feature_ver; + uint32_t save_restore_list_srm_size_bytes; + uint32_t save_restore_list_srm_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -148,6 +166,7 @@ union amdgpu_firmware_header { struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; + struct rlc_firmware_header_v2_1 rlc_v2_1; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct gpu_info_firmware_header_v1_0 gpu_info; @@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC2, 
AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_RLC_G, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 627542b22ae4..80b5c453f8c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -53,11 +53,11 @@ /* Firmware Names */ #ifdef CONFIG_DRM_AMDGPU_CIK -#define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" -#define FIRMWARE_KABINI "radeon/kabini_uvd.bin" -#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin" -#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin" -#define FIRMWARE_MULLINS "radeon/mullins_uvd.bin" +#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin" +#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin" +#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin" +#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin" +#define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin" #endif #define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin" #define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin" @@ -66,15 +66,18 @@ #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" +#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" +#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin" -#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00) -#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00) -#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00) -#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00) -#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00) +/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */ +#define UVD_GPCOM_VCPU_CMD 0x03c3 +#define UVD_GPCOM_VCPU_DATA0 0x03c4 +#define UVD_GPCOM_VCPU_DATA1 0x03c5 +#define UVD_NO_OP 0x03ff +#define UVD_BASE_SI 0x3800 /** * amdgpu_uvd_cs_ctx - Command submission parser context @@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); MODULE_FIRMWARE(FIRMWARE_VEGA12); +MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_uvd_idle_work_handler(struct work_struct *work); @@ -122,8 +127,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; - unsigned version_major, version_minor, family_id; - int i, r; + unsigned family_id; + int i, j, r; INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); @@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) case CHIP_VEGA12: fw_name = FIRMWARE_VEGA12; break; + case CHIP_VEGAM: + fw_name = FIRMWARE_VEGAM; + break; + case CHIP_VEGA20: + fw_name = FIRMWARE_VEGA20; + break; default: return -EINVAL; } @@ -197,52 +208,70 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) hdr = (const struct common_firmware_header *)adev->uvd.fw->data; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; - version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; - version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", - version_major, version_minor, family_id); - - /* - * Limit the number of UVD handles depending on microcode 
major - * and minor versions. The firmware version which has 40 UVD - * instances support is 1.80. So all subsequent versions should - * also have the same support. - */ - if ((version_major > 0x01) || - ((version_major == 0x01) && (version_minor >= 0x50))) - adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; - adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | - (family_id << 8)); + if (adev->asic_type < CHIP_VEGA20) { + unsigned version_major, version_minor; + + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + + /* + * Limit the number of UVD handles depending on microcode major + * and minor versions. The firmware version which has 40 UVD + * instances support is 1.80. So all subsequent versions should + * also have the same support. + */ + if ((version_major > 0x01) || + ((version_major == 0x01) && (version_minor >= 0x50))) + adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + + adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | + (family_id << 8)); + + if ((adev->asic_type == CHIP_POLARIS10 || + adev->asic_type == CHIP_POLARIS11) && + (adev->uvd.fw_version < FW_1_66_16)) + DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", + version_major, version_minor); + } else { + unsigned int enc_major, enc_minor, dec_minor; + + dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f; + enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3; + DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n", + enc_major, enc_minor, dec_minor, family_id); - if ((adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS11) && - (adev->uvd.fw_version < FW_1_66_16)) - DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", - version_major, version_minor); + adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + + adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version); + } bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo, - &adev->uvd.gpu_addr, &adev->uvd.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); - return r; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo, + &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); + return r; + } } - ring = &adev->uvd.ring; + ring = &adev->uvd.inst[0].ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, - rq, amdgpu_sched_jobs, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up UVD run queue.\n"); + r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); return r; } - for (i = 0; i < adev->uvd.max_handles; ++i) { atomic_set(&adev->uvd.handles[i], 0); adev->uvd.filp[i] = NULL; @@ -274,20 +303,23 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) int amdgpu_uvd_sw_fini(struct amdgpu_device 
*adev) { - int i; - kfree(adev->uvd.saved_bo); + int i, j; - drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); + drm_sched_entity_destroy(&adev->uvd.inst->ring.sched, + &adev->uvd.entity); - amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo, - &adev->uvd.gpu_addr, - (void **)&adev->uvd.cpu_addr); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + kfree(adev->uvd.inst[j].saved_bo); - amdgpu_ring_fini(&adev->uvd.ring); + amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, + &adev->uvd.inst[j].gpu_addr, + (void **)&adev->uvd.inst[j].cpu_addr); - for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) - amdgpu_ring_fini(&adev->uvd.ring_enc[i]); + amdgpu_ring_fini(&adev->uvd.inst[j].ring); + for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) + amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); + } release_firmware(adev->uvd.fw); return 0; @@ -297,10 +329,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i; - - if (adev->uvd.vcpu_bo == NULL) - return 0; + int i, j; cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -314,15 +343,19 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) return 0; } - size = amdgpu_bo_size(adev->uvd.vcpu_bo); - ptr = adev->uvd.cpu_addr; + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + if (adev->uvd.inst[j].vcpu_bo == NULL) + continue; - adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); - if (!adev->uvd.saved_bo) - return -ENOMEM; + size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); + ptr = adev->uvd.inst[j].cpu_addr; - memcpy_fromio(adev->uvd.saved_bo, ptr, size); + adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->uvd.inst[j].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + } return 0; } @@ -330,51 +363,54 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->uvd.vcpu_bo == NULL) - return -EINVAL; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + if (adev->uvd.inst[i].vcpu_bo == NULL) + return -EINVAL; - size = amdgpu_bo_size(adev->uvd.vcpu_bo); - ptr = adev->uvd.cpu_addr; + size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo); + ptr = adev->uvd.inst[i].cpu_addr; - if (adev->uvd.saved_bo != NULL) { - memcpy_toio(ptr, adev->uvd.saved_bo, size); - kfree(adev->uvd.saved_bo); - adev->uvd.saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->uvd.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + if (adev->uvd.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); + kfree(adev->uvd.inst[i].saved_bo); + adev->uvd.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); + /* to restore uvd fence seq */ + amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); } - memset_io(ptr, 0, size); - /* to restore uvd fence seq 
*/ - amdgpu_fence_driver_force_completion(&adev->uvd.ring); } - return 0; } void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst[0].ring; int i, r; for (i = 0; i < adev->uvd.max_handles; ++i) { uint32_t handle = atomic_read(&adev->uvd.handles[i]); + if (handle != 0 && adev->uvd.filp[i] == filp) { struct dma_fence *fence; - r = amdgpu_uvd_get_destroy_msg(ring, handle, - false, &fence); + r = amdgpu_uvd_get_destroy_msg(ring, handle, false, + &fence); if (r) { - DRM_ERROR("Error destroying UVD (%d)!\n", r); + DRM_ERROR("Error destroying UVD %d!\n", r); continue; } @@ -665,7 +701,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, r = amdgpu_bo_kmap(bo, &ptr); if (r) { - DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); + DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); return r; } @@ -687,7 +723,8 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, /* try to alloc a new handle */ for (i = 0; i < adev->uvd.max_handles; ++i) { if (atomic_read(&adev->uvd.handles[i]) == handle) { - DRM_ERROR("Handle 0x%x already in use!\n", handle); + DRM_ERROR("Handle 0x%x already in use!\n", + handle); return -EINVAL; } @@ -800,7 +837,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) } if ((cmd == 0 || cmd == 0x3) && - (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) { + (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) { DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", start, end); return -EINVAL; @@ -968,6 +1005,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, uint64_t addr; long r; int i; + unsigned offset_idx = 0; + unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); @@ -987,17 +1026,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, goto err; if (adev->asic_type >= CHIP_VEGA10) { - data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0); - data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0); - data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0); - data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0); - } else { - data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); - data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0); - data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0); - data[3] = PACKET0(mmUVD_NO_OP, 0); + offset_idx = 1 + ring->me; + offset[1] = adev->reg_offset[UVD_HWIP][0][1]; + offset[2] = adev->reg_offset[UVD_HWIP][1][1]; } + data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0); + data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0); + data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0); + data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0); + ib = &job->ibs[0]; addr = amdgpu_bo_gpu_offset(bo); ib->ptr[0] = data[0]; @@ -1021,19 +1059,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r < 0) goto err_free; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err_free; - - amdgpu_job_free(job); } else { r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) goto err_free; - r = amdgpu_job_submit(job, ring, &adev->uvd.entity, + r = amdgpu_job_submit(job, &adev->uvd.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); if (r) goto err_free; @@ -1122,7 +1157,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) { struct
amdgpu_device *adev = container_of(work, struct amdgpu_device, uvd.idle_work.work); - unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); + unsigned fences = 0, i, j; + + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring); + for (j = 0; j < adev->uvd.num_enc_rings; ++j) { + fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); + } + } if (fences == 0) { if (adev->pm.dpm_enabled) { @@ -1179,27 +1221,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence; long r; + uint32_t ip_instance = ring->me; r = amdgpu_uvd_get_create_msg(ring, 1, NULL); if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + DRM_ERROR("amdgpu: (%d) failed to get create msg (%ld).\n", ip_instance, r); goto error; } r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + DRM_ERROR("amdgpu: (%d) failed to get destroy ib (%ld).\n", ip_instance, r); goto error; } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + DRM_ERROR("amdgpu: (%d) IB test timed out.\n", ip_instance); r = -ETIMEDOUT; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + DRM_ERROR("amdgpu: (%d) fence wait failed (%ld).\n", ip_instance, r); } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on (%d) ring %d succeeded\n", ip_instance, ring->idx); r = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 32ea20b99e53..66872286ab12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -31,30 +31,36 @@ #define AMDGPU_UVD_SESSION_SIZE (50*1024) #define AMDGPU_UVD_FIRMWARE_OFFSET 256 +#define AMDGPU_MAX_UVD_INSTANCES 2 + #define AMDGPU_UVD_FIRMWARE_SIZE(adev) \ (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \ 8) - AMDGPU_UVD_FIRMWARE_OFFSET) -struct amdgpu_uvd { +struct amdgpu_uvd_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - unsigned max_handles; - atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; - struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; - struct delayed_work idle_work; - const struct firmware *fw; /* UVD firmware */ struct amdgpu_ring ring; struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; struct amdgpu_irq_src irq; + uint32_t srbm_soft_reset; +}; + +struct amdgpu_uvd { + const struct firmware *fw; /* UVD firmware */ + unsigned fw_version; + unsigned max_handles; + unsigned num_enc_rings; + uint8_t num_uvd_inst; bool address_64_bit; bool use_ctx_buf; + struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES]; + struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; + atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_sched_entity entity; - struct drm_sched_entity entity_enc; - uint32_t srbm_soft_reset; - unsigned num_enc_rings; + struct delayed_work idle_work; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a33804bd3314..86182c966ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -40,22 +40,24 @@ /* Firmware Names */ #ifdef CONFIG_DRM_AMDGPU_CIK -#define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin" -#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
-#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" -#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin" -#define FIRMWARE_MULLINS "radeon/mullins_vce.bin" +#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin" +#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin" +#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin" +#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin" +#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin" #endif #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" #define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin" #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" -#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" -#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" +#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" #define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" +#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin" #ifdef CONFIG_DRM_AMDGPU_CIK MODULE_FIRMWARE(FIRMWARE_BONAIRE); @@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); MODULE_FIRMWARE(FIRMWARE_VEGA12); +MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_vce_idle_work_handler(struct work_struct *work); @@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) case CHIP_POLARIS12: fw_name = FIRMWARE_POLARIS12; break; + case CHIP_VEGAM: + fw_name = FIRMWARE_VEGAM; + break; case CHIP_VEGA10: fw_name = FIRMWARE_VEGA10; break; case CHIP_VEGA12: fw_name = FIRMWARE_VEGA12; break; + case CHIP_VEGA20: + fw_name = FIRMWARE_VEGA20; + break; default: return -EINVAL; @@ -180,8 +190,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, amdgpu_sched_jobs, NULL); + r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; @@ -212,7 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; - drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); + drm_sched_entity_destroy(&adev->vce.ring[0].sched, &adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, (void **)&adev->vce.cpu_addr); @@ -460,12 +469,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; - amdgpu_job_free(job); if (fence) *fence = dma_fence_get(f); dma_fence_put(f); @@ -522,19 +529,13 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); - if (r) - goto err; - - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, + if (direct) + r = amdgpu_job_submit_direct(job, ring, &f); + else + r = amdgpu_job_submit(job, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); - 
if (r) - goto err; - } + if (r) + goto err; if (fence) *fence = dma_fence_get(f); @@ -755,6 +756,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) if (r) goto out; break; + + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, + idx + 2, 0, 0); + if (r) + goto out; + + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8, + idx + 7, 0, 0); + if (r) + goto out; + break; } idx += len / 4; @@ -860,6 +873,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) goto out; break; + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, + idx + 2, *size, 0); + if (r) + goto out; + + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8, + idx + 7, *size / 12, 0); + if (r) + goto out; + break; + default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); r = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 58e495330b38..798648a19710 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -49,12 +49,10 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_sw_init(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; - unsigned version_major, version_minor, family_id; + unsigned char fw_check; int r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -84,12 +82,34 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - family_id = le32_to_cpu(hdr->ucode_version) & 0xff; - version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; - version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", - version_major, version_minor, family_id); + adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); + + /* Bits 20-23 hold the encode major version and are non-zero in the new + * naming convention. In the old naming convention this field is part of + * the version minor and DRM_DISABLED_FLAG. Since the latest version minor + * is 0x5B and DRM_DISABLED_FLAG is zero in the old convention, this field + * is always zero so far. These four bits are used to tell which naming + * convention is present.
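+ *
+ * As a purely illustrative example, a hypothetical ucode_version of
+ * 0x2290800A decodes under the new convention as:
+ *
+ *	fw_check  = (0x2290800A >> 20) & 0xf   = 9 (non-zero => new style)
+ *	vep       = (0x2290800A >> 28) & 0xf   = 2
+ *	dec_ver   = (0x2290800A >> 24) & 0xf   = 2
+ *	enc_major = fw_check                   = 9
+ *	enc_minor = (0x2290800A >> 12) & 0xff  = 0x08
+ *	fw_rev    =  0x2290800A & 0xfff        = 0x00A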
+ */ + fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf; + if (fw_check) { + unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev; + + fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff; + enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff; + enc_major = fw_check; + dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf; + vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; + DRM_INFO("Found VCN firmware Version ENC: %hu.%hu DEC: %hu VEP: %hu Revision: %hu\n", + enc_major, enc_minor, dec_ver, vep, fw_rev); + } else { + unsigned int version_major, version_minor, family_id; + family_id = le32_to_cpu(hdr->ucode_version) & 0xff; + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + } bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE @@ -102,24 +122,6 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } - ring = &adev->vcn.ring_dec; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, - rq, amdgpu_sched_jobs, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCN dec run queue.\n"); - return r; - } - - ring = &adev->vcn.ring_enc[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, - rq, amdgpu_sched_jobs, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCN enc run queue.\n"); - return r; - } - return 0; } @@ -129,10 +131,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) kfree(adev->vcn.saved_bo); - drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); - - drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); - amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, (void **)&adev->vcn.cpu_addr); @@ -142,6 +140,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + amdgpu_ring_fini(&adev->vcn.ring_jpeg); + release_firmware(adev->vcn.fw); return 0; @@ -205,13 +205,20 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + unsigned i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + } + + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); if (fences == 0) { - if (adev->pm.dpm_enabled) { - /* might be used when with pg/cg + if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); - */ - } + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); } else { schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } @@ -222,10 +229,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); - if (set_clocks && adev->pm.dpm_enabled) { - /* might be used when with pg/cg - amdgpu_dpm_enable_uvd(adev, true); - */ + if (set_clocks) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); } } @@ -271,7 +280,7 @@ int 
amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) } static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, - struct amdgpu_bo *bo, bool direct, + struct amdgpu_bo *bo, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; @@ -299,19 +308,9 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, } ib->length_dw = 16; - if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); - if (r) - goto err_free; - - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err_free; - } + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; amdgpu_bo_fence(bo, f, false); amdgpu_bo_unreserve(bo); @@ -363,11 +362,11 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = 14; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); + return amdgpu_vcn_dec_send_msg(ring, bo, fence); } static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_bo *bo = NULL; @@ -389,7 +388,7 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = 6; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); + return amdgpu_vcn_dec_send_msg(ring, bo, fence); } int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -403,7 +402,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto error; } - r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); if (r) { DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); goto error; @@ -497,12 +496,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; - amdgpu_job_free(job); if (fence) *fence = dma_fence_get(f); dma_fence_put(f); @@ -551,12 +548,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; - amdgpu_job_free(job); if (fence) *fence = dma_fence_get(f); dma_fence_put(f); @@ -599,3 +594,127 @@ error: dma_fence_put(fence); return r; } + +int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + 
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + + return r; +} + +static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + const unsigned ib_size_dw = 16; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + + ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0); + ib->ptr[1] = 0xDEADBEEF; + for (i = 2; i < 16; i += 2) { + ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err; + + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + struct dma_fence *fence = NULL; + long r = 0; + + r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto error; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto error; + } else + r = 0; + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else { + DRM_ERROR("ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + + dma_fence_put(fence); + +error: + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 2fd7db891689..0b0b8638d73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -45,6 +45,17 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +enum engine_status_constants { + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, + UVD_STATUS__UVD_BUSY = 0x00000004, + GB_ADDR_CONFIG_DEFAULT = 0x26010011, + UVD_STATUS__IDLE = 0x2, + UVD_STATUS__BUSY = 0x5, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1, + UVD_STATUS__RBC_BUSY = 0x1, +}; + struct amdgpu_vcn { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -55,9 +66,8 @@ struct amdgpu_vcn { const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; + struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; - struct drm_sched_entity entity_dec; - struct drm_sched_entity entity_enc; unsigned num_enc_rings; }; @@ -74,4 +84,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index da55a78d7380..098dd1ba751a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -33,9 +33,11 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_gmc.h" -/* - * GPUVM +/** + * DOC: GPUVM + * * GPUVM is similar to the legacy gart on older asics, however * rather than there being a single global gart table * for the entire GPU, there are multiple VM page tables active @@ -63,43 +65,123 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef START #undef LAST -/* Local structure. Encapsulate some VM table update parameters to reduce +/** + * struct amdgpu_pte_update_params - Local structure + * + * Encapsulate some VM table update parameters to reduce * the number of function parameters + * */ struct amdgpu_pte_update_params { - /* amdgpu device we do this update for */ + + /** + * @adev: amdgpu device we do this update for + */ struct amdgpu_device *adev; - /* optional amdgpu_vm we do this update for */ + + /** + * @vm: optional amdgpu_vm we do this update for + */ struct amdgpu_vm *vm; - /* address where to copy page table entries from */ + + /** + * @src: address where to copy page table entries from + */ uint64_t src; - /* indirect buffer to fill with commands */ + + /** + * @ib: indirect buffer to fill with commands + */ struct amdgpu_ib *ib; - /* Function which actually does the update */ + + /** + * @func: Function which actually does the update + */ void (*func)(struct amdgpu_pte_update_params *params, struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); - /* The next two are used during VM update by CPU - * DMA addresses to use for mapping - * Kernel pointer of PD/PT BO that needs to be updated + /** + * @pages_addr: + * + * DMA addresses to use for mapping, used during VM update by CPU */ dma_addr_t *pages_addr; + + /** + * @kptr: + * + * Kernel pointer of PD/PT BO that needs to be updated, + * used during VM update by CPU + */ void *kptr; }; -/* Helper to disable partial resident texture feature from a fence callback */ +/** + * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback + */ struct amdgpu_prt_cb { + + /** + * @adev: amdgpu device + */ struct amdgpu_device *adev; + + /** + * @cb: callback + */ struct dma_fence_cb cb; }; /** + * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm + * + * @base: base structure for tracking BO usage in a VM + * @vm: vm to which bo is to be added + * @bo: amdgpu buffer object + * + * Initialize a bo_va_base structure and add it to the appropriate lists + * + */ +static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo) +{ + base->vm = vm; + base->bo = bo; + INIT_LIST_HEAD(&base->bo_list); + INIT_LIST_HEAD(&base->vm_status); + + if (!bo) + return; + list_add_tail(&base->bo_list, &bo->va); + + if (bo->tbo.type == ttm_bo_type_kernel) + list_move(&base->vm_status, &vm->relocated); + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return; + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return; + + /* + * we checked all the prerequisites, but it looks like this per vm bo + * is currently evicted. add the bo to the evicted list to make sure it + * is validated on next vm use to avoid fault. 
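+ * being on the evicted list means amdgpu_vm_ready() reports false and + * amdgpu_vm_validate_pt_bos() validates the bo again before the vm is + * used.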
+ * */ + list_move_tail(&base->vm_status, &vm->evicted); +} + +/** * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer + * @level: VMPT level * - * Returns the number of bits the pfn needs to be right shifted for a level. + * Returns: + * The number of bits the pfn needs to be right shifted for a level. */ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, unsigned level) @@ -127,8 +209,10 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, * amdgpu_vm_num_entries - return the number of entries in a PD/PT * * @adev: amdgpu_device pointer + * @level: VMPT level * - * Calculate the number of entries in a page directory or page table. + * Returns: + * The number of entries in a page directory or page table. */ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) @@ -151,8 +235,10 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, * amdgpu_vm_bo_size - returns the size of the BOs in bytes * * @adev: amdgpu_device pointer + * @level: VMPT level * - * Calculate the size of the BO for a page directory or page table in bytes. + * Returns: + * The size of the BO for a page directory or page table in bytes. */ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) { @@ -190,30 +276,25 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, * @param: parameter for the validation callback * * Validate the page table BOs on command submission if necessary. + * + * Returns: + * Validation result. */ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { struct ttm_bo_global *glob = adev->mman.bdev.glob; - int r; + struct amdgpu_vm_bo_base *bo_base, *tmp; + int r = 0; - spin_lock(&vm->status_lock); - while (!list_empty(&vm->evicted)) { - struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *bo; + list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { + struct amdgpu_bo *bo = bo_base->bo; - bo_base = list_first_entry(&vm->evicted, - struct amdgpu_vm_bo_base, - vm_status); - spin_unlock(&vm->status_lock); - - bo = bo_base->bo; - BUG_ON(!bo); if (bo->parent) { r = validate(param, bo); if (r) - return r; + break; spin_lock(&glob->lru_lock); ttm_bo_move_to_lru_tail(&bo->tbo); @@ -222,22 +303,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&glob->lru_lock); } - if (bo->tbo.type == ttm_bo_type_kernel && - vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(bo, NULL); - if (r) - return r; - } - - spin_lock(&vm->status_lock); - if (bo->tbo.type != ttm_bo_type_kernel) + if (bo->tbo.type != ttm_bo_type_kernel) { + spin_lock(&vm->moved_lock); list_move(&bo_base->vm_status, &vm->moved); - else + spin_unlock(&vm->moved_lock); + } else { list_move(&bo_base->vm_status, &vm->relocated); + } } - spin_unlock(&vm->status_lock); - return 0; + spin_lock(&glob->lru_lock); + list_for_each_entry(bo_base, &vm->idle, vm_status) { + struct amdgpu_bo *bo = bo_base->bo; + + if (!bo->parent) + continue; + + ttm_bo_move_to_lru_tail(&bo->tbo); + if (bo->shadow) + ttm_bo_move_to_lru_tail(&bo->shadow->tbo); + } + spin_unlock(&glob->lru_lock); + + return r; } /** @@ -246,26 +334,28 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @vm: VM to check * * Check if all VM PDs/PTs are ready for updates + * + * Returns: + * True if eviction list is empty.
*/ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - bool ready; - - spin_lock(&vm->status_lock); - ready = list_empty(&vm->evicted); - spin_unlock(&vm->status_lock); - - return ready; + return list_empty(&vm->evicted); } /** * amdgpu_vm_clear_bo - initially clear the PDs/PTs * * @adev: amdgpu_device pointer + * @vm: VM to clear BO from * @bo: BO to clear * @level: level this BO is at + * @pte_support_ats: indicate ATS support from PTE * * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise. */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, @@ -335,8 +425,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error_free; - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED, + &fence); if (r) goto error_free; @@ -361,10 +451,16 @@ error: * * @adev: amdgpu_device pointer * @vm: requested vm + * @parent: parent PT * @saddr: start of the address range * @eaddr: end of the address range + * @level: VMPT level + * @ats: indicate ATS support from PTE * * Make sure the page directories and page tables are allocated + * + * Returns: + * 0 on success, errno otherwise. */ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -412,11 +508,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_bo *pt; if (!entry->base.bo) { - r = amdgpu_bo_create(adev, - amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, flags, - ttm_bo_type_kernel, resv, &pt); + struct amdgpu_bo_param bp; + + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_bo_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = flags; + bp.type = ttm_bo_type_kernel; + bp.resv = resv; + r = amdgpu_bo_create(adev, &bp, &pt); if (r) return r; @@ -441,12 +542,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, */ pt->parent = amdgpu_bo_ref(parent->base.bo); - entry->base.vm = vm; - entry->base.bo = pt; - list_add_tail(&entry->base.bo_list, &pt->va); - spin_lock(&vm->status_lock); - list_add(&entry->base.vm_status, &vm->relocated); - spin_unlock(&vm->status_lock); + amdgpu_vm_bo_base_init(&entry->base, vm, pt); } if (level < AMDGPU_VM_PTB) { @@ -472,6 +568,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, * @size: Size from start address we need. * * Make sure the page tables are allocated. + * + * Returns: + * 0 on success, errno otherwise. */ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -537,6 +636,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) } } +/** + * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. + * + * @ring: ring on which the job will be submitted + * @job: job to submit + * + * Returns: + * True if sync is needed. 
+ */ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job) { @@ -564,19 +672,17 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, return vm_flush_needed || gds_switch_needed; } -static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) -{ - return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); -} - /** * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vmid: vmid number to use - * @pd_addr: address of the page directory + * @job: related job + * @need_pipe_sync: is pipe sync needed * * Emit a VM flush when it is necessary. + * + * Returns: + * 0 on success, errno otherwise. */ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { @@ -628,7 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence); + r = amdgpu_fence_emit(ring, &fence, 0); if (r) return r; } @@ -684,6 +790,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ * Returns the found bo_va or NULL if none is found * * Object has to be reserved! + * + * Returns: + * Found bo_va or NULL. */ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo) @@ -765,7 +874,10 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, * @addr: the unmapped addr * * Look up the physical address of the page that the pte resolves - * to and return the pointer for the page table entry. + * to. + * + * Returns: + * The pointer for the page table entry. */ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { @@ -818,6 +930,17 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, } } + +/** + * amdgpu_vm_wait_pd - Wait for PT BOs to be free. + * + * @adev: amdgpu_device pointer + * @vm: related vm + * @owner: fence owner + * + * Returns: + * 0 on success, errno otherwise. + */ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *owner) { @@ -871,7 +994,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, /* * amdgpu_vm_invalidate_level - mark all PD levels as invalid * + * @adev: amdgpu_device pointer + * @vm: related vm * @parent: parent PD + * @level: VMPT level * * Mark all PD level as invalid after an error. */ @@ -893,10 +1019,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, if (!entry->base.bo) continue; - spin_lock(&vm->status_lock); - if (list_empty(&entry->base.vm_status)) - list_add(&entry->base.vm_status, &vm->relocated); - spin_unlock(&vm->status_lock); + if (!entry->base.moved) + list_move(&entry->base.vm_status, &vm->relocated); amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); } } @@ -908,7 +1032,9 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, * @vm: requested vm * * Makes sure all directories are up to date. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. 
*/ int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) @@ -926,6 +1052,14 @@ restart: params.adev = adev; if (vm->use_cpu_for_update) { + struct amdgpu_vm_bo_base *bo_base; + + list_for_each_entry(bo_base, &vm->relocated, vm_status) { + r = amdgpu_bo_kmap(bo_base->bo, NULL); + if (unlikely(r)) + return r; + } + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -941,7 +1075,6 @@ restart: params.func = amdgpu_vm_do_set_ptes; } - spin_lock(&vm->status_lock); while (!list_empty(&vm->relocated)) { struct amdgpu_vm_bo_base *bo_base, *parent; struct amdgpu_vm_pt *pt, *entry; @@ -950,14 +1083,12 @@ restart: bo_base = list_first_entry(&vm->relocated, struct amdgpu_vm_bo_base, vm_status); + bo_base->moved = false; list_del_init(&bo_base->vm_status); - spin_unlock(&vm->status_lock); bo = bo_base->bo->parent; - if (!bo) { - spin_lock(&vm->status_lock); + if (!bo) continue; - } parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, bo_list); @@ -966,12 +1097,10 @@ restart: amdgpu_vm_update_pde(¶ms, vm, pt, entry); - spin_lock(&vm->status_lock); if (!vm->use_cpu_for_update && (ndw - params.ib->length_dw) < 32) break; } - spin_unlock(&vm->status_lock); if (vm->use_cpu_for_update) { /* Flush HDP */ @@ -991,8 +1120,8 @@ restart: amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, AMDGPU_FENCE_OWNER_VM, false); WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); + r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, + &fence); if (r) goto error; @@ -1074,9 +1203,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, if (entry->huge) { /* Add the entry to the relocated list to update it. */ entry->huge = false; - spin_lock(&p->vm->status_lock); list_move(&entry->base.vm_status, &p->vm->relocated); - spin_unlock(&p->vm->status_lock); } return; } @@ -1094,14 +1221,15 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, * amdgpu_vm_update_ptes - make sure that page tables are valid * * @params: see amdgpu_pte_update_params definition - * @vm: requested vm * @start: start of GPU address range * @end: end of GPU address range * @dst: destination address to map to, the next dst inside the function * @flags: mapping flags * * Update the page tables in the range @start - @end. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, @@ -1155,7 +1283,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, @@ -1227,7 +1357,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * @fence: optional resulting fence * * Fill in the page table entries between @start and @last. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. 
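+ * When the root PD has a shadow, the IB size estimate for the begin/end + * fragment commands is doubled, since every update is also written to the + * shadow page tables.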
*/ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, @@ -1303,7 +1435,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ndw += ncmds * 10; /* extra commands for begin/end fragments */ - ndw += 2 * 10 * adev->vm_manager.fragment_size; + if (vm->root.base.bo->shadow) + ndw += 2 * 10 * adev->vm_manager.fragment_size * 2; + else + ndw += 2 * 10 * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } @@ -1350,8 +1485,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &f); + r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) goto error_free; @@ -1379,7 +1513,9 @@ error_free: * * Split the mapping into smaller chunks so that each update fits * into a SDMA IB. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, @@ -1432,7 +1568,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (nodes) { addr = nodes->start << PAGE_SHIFT; max_entries = (nodes->size - pfn) * - (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); + AMDGPU_GPU_PAGES_IN_CPU_PAGE; } else { addr = 0; max_entries = S64_MAX; @@ -1442,7 +1578,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, uint64_t count; max_entries = min(max_entries, 16ull * 1024ull); - for (count = 1; count < max_entries; ++count) { + for (count = 1; + count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; + ++count) { uint64_t idx = pfn + count; if (pages_addr[idx] != @@ -1455,7 +1593,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, dma_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count; + max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; } } else if (flags & AMDGPU_PTE_VALID) { @@ -1470,7 +1608,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (r) return r; - pfn += last - start + 1; + pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE; if (nodes && nodes->size == pfn) { pfn = 0; ++nodes; @@ -1490,7 +1628,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * @clear: if true clear the entries * * Fill in the page table entries for @bo_va. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. 
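+ * On success the BO is also moved to the per-VM idle list, or back to the + * evicted list when it is no longer in one of its preferred domains.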
*/ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, @@ -1506,18 +1646,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, uint64_t flags; int r; - if (clear || !bo_va->base.bo) { + if (clear || !bo) { mem = NULL; nodes = NULL; exclusive = NULL; } else { struct ttm_dma_tt *ttm; - mem = &bo_va->base.bo->tbo.mem; + mem = &bo->tbo.mem; nodes = mem->mm_node; if (mem->mem_type == TTM_PL_TT) { - ttm = container_of(bo_va->base.bo->tbo.ttm, - struct ttm_dma_tt, ttm); + ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } exclusive = reservation_object_get_excl(bo->tbo.resv); @@ -1555,9 +1694,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, amdgpu_asic_flush_hdp(adev, NULL); } - spin_lock(&vm->status_lock); + spin_lock(&vm->moved_lock); list_del_init(&bo_va->base.vm_status); - spin_unlock(&vm->status_lock); + spin_unlock(&vm->moved_lock); + + /* If the BO is not in its preferred location add it back to + * the evicted list so that it gets validated again on the + * next command submission. + */ + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + uint32_t mem_type = bo->tbo.mem.mem_type; + + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) + list_add_tail(&bo_va->base.vm_status, &vm->evicted); + else + list_add(&bo_va->base.vm_status, &vm->idle); + } list_splice_init(&bo_va->invalids, &bo_va->valids); bo_va->cleared = clear; @@ -1572,6 +1724,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, /** * amdgpu_vm_update_prt_state - update the global PRT state + * + * @adev: amdgpu_device pointer */ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) { @@ -1586,6 +1740,8 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) /** * amdgpu_vm_prt_get - add a PRT user + * + * @adev: amdgpu_device pointer */ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) { @@ -1598,6 +1754,8 @@ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) /** * amdgpu_vm_prt_put - drop a PRT user + * + * @adev: amdgpu_device pointer */ static void amdgpu_vm_prt_put(struct amdgpu_device *adev) { @@ -1607,6 +1765,9 @@ static void amdgpu_vm_prt_put(struct amdgpu_device *adev) /** * amdgpu_vm_prt_cb - callback for updating the PRT status + * + * @fence: fence for the callback + * @_cb: the callback function */ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) { @@ -1618,6 +1779,9 @@ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) /** * amdgpu_vm_add_prt_cb - add callback for updating the PRT status + * + * @adev: amdgpu_device pointer + * @fence: fence for the callback */ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, struct dma_fence *fence) @@ -1709,9 +1873,11 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) * or if an error occurred) * * Make sure all freed BOs are cleared in the PT. - * Returns 0 for success. - * * PTs have to be reserved and mutex must be locked! + * + * Returns: + * 0 for success. + * */ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -1756,29 +1922,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @sync: sync object to add fences to * * Make sure all BOs which are moved are updated in the PTs. - * Returns 0 for success. + * + * Returns: + * 0 for success. * * PTs have to be reserved! 
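+ * The moved list is spliced onto a local list under moved_lock and walked + * without it, so concurrent invalidations can keep queueing BOs; on error + * the remaining entries are spliced back.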
*/ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { + struct amdgpu_bo_va *bo_va, *tmp; + struct list_head moved; bool clear; - int r = 0; - - spin_lock(&vm->status_lock); - while (!list_empty(&vm->moved)) { - struct amdgpu_bo_va *bo_va; - struct reservation_object *resv; + int r; - bo_va = list_first_entry(&vm->moved, - struct amdgpu_bo_va, base.vm_status); - spin_unlock(&vm->status_lock); + INIT_LIST_HEAD(&moved); + spin_lock(&vm->moved_lock); + list_splice_init(&vm->moved, &moved); + spin_unlock(&vm->moved_lock); - resv = bo_va->base.bo->tbo.resv; + list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) { + struct reservation_object *resv = bo_va->base.bo->tbo.resv; /* Per VM BOs never need to be cleared in the page tables */ if (resv == vm->root.base.bo->tbo.resv) @@ -1791,17 +1957,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, clear = true; r = amdgpu_vm_bo_update(adev, bo_va, clear); - if (r) + if (r) { + spin_lock(&vm->moved_lock); + list_splice(&moved, &vm->moved); + spin_unlock(&vm->moved_lock); return r; + } if (!clear && resv != vm->root.base.bo->tbo.resv) reservation_object_unlock(resv); - spin_lock(&vm->status_lock); } - spin_unlock(&vm->status_lock); - return r; + return 0; } /** @@ -1813,7 +1981,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, * * Add @bo into the requested vm. * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure + * + * Returns: + * Newly added bo_va or NULL for failure * * Object has to be reserved! */ @@ -1827,36 +1997,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->base.vm = vm; - bo_va->base.bo = bo; - INIT_LIST_HEAD(&bo_va->base.bo_list); - INIT_LIST_HEAD(&bo_va->base.vm_status); + amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - if (!bo) - return bo_va; - - list_add_tail(&bo_va->base.bo_list, &bo->va); - - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) - return bo_va; - - if (bo->preferred_domains & - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) - return bo_va; - - /* - * We checked all the prerequisites, but it looks like this per VM BO - * is currently evicted. add the BO to the evicted list to make sure it - * is validated on next VM use to avoid fault. - * */ - spin_lock(&vm->status_lock); - list_move_tail(&bo_va->base.vm_status, &vm->evicted); - spin_unlock(&vm->status_lock); - return bo_va; } @@ -1884,11 +2030,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (mapping->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { - spin_lock(&vm->status_lock); - if (list_empty(&bo_va->base.vm_status)) - list_add(&bo_va->base.vm_status, &vm->moved); - spin_unlock(&vm->status_lock); + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && + !bo_va->base.moved) { + spin_lock(&vm->moved_lock); + list_move(&bo_va->base.vm_status, &vm->moved); + spin_unlock(&vm->moved_lock); } trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -1900,10 +2046,13 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, * @bo_va: bo_va to store the address * @saddr: where to map the BO * @offset: requested offset in the BO + * @size: BO size in bytes * @flags: attributes of pages (read/write/valid/etc.) * * Add a mapping of the BO at the specified addr into the VM. - * Returns 0 for success, error for failure.
+ * + * Returns: + * 0 for success, error for failure. * * Object has to be reserved and unreserved outside! */ @@ -1961,11 +2110,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, * @bo_va: bo_va to store the address * @saddr: where to map the BO * @offset: requested offset in the BO + * @size: BO size in bytes * @flags: attributes of pages (read/write/valid/etc.) * * Add a mapping of the BO at the specified addr into the VM. Replace existing * mappings as we do so. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. * * Object has to be reserved and unreserved outside! */ @@ -2022,7 +2174,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, * @saddr: where the BO is mapped * * Remove a mapping of the BO at the specified addr from the VM. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. * * Object has to be reserved and unreserved outside! */ @@ -2076,7 +2230,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * @size: size of the range * * Remove all mappings in a range, split them as appropriate. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. */ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -2112,7 +2268,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, before->last = saddr - 1; before->offset = tmp->offset; before->flags = tmp->flags; - list_add(&before->list, &tmp->list); + before->bo_va = tmp->bo_va; + list_add(&before->list, &tmp->bo_va->invalids); } /* Remember mapping split at the end */ @@ -2122,7 +2279,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, after->offset = tmp->offset; after->offset += after->start - tmp->start; after->flags = tmp->flags; - list_add(&after->list, &tmp->list); + after->bo_va = tmp->bo_va; + list_add(&after->list, &tmp->bo_va->invalids); } list_del(&tmp->list); @@ -2171,8 +2329,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, * amdgpu_vm_bo_lookup_mapping - find mapping by address * * @vm: the requested VM + * @addr: the address * * Find a mapping by its address. + * + * Returns: + * The amdgpu_bo_va_mapping matching addr or NULL + * */ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, uint64_t addr) @@ -2198,9 +2361,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_del(&bo_va->base.bo_list); - spin_lock(&vm->status_lock); + spin_lock(&vm->moved_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->status_lock); + spin_unlock(&vm->moved_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2224,8 +2387,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, * amdgpu_vm_bo_invalidate - mark the bo as invalid * * @adev: amdgpu_device pointer - * @vm: requested vm * @bo: amdgpu buffer object + * @evicted: is the BO evicted * * Mark @bo as invalid.
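+ * Shadow BOs have no vm_bo_base of their own, so the invalidation is + * redirected to their parent BO first.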
*/ @@ -2234,36 +2397,45 @@ { struct amdgpu_vm_bo_base *bo_base; + /* shadow bo doesn't have bo base, its validation needs its parent */ + if (bo->parent && bo->parent->shadow == bo) + bo = bo->parent; + list_for_each_entry(bo_base, &bo->va, bo_list) { struct amdgpu_vm *vm = bo_base->vm; + bool was_moved = bo_base->moved; bo_base->moved = true; if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { - spin_lock(&bo_base->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&bo_base->vm_status, &vm->evicted); else list_move_tail(&bo_base->vm_status, &vm->evicted); - spin_unlock(&bo_base->vm->status_lock); continue; } - if (bo->tbo.type == ttm_bo_type_kernel) { - spin_lock(&bo_base->vm->status_lock); - if (list_empty(&bo_base->vm_status)) - list_add(&bo_base->vm_status, &vm->relocated); - spin_unlock(&bo_base->vm->status_lock); + if (was_moved) continue; - } - spin_lock(&bo_base->vm->status_lock); - if (list_empty(&bo_base->vm_status)) - list_add(&bo_base->vm_status, &vm->moved); - spin_unlock(&bo_base->vm->status_lock); + if (bo->tbo.type == ttm_bo_type_kernel) { + list_move(&bo_base->vm_status, &vm->relocated); + } else { + spin_lock(&bo_base->vm->moved_lock); + list_move(&bo_base->vm_status, &vm->moved); + spin_unlock(&bo_base->vm->moved_lock); + } } } +/** + * amdgpu_vm_get_block_size - calculate VM page table size as power of two + * + * @vm_size: VM size + * + * Returns: + * VM page table size as power of two + */ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) { /* Total bits covered by PD + PTs */ @@ -2282,6 +2454,10 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) * * @adev: amdgpu_device pointer * @vm_size: the default vm size if it's set auto + * @fragment_size_default: Default PTE fragment size + * @max_level: max VMPT level + * @max_bits: max address space size in bits + * */ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, uint32_t fragment_size_default, unsigned max_level, @@ -2349,12 +2525,18 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, * @adev: amdgpu_device pointer * @vm: requested vm * @vm_context: Indicates if it is a GFX or Compute context + * @pasid: Process address space identifier * * Init @vm fields. + * + * Returns: + * 0 for success, error for failure.
*/ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid) { + struct amdgpu_bo_param bp; + struct amdgpu_bo *root; const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; @@ -2367,10 +2549,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->va = RB_ROOT_CACHED; for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; - spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->relocated); + spin_lock_init(&vm->moved_lock); INIT_LIST_HEAD(&vm->moved); + INIT_LIST_HEAD(&vm->idle); INIT_LIST_HEAD(&vm->freed); /* create scheduler entity for page table updates */ @@ -2379,8 +2562,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring_instance %= adev->vm_manager.vm_pte_num_rings; ring = adev->vm_manager.vm_pte_rings[ring_instance]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&ring->sched, &vm->entity, - rq, amdgpu_sched_jobs, NULL); + r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL); if (r) return r; @@ -2398,7 +2580,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; @@ -2409,24 +2591,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, - ttm_bo_type_kernel, NULL, &vm->root.base.bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = align; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = flags; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &root); if (r) goto error_free_sched_entity; - r = amdgpu_bo_reserve(vm->root.base.bo, true); + r = amdgpu_bo_reserve(root, true); if (r) goto error_free_root; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); if (r) goto error_unreserve; - vm->root.base.vm = vm; - list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); - list_add_tail(&vm->root.base.vm_status, &vm->evicted); + amdgpu_vm_bo_base_init(&vm->root.base, vm, root); amdgpu_bo_unreserve(vm->root.base.bo); if (pasid) { @@ -2456,7 +2642,7 @@ error_free_root: vm->root.base.bo = NULL; error_free_sched_entity: - drm_sched_entity_fini(&ring->sched, &vm->entity); + drm_sched_entity_destroy(&ring->sched, &vm->entity); return r; } @@ -2464,6 +2650,9 @@ error_free_sched_entity: /** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * + * @adev: amdgpu_device pointer + * @vm: requested vm + * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. * @@ -2476,7 +2665,8 @@ error_free_sched_entity: * setting. May leave behind an unused shadow BO for the page * directory when switching from SDMA updates to CPU updates. * - * Returns 0 for success, -errno for errors. + * Returns: + * 0 for success, -errno for errors. 
*/ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { @@ -2510,7 +2700,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pte_support_ats = pte_support_ats; DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->pasid) { @@ -2589,7 +2779,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - drm_sched_entity_fini(vm->entity.sched, &vm->entity); + drm_sched_entity_destroy(vm->entity.sched, &vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2631,8 +2821,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @pasid: PASID to identify the VM * - * This function is expected to be called in interrupt context. Returns - * true if there was fault credit, false otherwise + * This function is expected to be called in interrupt context. + * + * Returns: + * True if there was fault credit, false otherwise */ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, unsigned int pasid) @@ -2686,7 +2878,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ #ifdef CONFIG_X86_64 if (amdgpu_vm_update_mode == -1) { - if (amdgpu_vm_is_large_bar(adev)) + if (amdgpu_gmc_vram_full_visible(&adev->gmc)) adev->vm_manager.vm_update_mode = AMDGPU_VM_USE_CPU_FOR_COMPUTE; else @@ -2716,6 +2908,16 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) amdgpu_vmid_mgr_fini(adev); } +/** + * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. + * + * @dev: drm device pointer + * @data: drm_amdgpu_vm + * @filp: drm file pointer + * + * Returns: + * 0 for success, -errno for errors. + */ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_vm *args = data; @@ -2739,3 +2941,42 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return 0; } + +/** + * amdgpu_vm_get_task_info - Extracts task info for a PASID. + * + * @adev: amdgpu_device pointer + * @pasid: PASID identifier for VM + * @task_info: task_info to fill. + */ +void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, + struct amdgpu_task_info *task_info) +{ + struct amdgpu_vm *vm; + + spin_lock(&adev->vm_manager.pasid_lock); + + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm) + *task_info = vm->task_info; + + spin_unlock(&adev->vm_manager.pasid_lock); +} + +/** + * amdgpu_vm_set_task_info - Sets the VM's task info.
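+ * The info is captured only once, on the first submission from the task; + * later calls leave it unchanged.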
+ * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ + if (!vm->task_info.pid) { + vm->task_info.pid = current->pid; + get_task_comm(vm->task_info.task_name, current); + + if (current->group_leader->mm == current->mm) { + vm->task_info.tgid = current->group_leader->pid; + get_task_comm(vm->task_info.process_name, current->group_leader); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 30f080364c97..d416f895233d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry; /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) -/* VEGA10 only */ + +/* For GFX9 */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) -/* For Raven */ +#define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ @@ -163,13 +164,18 @@ struct amdgpu_vm_pt { #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) #define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) + +struct amdgpu_task_info { + char process_name[TASK_COMM_LEN]; + char task_name[TASK_COMM_LEN]; + pid_t pid; + pid_t tgid; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* protecting invalidated */ - spinlock_t status_lock; - /* BOs who needs a validation */ struct list_head evicted; @@ -178,6 +184,10 @@ struct amdgpu_vm { /* BOs moved, but not yet updated in the PT */ struct list_head moved; + spinlock_t moved_lock; + + /* All BOs of this VM not currently in the state machine */ + struct list_head idle; /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; @@ -186,9 +196,6 @@ struct amdgpu_vm { struct amdgpu_vm_pt root; struct dma_fence *last_update; - /* protecting freed */ - spinlock_t freed_lock; - /* Scheduler entity for page table updates */ struct drm_sched_entity entity; @@ -216,6 +223,9 @@ struct amdgpu_vm { /* Valid while the PD is reserved or fenced */ uint64_t pd_phys_addr; + + /* Some basic info about the task */ + struct amdgpu_task_info task_info; }; struct amdgpu_vm_manager { @@ -318,4 +328,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); +void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, + struct amdgpu_task_info *task_info); + +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 9aca653bec07..9cfa8a9ada92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -97,6 +97,34 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, } /** + * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size + * + * @bo: &amdgpu_bo buffer object (must be in VRAM) + * + * Returns: + * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM. 
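+ * Example: with a 256 MiB visible window, a BO lying entirely below the + * window reports its full size, one lying entirely above it reports 0, and + * a straddling BO reports only the pages below the boundary.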
+ */ +u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_mem_reg *mem = &bo->tbo.mem; + struct drm_mm_node *nodes = mem->mm_node; + unsigned pages = mem->num_pages; + u64 usage; + + if (amdgpu_gmc_vram_full_visible(&adev->gmc)) + return amdgpu_bo_size(bo); + + if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) + return 0; + + for (usage = 0; nodes && pages; pages -= nodes->size, nodes++) + usage += amdgpu_vram_mgr_vis_size(adev, nodes); + + return usage; +} + +/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -135,7 +163,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } - nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL); + nodes = kvmalloc_array(num_nodes, sizeof(*nodes), + GFP_KERNEL | __GFP_ZERO); if (!nodes) return -ENOMEM; @@ -190,7 +219,7 @@ error: drm_mm_remove_node(&nodes[i]); spin_unlock(&mgr->lock); - kfree(nodes); + kvfree(nodes); return r == -ENOSPC ? 0 : r; } @@ -229,7 +258,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, atomic64_sub(usage, &mgr->usage); atomic64_sub(vis_usage, &mgr->vis_usage); - kfree(mem->mm_node); + kvfree(mem->mm_node); mem->mm_node = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 69500a8b4e2d..e9934de1b9cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1221,7 +1221,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.abort = false; ectx.last_jump = 0; if (ws) - ectx.ws = kzalloc(4 * ws, GFP_KERNEL); + ectx.ws = kcalloc(4, ws, GFP_KERNEL); else ectx.ws = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 47ef3e6e7178..d2469453dca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -49,10 +49,10 @@ #include "gmc/gmc_7_1_d.h" #include "gmc/gmc_7_1_sh_mask.h" -MODULE_FIRMWARE("radeon/bonaire_smc.bin"); -MODULE_FIRMWARE("radeon/bonaire_k_smc.bin"); -MODULE_FIRMWARE("radeon/hawaii_smc.bin"); -MODULE_FIRMWARE("radeon/hawaii_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_smc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_smc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin"); #define MC_CG_ARB_FREQ_F0 0x0a #define MC_CG_ARB_FREQ_F1 0x0b @@ -951,12 +951,12 @@ static void ci_apply_state_adjust_rules(struct amdgpu_device *adev, else pi->battery_state = false; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - if (adev->pm.dpm.ac_power == false) { + if (adev->pm.ac_power == false) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; @@ -4078,7 +4078,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4127,7 +4127,7 @@ static int ci_enable_vce_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if 
(adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4160,7 +4160,7 @@ static int ci_enable_samu_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4191,7 +4191,7 @@ static int ci_enable_acp_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -5679,8 +5679,9 @@ static int ci_parse_power_table(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); - adev->pm.dpm.ps = kzalloc(sizeof(struct amdgpu_ps) * - state_array->ucNumEntries, GFP_KERNEL); + adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries, + sizeof(struct amdgpu_ps), + GFP_KERNEL); if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; @@ -5814,7 +5815,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name); err = request_firmware(&adev->pm.fw, fw_name, adev->dev); if (err) goto out; @@ -5845,8 +5846,7 @@ static int ci_dpm_init(struct amdgpu_device *adev) adev->pm.dpm.priv = pi; pi->sys_pcie_mask = - (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >> - CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT; + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK; pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID; @@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev) pi->pcie_dpm_key_disabled = 0; pi->thermal_sclk_dpm_enabled = 0; - if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) + if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; @@ -5927,7 +5927,9 @@ static int ci_dpm_init(struct amdgpu_device *adev) ci_set_private_data_variables_based_on_pptable(adev); adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries = - kzalloc(4 * sizeof(struct amdgpu_clock_voltage_dependency_entry), GFP_KERNEL); + kcalloc(4, + sizeof(struct amdgpu_clock_voltage_dependency_entry), + GFP_KERNEL); if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { ci_dpm_fini(adev); return -ENOMEM; @@ -6255,7 +6257,7 @@ static int ci_dpm_late_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; /* init the sysfs and debugfs files late */ @@ -6764,6 +6766,19 @@ static int ci_dpm_read_sensor(void *handle, int idx, } } +static int ci_set_powergating_by_smu(void *handle, + uint32_t block_type, bool gate) +{ + switch (block_type) { + case AMD_IP_BLOCK_TYPE_UVD: + ci_dpm_powergate_uvd(handle, gate); + break; + default: + break; + } + return 0; +} + static const struct amd_ip_funcs ci_dpm_ip_funcs = { .name = "ci_dpm", .early_init = ci_dpm_early_init, @@ -6801,7 +6816,7 @@ static const struct amd_pm_funcs ci_dpm_funcs = { .debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level, 
.force_performance_level = &ci_dpm_force_performance_level, .vblank_too_short = &ci_dpm_vblank_too_short, - .powergate_uvd = &ci_dpm_powergate_uvd, + .set_powergating_by_smu = &ci_set_powergating_by_smu, .set_fan_control_mode = &ci_dpm_set_fan_control_mode, .get_fan_control_mode = &ci_dpm_get_fan_control_mode, .set_fan_speed_percent = &ci_dpm_set_fan_speed_percent, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 0df22030e713..702e257a483f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev, } } +static bool cik_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we support soft reset */ + return true; +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_config_memsize = &cik_get_config_memsize, .flush_hdp = &cik_flush_hdp, .invalidate_hdp = &cik_invalidate_hdp, + .need_full_reset = &cik_need_full_reset, }; static int cik_common_early_init(void *handle) @@ -1996,9 +2003,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); if (amdgpu_dpm == -1) - amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); - else amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2017,9 +2024,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); if (amdgpu_dpm == -1) - amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); - else amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index a7576255cc30..d0fa2aac2388 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -54,16 +54,16 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); static int cik_sdma_soft_reset(void *handle); -MODULE_FIRMWARE("radeon/bonaire_sdma.bin"); -MODULE_FIRMWARE("radeon/bonaire_sdma1.bin"); -MODULE_FIRMWARE("radeon/hawaii_sdma.bin"); -MODULE_FIRMWARE("radeon/hawaii_sdma1.bin"); -MODULE_FIRMWARE("radeon/kaveri_sdma.bin"); -MODULE_FIRMWARE("radeon/kaveri_sdma1.bin"); -MODULE_FIRMWARE("radeon/kabini_sdma.bin"); -MODULE_FIRMWARE("radeon/kabini_sdma1.bin"); -MODULE_FIRMWARE("radeon/mullins_sdma.bin"); -MODULE_FIRMWARE("radeon/mullins_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_sdma.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/kabini_sdma.bin"); +MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/mullins_sdma.bin"); +MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin"); u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); @@ 
-132,9 +132,9 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) - snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); if (err) goto out; @@ -177,9 +177,8 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; + return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) & 0x3fffc) >> 2; } /** @@ -192,9 +191,8 @@ static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } @@ -248,7 +246,7 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring) SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ u32 ref_and_mask; - if (ring == &ring->adev->sdma.instance[0].ring) + if (ring->me == 0) ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK; else ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK; @@ -1290,8 +1288,10 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } } static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 452f88ea46a2..308f9f238bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -41,6 +41,8 @@ #include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h" + static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1823,7 +1825,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1842,32 +1843,28 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; - if (atomic) { - fb_location = amdgpu_bo_gpu_offset(abo); - } else { - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); + if (!atomic) { + r = amdgpu_bo_pin(abo, 
AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); return -EINVAL; } } + fb_location = amdgpu_bo_gpu_offset(abo); amdgpu_bo_get_tiling_flags(abo, &tiling_flags); amdgpu_bo_unreserve(abo); @@ -2043,8 +2040,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2375,13 +2371,14 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); + ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); drm_gem_object_put_unlocked(obj); return ret; } + amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); dce_v10_0_lock_cursor(crtc, true); @@ -2526,11 +2523,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc) dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2744,14 +2739,14 @@ static int dce_v10_0_sw_init(void *handle) return r; } - for (i = 8; i < 20; i += 2) { + for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index a7c1c584a191..76dfb76f7900 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -41,6 +41,8 @@ #include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h" + static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); @@ -173,6 +175,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) ARRAY_SIZE(polaris11_golden_settings_a11)); break; case CHIP_POLARIS10: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, polaris10_golden_settings_a11, ARRAY_SIZE(polaris10_golden_settings_a11)); @@ -473,6 +476,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) num_crtc = 2; break; case CHIP_POLARIS10: + case CHIP_VEGAM: num_crtc = 6; break; case CHIP_POLARIS11: @@ -1445,6 +1449,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) adev->mode_info.audio.num_pins = 7; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.audio.num_pins = 8; break; case CHIP_POLARIS11: @@ -1862,7 +1867,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; 
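/*
 * Aside: the change repeated across these DCE hunks is the new
 * amdgpu_bo_pin() contract. The old call returned the GPU address through
 * an out parameter; the new one only pins, and callers read the address
 * back with amdgpu_bo_gpu_offset() afterwards, so the atomic and legacy
 * paths share one offset query. A minimal sketch of the resulting
 * pattern; example_pin_fb() is a hypothetical helper, not part of this
 * patch, with error handling trimmed:
 */
static int example_pin_fb(struct amdgpu_bo *abo, bool atomic, u64 *fb_location)
{
	int r = amdgpu_bo_reserve(abo, false);

	if (unlikely(r != 0))
		return r;
	/* Atomic flips arrive already pinned; only the legacy path pins. */
	if (!atomic) {
		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
		if (unlikely(r != 0)) {
			amdgpu_bo_unreserve(abo);
			return -EINVAL;
		}
	}
	/* Valid while the BO stays pinned, on either path. */
	*fb_location = amdgpu_bo_gpu_offset(abo);
	amdgpu_bo_unreserve(abo);
	return 0;
}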
struct amdgpu_bo *abo; @@ -1881,32 +1885,28 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; - if (atomic) { - fb_location = amdgpu_bo_gpu_offset(abo); - } else { - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); + if (!atomic) { + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); return -EINVAL; } } + fb_location = amdgpu_bo_gpu_offset(abo); amdgpu_bo_get_tiling_flags(abo, &tiling_flags); amdgpu_bo_unreserve(abo); @@ -2082,8 +2082,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2253,7 +2252,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(amdgpu_crtc->encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; @@ -2450,13 +2450,14 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); + ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); drm_gem_object_put_unlocked(obj); return ret; } + amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); dce_v11_0_lock_cursor(crtc, true); @@ -2601,11 +2602,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2673,7 +2672,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(amdgpu_crtc->encoder); int encoder_mode = @@ -2830,6 +2830,7 @@ static int dce_v11_0_early_init(void *handle) adev->mode_info.num_dig = 9; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; break; @@ -2859,14 +2860,14 @@ static int dce_v11_0_sw_init(void *handle) return r; } - for (i = 8; i < 20; i += 2) { + for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* 
HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) return r; @@ -2949,7 +2950,8 @@ static int dce_v11_0_hw_init(void *handle) amdgpu_atombios_encoder_init_dig(adev); if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); amdgpu_atombios_crtc_set_dce_clock(adev, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 9f67b7fd3487..c9adc627305d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1798,32 +1797,28 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; - if (atomic) { - fb_location = amdgpu_bo_gpu_offset(abo); - } else { - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); + if (!atomic) { + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); return -EINVAL; } } + fb_location = amdgpu_bo_gpu_offset(abo); amdgpu_bo_get_tiling_flags(abo, &tiling_flags); amdgpu_bo_unreserve(abo); @@ -1978,8 +1973,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2268,13 +2262,14 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); + ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); drm_gem_object_put_unlocked(obj); return ret; } + amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); dce_v6_0_lock_cursor(crtc, true); @@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc) dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index f55422cbd77a..50cd03beac7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1773,32 +1772,28 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; - if (atomic) { - fb_location = amdgpu_bo_gpu_offset(abo); - } else { - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); + if (!atomic) { + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); return -EINVAL; } } + fb_location = amdgpu_bo_gpu_offset(abo); amdgpu_bo_get_tiling_flags(abo, &tiling_flags); amdgpu_bo_unreserve(abo); @@ -1955,8 +1950,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2279,13 +2273,14 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); + ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); drm_gem_object_put_unlocked(obj); return ret; } + amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); dce_v8_0_lock_cursor(crtc, true); @@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b51f05dc9582..15257634a53a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -36,6 +36,7 @@ #include "dce_v10_0.h" #include "dce_v11_0.h" #include "dce_virtual.h" +#include "ivsrcid/ivsrcid_vislands30.h" #define DCE_VIRTUAL_VBLANK_PERIOD 16666666 @@ -168,11 +169,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc) dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, 
true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -271,25 +270,18 @@ static int dce_virtual_early_init(void *handle) static struct drm_encoder * dce_virtual_encoder(struct drm_connector *connector) { - int enc_id = connector->encoder_ids[0]; struct drm_encoder *encoder; int i; - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { - if (connector->encoder_ids[i] == 0) - break; - - encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]); - if (!encoder) - continue; - + drm_connector_for_each_possible_encoder(connector, encoder, i) { if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) return encoder; } /* pick the first one */ - if (enc_id) - return drm_encoder_find(connector->dev, NULL, enc_id); + drm_connector_for_each_possible_encoder(connector, encoder, i) + return encoder; + return NULL; } @@ -329,7 +321,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector) return 0; } -static int dce_virtual_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { return MODE_OK; @@ -380,7 +372,7 @@ static int dce_virtual_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 229, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); if (r) return r; @@ -462,8 +454,9 @@ static int dce_virtual_hw_init(void *handle) break; case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_VEGAM: dce_v11_0_disable_dce(adev); break; case CHIP_TOPAZ: @@ -474,6 +467,7 @@ static int dce_virtual_hw_init(void *handle) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: break; default: DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); @@ -634,7 +628,7 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, drm_connector_register(connector); /* link them */ - drm_mode_connector_attach_encoder(connector, encoder); + drm_connector_attach_encoder(connector, encoder); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c new file mode 100644 index 000000000000..9935371db7ce --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -0,0 +1,120 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v1_7.h" + +#include "df/df_1_7_default.h" +#include "df/df_1_7_offset.h" +#include "df/df_1_7_sh_mask.h" + +static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; + +static void df_v1_7_init(struct amdgpu_device *adev) +{ +} + +static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + if (enable) { + tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); + tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); + } else + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, + mmFabricConfigAccessControl_DEFAULT); +} + +static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + + return tmp; +} + +static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) +{ + int fb_channel_number; + + fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + + return df_v1_7_channel_number[fb_channel_number]; +} + +static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* Put DF on broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V1_7_MGCG_DISABLE; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } + + /* Exit broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); +} + +static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + u32 tmp; + + /* AMD_CG_SUPPORT_DF_MGCG */ + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY) + *flags |= AMD_CG_SUPPORT_DF_MGCG; +} + +static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0, + ForceParWrRMW, enable); +} + +const struct amdgpu_df_funcs df_v1_7_funcs = { + .init = df_v1_7_init, + .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, + .get_fb_channel_number = df_v1_7_get_fb_channel_number, + .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, + .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating, + .get_clockgating_state = df_v1_7_get_clockgating_state, + .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h new file mode 100644 index 000000000000..74621104c487 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h @@ -0,0 +1,40 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V1_7_H__ +#define __DF_V1_7_H__ + +#include "soc15_common.h" +enum DF_V1_7_MGCG +{ + DF_V1_7_MGCG_DISABLE = 0, + DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY = 1, + DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY = 2, + DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY = 13, + DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY = 14, + DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY = 15 +}; + +extern const struct amdgpu_df_funcs df_v1_7_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c new file mode 100644 index 000000000000..d5ebe566809b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -0,0 +1,116 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
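/*
 * Aside: both DF blocks map the IntLvNumChan field read from
 * DramBaseAddress0 through a small table to get the HBM channel count.
 * df_v3_6_get_hbm_channel_number() in the file below guards the index
 * against ARRAY_SIZE() of its table, while the df_v1_7 variant above
 * indexes its table unchecked, so a field value past the end would read
 * out of bounds. A defensive lookup in the v3.6 style; the example_*
 * names are stand-ins, not symbols from this patch:
 */
static const u32 df_example_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};

static u32 df_example_get_hbm_channel_number(u32 fb_channel_number)
{
	/* Clamp raw register values that fall outside the lookup table. */
	if (fb_channel_number >= ARRAY_SIZE(df_example_channel_number))
		fb_channel_number = 0;
	return df_example_channel_number[fb_channel_number];
}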
+ * + */ +#include "amdgpu.h" +#include "df_v3_6.h" + +#include "df/df_3_6_default.h" +#include "df/df_3_6_offset.h" +#include "df/df_3_6_sh_mask.h" + +static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, + 16, 32, 0, 0, 0, 2, 4, 8}; + +static void df_v3_6_init(struct amdgpu_device *adev) +{ +} + +static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + if (enable) { + tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); + tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); + } else + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, + mmFabricConfigAccessControl_DEFAULT); +} + +static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0); + tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + + return tmp; +} + +static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) +{ + int fb_channel_number; + + fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) + fb_channel_number = 0; + + return df_v3_6_channel_number[fb_channel_number]; +} + +static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* Put DF on broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_DISABLE; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } + + /* Exit broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); +} + +static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + u32 tmp; + + /* AMD_CG_SUPPORT_DF_MGCG */ + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY) + *flags |= AMD_CG_SUPPORT_DF_MGCG; +} + +const struct amdgpu_df_funcs df_v3_6_funcs = { + .init = df_v3_6_init, + .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, + .get_fb_channel_number = df_v3_6_get_fb_channel_number, + .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, + .update_medium_grain_clock_gating = + df_v3_6_update_medium_grain_clock_gating, + .get_clockgating_state = df_v3_6_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h new file mode 100644 index 000000000000..e79c58e5efcb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -0,0 +1,40 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V3_6_H__ +#define __DF_V3_6_H__ + +#include "soc15_common.h" + +enum DF_V3_6_MGCG { + DF_V3_6_MGCG_DISABLE = 0, + DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1, + DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2, + DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13, + DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14, + DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 +}; + +extern const struct amdgpu_df_funcs df_v3_6_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index cd6bf291a853..de184a886057 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -44,30 +44,30 @@ static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev); -MODULE_FIRMWARE("radeon/tahiti_pfp.bin"); -MODULE_FIRMWARE("radeon/tahiti_me.bin"); -MODULE_FIRMWARE("radeon/tahiti_ce.bin"); -MODULE_FIRMWARE("radeon/tahiti_rlc.bin"); - -MODULE_FIRMWARE("radeon/pitcairn_pfp.bin"); -MODULE_FIRMWARE("radeon/pitcairn_me.bin"); -MODULE_FIRMWARE("radeon/pitcairn_ce.bin"); -MODULE_FIRMWARE("radeon/pitcairn_rlc.bin"); - -MODULE_FIRMWARE("radeon/verde_pfp.bin"); -MODULE_FIRMWARE("radeon/verde_me.bin"); -MODULE_FIRMWARE("radeon/verde_ce.bin"); -MODULE_FIRMWARE("radeon/verde_rlc.bin"); - -MODULE_FIRMWARE("radeon/oland_pfp.bin"); -MODULE_FIRMWARE("radeon/oland_me.bin"); -MODULE_FIRMWARE("radeon/oland_ce.bin"); -MODULE_FIRMWARE("radeon/oland_rlc.bin"); - -MODULE_FIRMWARE("radeon/hainan_pfp.bin"); -MODULE_FIRMWARE("radeon/hainan_me.bin"); -MODULE_FIRMWARE("radeon/hainan_ce.bin"); -MODULE_FIRMWARE("radeon/hainan_rlc.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_pfp.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_me.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_ce.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/pitcairn_pfp.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_me.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_ce.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/verde_pfp.bin"); +MODULE_FIRMWARE("amdgpu/verde_me.bin"); +MODULE_FIRMWARE("amdgpu/verde_ce.bin"); +MODULE_FIRMWARE("amdgpu/verde_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/oland_pfp.bin"); +MODULE_FIRMWARE("amdgpu/oland_me.bin"); +MODULE_FIRMWARE("amdgpu/oland_ce.bin"); +MODULE_FIRMWARE("amdgpu/oland_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/hainan_pfp.bin"); 
+MODULE_FIRMWARE("amdgpu/hainan_me.bin"); +MODULE_FIRMWARE("amdgpu/hainan_ce.bin"); +MODULE_FIRMWARE("amdgpu/hainan_rlc.bin"); static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); @@ -335,7 +335,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); if (err) goto out; @@ -346,7 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); if (err) goto out; @@ -357,7 +357,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); if (err) goto out; @@ -368,7 +368,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 42b6144c1fd5..95452c5a9df6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -57,36 +57,36 @@ static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); -MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); -MODULE_FIRMWARE("radeon/bonaire_me.bin"); -MODULE_FIRMWARE("radeon/bonaire_ce.bin"); -MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); -MODULE_FIRMWARE("radeon/bonaire_mec.bin"); - -MODULE_FIRMWARE("radeon/hawaii_pfp.bin"); -MODULE_FIRMWARE("radeon/hawaii_me.bin"); -MODULE_FIRMWARE("radeon/hawaii_ce.bin"); -MODULE_FIRMWARE("radeon/hawaii_rlc.bin"); -MODULE_FIRMWARE("radeon/hawaii_mec.bin"); - -MODULE_FIRMWARE("radeon/kaveri_pfp.bin"); -MODULE_FIRMWARE("radeon/kaveri_me.bin"); -MODULE_FIRMWARE("radeon/kaveri_ce.bin"); -MODULE_FIRMWARE("radeon/kaveri_rlc.bin"); -MODULE_FIRMWARE("radeon/kaveri_mec.bin"); -MODULE_FIRMWARE("radeon/kaveri_mec2.bin"); - -MODULE_FIRMWARE("radeon/kabini_pfp.bin"); -MODULE_FIRMWARE("radeon/kabini_me.bin"); -MODULE_FIRMWARE("radeon/kabini_ce.bin"); -MODULE_FIRMWARE("radeon/kabini_rlc.bin"); -MODULE_FIRMWARE("radeon/kabini_mec.bin"); - -MODULE_FIRMWARE("radeon/mullins_pfp.bin"); -MODULE_FIRMWARE("radeon/mullins_me.bin"); -MODULE_FIRMWARE("radeon/mullins_ce.bin"); -MODULE_FIRMWARE("radeon/mullins_rlc.bin"); -MODULE_FIRMWARE("radeon/mullins_mec.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin"); 
+MODULE_FIRMWARE("amdgpu/bonaire_me.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_ce.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_mec.bin"); + +MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_me.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_ce.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_mec.bin"); + +MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_me.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_ce.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_mec.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin"); + +MODULE_FIRMWARE("amdgpu/kabini_pfp.bin"); +MODULE_FIRMWARE("amdgpu/kabini_me.bin"); +MODULE_FIRMWARE("amdgpu/kabini_ce.bin"); +MODULE_FIRMWARE("amdgpu/kabini_rlc.bin"); +MODULE_FIRMWARE("amdgpu/kabini_mec.bin"); + +MODULE_FIRMWARE("amdgpu/mullins_pfp.bin"); +MODULE_FIRMWARE("amdgpu/mullins_me.bin"); +MODULE_FIRMWARE("amdgpu/mullins_ce.bin"); +MODULE_FIRMWARE("amdgpu/mullins_rlc.bin"); +MODULE_FIRMWARE("amdgpu/mullins_mec.bin"); static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { @@ -925,7 +925,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); if (err) goto out; @@ -933,7 +933,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; - snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); if (err) goto out; @@ -941,7 +941,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; - snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); if (err) goto out; @@ -949,7 +949,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; @@ -958,7 +958,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) goto out; if (adev->asic_type == CHIP_KAVERI) { - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (err) goto out; @@ -967,7 +967,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) goto out; } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e14263fca1c9..5cd45210113f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -51,6 +51,8 @@ #include "smu/smu_7_1_3_d.h" +#include "ivsrcid/ivsrcid_vislands30.h" + #define GFX8_NUM_GFX_RINGS 1 #define GFX8_MEC_HPD_SIZE 2048 @@ -125,18 +127,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); - MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); @@ -149,6 +139,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); + MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); @@ -161,6 +163,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); +MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vegam_me.bin"); +MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); +MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, @@ -292,6 +301,37 @@ static const u32 tonga_mgcg_cgcg_init[] = mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, }; +static const u32 golden_settings_vegam_a11[] = +{ + mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, + mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x01180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, +}; + +static const u32 vegam_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, +}; + static const u32 golden_settings_polaris11_a11[] = { mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, @@ -666,6 +706,17 @@ static const u32 
stoney_mgcg_cgcg_init[] = mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, }; + +static const char * const sq_edc_source_names[] = { + "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred", + "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch", + "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return", + "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR", + "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS", + "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS", + "SQ_EDC_INFO_SOURCE_TA: EDC source is TA", +}; + static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); @@ -712,6 +763,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) tonga_golden_common_all, ARRAY_SIZE(tonga_golden_common_all)); break; + case CHIP_VEGAM: + amdgpu_device_program_register_sequence(adev, + golden_settings_vegam_a11, + ARRAY_SIZE(golden_settings_vegam_a11)); + amdgpu_device_program_register_sequence(adev, + vegam_golden_common_all, + ARRAY_SIZE(vegam_golden_common_all)); + break; case CHIP_POLARIS11: case CHIP_POLARIS12: amdgpu_device_program_register_sequence(adev, @@ -820,26 +879,32 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; - uint32_t scratch; - uint32_t tmp = 0; + + unsigned int index; + uint64_t gpu_addr; + uint32_t tmp; long r; - r = amdgpu_gfx_scratch_get(adev, &scratch); + r = amdgpu_device_wb_get(adev, &index); if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; } - WREG32(scratch, 0xCAFEDEAD); + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 16, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; } - ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); - ib.ptr[2] = 0xDEADBEEF; - ib.length_dw = 3; + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) @@ -854,20 +919,21 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } - tmp = RREG32(scratch); + + tmp = adev->wb.wb[index]; if (tmp == 0xDEADBEEF) { DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + DRM_ERROR("ib test on ring %d failed\n", ring->idx); r = -EINVAL; } + err2: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err1: - amdgpu_gfx_scratch_free(adev, scratch); + amdgpu_device_wb_free(adev, index); return r; } @@ -918,17 +984,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; - case CHIP_POLARIS11: - chip_name = "polaris11"; + case CHIP_STONEY: + chip_name = "stoney"; break; case CHIP_POLARIS10: chip_name = "polaris10"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; case CHIP_POLARIS12: chip_name = "polaris12"; break; - case CHIP_STONEY: - chip_name = "stoney"; + case 
CHIP_VEGAM: + chip_name = "vegam"; break; default: BUG(); @@ -1770,6 +1839,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_POLARIS10: + case CHIP_VEGAM: ret = amdgpu_atombios_get_gfx_info(adev); if (ret) return ret; @@ -1949,6 +2019,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, return 0; } +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); + static int gfx_v8_0_sw_init(void *handle) { int i, j, k, r, ring_id; @@ -1957,12 +2029,13 @@ static int gfx_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { - case CHIP_FIJI: case CHIP_TONGA: + case CHIP_CARRIZO: + case CHIP_FIJI: + case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: - case CHIP_POLARIS10: - case CHIP_CARRIZO: + case CHIP_VEGAM: adev->gfx.mec.num_mec = 2; break; case CHIP_TOPAZ: @@ -1976,27 +2049,43 @@ static int gfx_v8_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; /* KIQ event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); if (r) return r; /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, &adev->gfx.priv_inst_irq); if (r) return r; + /* Add CP EDC/ECC irq */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, + &adev->gfx.cp_ecc_error_irq); + if (r) + return r; + + /* SQ interrupts. 
*/ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, + &adev->gfx.sq_irq); + if (r) { + DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); + return r; + } + + INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; gfx_v8_0_scratch_init(adev); @@ -2323,6 +2412,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) break; case CHIP_FIJI: + case CHIP_VEGAM: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | @@ -3504,6 +3594,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) { switch (adev->asic_type) { case CHIP_FIJI: + case CHIP_VEGAM: *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) | PKR_YSEL(1) | @@ -4071,7 +4162,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) gfx_v8_0_init_power_gating(adev); WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); } else if ((adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { gfx_v8_0_init_csb(adev); gfx_v8_0_init_save_restore_list(adev); gfx_v8_0_enable_save_restore_machine(adev); @@ -4146,7 +4238,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); if (adev->asic_type == CHIP_POLARIS11 || adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { + adev->asic_type == CHIP_POLARIS12 || + adev->asic_type == CHIP_VEGAM) { tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); tmp &= ~0x3; WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); @@ -5056,6 +5149,10 @@ static int gfx_v8_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + + amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); + /* disable KCQ to avoid CPC touch memory not valid anymore */ for (i = 0; i < adev->gfx.num_compute_rings; i++) gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); @@ -5487,9 +5584,19 @@ static int gfx_v8_0_late_init(void *handle) if (r) return r; - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_GATE); + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) { + DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); + return r; + } + + r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); + if (r) { + DRM_ERROR( + "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", + r); + return r; + } return 0; } @@ -5497,13 +5604,12 @@ static int gfx_v8_0_late_init(void *handle) static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, bool enable) { - if ((adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + if (((adev->asic_type == CHIP_POLARIS11) || + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) && + adev->powerplay.pp_funcs->set_powergating_by_smu) /* Send msg to SMU via Powerplay */ - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_SMC, - enable ? - AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 
1 : 0); } @@ -5588,6 +5694,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); else @@ -6154,6 +6261,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); break; default: @@ -6729,6 +6837,77 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, return 0; } +static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + int enable_flag; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + enable_flag = 0; + break; + + case AMDGPU_IRQ_STATE_ENABLE: + enable_flag = 1; + break; + + default: + return -EINVAL; + } + + WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + + return 0; +} + +static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + int enable_flag; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + enable_flag = 1; + break; + + case AMDGPU_IRQ_STATE_ENABLE: + enable_flag = 0; + break; + + default: + return -EINVAL; + } + + WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, + enable_flag); + + return 0; +} + static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6779,6 +6958,114 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("CP EDC/ECC error detected.\n"); + return 0; +} + +static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) +{ + u32 enc, se_id, sh_id, cu_id; + char type[20]; + int sq_edc_source = -1; + + enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING); + se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID); + + switch (enc) { + case 0: + DRM_INFO("SQ general purpose intr detected: " + "se_id %d, immed_overflow %d, host_reg_overflow %d, " + "host_cmd_overflow %d, cmd_timestamp %d, " + "reg_timestamp %d, thread_trace_buff_full %d, " + "wlt %d, thread_trace %d.\n", + se_id, + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW), + REG_GET_FIELD(ih_data, 
SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE) + ); + break; + case 1: + case 2: + + cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID); + sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID); + + /* + * This function can be called either directly from ISR + * or from BH in which case we can access SQ_EDC_INFO + * instance + */ + if (in_task()) { + mutex_lock(&adev->grbm_idx_mutex); + gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); + + sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); + + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + + if (enc == 1) + sprintf(type, "instruction intr"); + else + sprintf(type, "EDC/ECC error"); + + DRM_INFO( + "SQ %s detected: " + "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " + "trap %s, sq_edc_info.source %s.\n", + type, se_id, sh_id, cu_id, + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID), + REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false", + (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable" + ); + break; + default: + DRM_ERROR("SQ invalid encoding type.\n"); + } +} + +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) +{ + + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); + struct sq_work *sq_work = container_of(work, struct sq_work, work); + + gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); +} + +static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + unsigned ih_data = entry->src_data[0]; + + /* + * Try to submit work so SQ_EDC_INFO can be accessed from + * BH. If previous work submission hasn't finished yet + * just print whatever info is possible directly from the ISR. 
+ */ + if (work_pending(&adev->gfx.sq_work.work)) { + gfx_v8_0_parse_sq_irq(adev, ih_data); + } else { + adev->gfx.sq_work.ih_data = ih_data; + schedule_work(&adev->gfx.sq_work.work); + } + + return 0; +} + static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -6979,6 +7266,16 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { .process = gfx_v8_0_kiq_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { + .set = gfx_v8_0_set_cp_ecc_int_state, + .process = gfx_v8_0_cp_ecc_error_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { + .set = gfx_v8_0_set_sq_int_state, + .process = gfx_v8_0_sq_irq, +}; + static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -6992,6 +7289,12 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; + + adev->gfx.cp_ecc_error_irq.num_types = 1; + adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; + + adev->gfx.sq_irq.num_types = 1; + adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs; } static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9d39fd5b1822..9ab39117cc4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -27,6 +27,7 @@ #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" +#include "amdgpu_atomfirmware.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -37,11 +38,12 @@ #include "clearstate_gfx9.h" #include "v9_structs.h" +#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L -#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 #define mmPWR_MISC_CNTL_STATUS 0x0183 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 @@ -64,6 +66,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); +MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vega20_me.bin"); +MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); +MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); + MODULE_FIRMWARE("amdgpu/raven_ce.bin"); MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); MODULE_FIRMWARE("amdgpu/raven_me.bin"); @@ -73,40 +82,59 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_9_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), - 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) +}; + +static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) }; static const struct soc15_reg_golden golden_settings_gc_9_1[] = @@ -185,6 +213,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) }; +static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, +}; + +static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, +}; + #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 @@ -218,6 +270,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_2_1_vg12, ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); break; + case CHIP_VEGA20: + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg20, + ARRAY_SIZE(golden_settings_gc_9_0_vg20)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, @@ -401,6 +461,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = 
le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -412,6 +493,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) const struct rlc_firmware_header_v2_0 *rlc_hdr; unsigned int *tmp = NULL; unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; DRM_DEBUG("\n"); @@ -422,6 +505,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA12: chip_name = "vega12"; break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; case CHIP_RAVEN: chip_name = "raven"; break; @@ -468,6 +554,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); adev->gfx.rlc.save_and_restore_offset = @@ -508,6 +600,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_0_init_rlc_ext_microcode(adev); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) @@ -566,6 +661,29 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + if (adev->gfx.rlc.is_rlc_v2_1 && + adev->gfx.rlc.save_restore_list_cntl_size_bytes && + adev->gfx.rlc.save_restore_list_gpm_size_bytes && + adev->gfx.rlc.save_restore_list_srm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -841,6 +959,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) dst_ptr = adev->gfx.rlc.cs_ptr; gfx_v9_0_get_csb_buffer(adev, dst_ptr); amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 
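+	/*
+	 * The CSB bo is only kept mapped for the copy above; it is
+	 * unpinned here and re-pinned into VRAM later through
+	 * gfx_v9_0_csb_vram_pin() once the GPU address is actually needed.
+	 */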
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } @@ -869,6 +988,39 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return 0; } +static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, + AMDGPU_GEM_DOMAIN_VRAM); + if (!r) + adev->gfx.rlc.clear_state_gpu_addr = + amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return r; +} + +static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.rlc.clear_state_obj) + return; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); + if (likely(r == 0)) { + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } +} + static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1013,9 +1165,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q }; -static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) +static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; + int err; adev->gfx.funcs = &gfx_v9_0_gfx_funcs; @@ -1037,6 +1190,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; DRM_INFO("fix gfx.config for vega12\n"); break; + case CHIP_VEGA20: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + /* check vbios table if gpu info is not available */ + err = amdgpu_atomfirmware_get_gfx_info(adev); + if (err) + return err; + break; case CHIP_RAVEN: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; @@ -1086,6 +1253,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config, GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); + + return 0; } static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, @@ -1319,6 +1488,7 @@ static int gfx_v9_0_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: adev->gfx.mec.num_mec = 2; break; @@ -1331,23 +1501,23 @@ static int gfx_v9_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; /* KIQ event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT, &adev->gfx.kiq.irq); if (r) return r; /* EOP Event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 
GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;
@@ -1446,7 +1616,9 @@ static int gfx_v9_0_sw_init(void *handle)
	adev->gfx.ce_ram_size = 0x8000;

-	gfx_v9_0_gpu_early_init(adev);
+	r = gfx_v9_0_gpu_early_init(adev);
+	if (r)
+		return r;

	r = gfx_v9_0_ngg_init(adev);
	if (r)
@@ -1600,6 +1772,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -1616,7 +1789,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-			tmp = adev->gmc.shared_aperture_start >> 48;
+			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+				(adev->gmc.private_aperture_start >> 48));
+			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
@@ -1708,55 +1884,44 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
			adev->gfx.rlc.clear_state_size);
}

-static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
-				int *unique_indirect_reg_count,
-				int max_indirect_reg_count,
+				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
-				int max_indirect_start_offsets_count)
+				int max_start_offsets_count)
{
	int idx;
-	bool new_entry = true;

	for (; indirect_offset < list_size; indirect_offset++) {
+		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
+		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
+
+		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+			indirect_offset += 2;
+
+			/* look for the matching index */
+			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
+				if (unique_indirect_regs[idx] ==
+					register_list_format[indirect_offset] ||
+					!unique_indirect_regs[idx])
+					break;
+			}

-		if (new_entry) {
-			new_entry = false;
-			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
-			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
-			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
-		}
-
-		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
-			new_entry = true;
-			continue;
-		}
-
-		indirect_offset += 2;
+			BUG_ON(idx >= unique_indirect_reg_count);

-		/* look for the matching indice */
-		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
-			if (unique_indirect_regs[idx] ==
-				register_list_format[indirect_offset])
-				break;
-		}
+			if (!unique_indirect_regs[idx])
+				unique_indirect_regs[idx] = register_list_format[indirect_offset];

-		if (idx >= *unique_indirect_reg_count) {
-			unique_indirect_regs[*unique_indirect_reg_count] =
-				register_list_format[indirect_offset];
-			idx = *unique_indirect_reg_count;
-			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
-			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+			indirect_offset++;
		}
-
-		register_list_format[indirect_offset] = idx;
	}
}

-static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
{
	int unique_indirect_regs[] = {0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; int unique_indirect_reg_count = 0; @@ -1765,7 +1930,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) int indirect_start_offsets_count = 0; int list_size = 0; - int i = 0; + int i = 0, j = 0; u32 tmp = 0; u32 *register_list_format = @@ -1776,15 +1941,15 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes); /* setup unique_indirect_regs array and indirect_start_offsets array */ - gfx_v9_0_parse_ind_reg_list(register_list_format, - GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, - adev->gfx.rlc.reg_list_format_size_bytes >> 2, - unique_indirect_regs, - &unique_indirect_reg_count, - ARRAY_SIZE(unique_indirect_regs), - indirect_start_offsets, - &indirect_start_offsets_count, - ARRAY_SIZE(indirect_start_offsets)); + unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); + gfx_v9_1_parse_ind_reg_list(register_list_format, + adev->gfx.rlc.reg_list_format_direct_reg_list_length, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indirect_regs, + unique_indirect_reg_count, + indirect_start_offsets, + &indirect_start_offsets_count, + ARRAY_SIZE(indirect_start_offsets)); /* enable auto inc in case it is disabled */ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); @@ -1798,19 +1963,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), adev->gfx.rlc.register_restore[i]); - /* load direct register */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); - for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), - adev->gfx.rlc.register_restore[i]); - /* load indirect register */ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), adev->gfx.rlc.reg_list_format_start); - for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + + /* direct register portion */ + for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), register_list_format[i]); + /* indirect register portion */ + while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { + if (register_list_format[i] == 0xFFFFFFFF) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + continue; + } + + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + + for (j = 0; j < unique_indirect_reg_count; j++) { + if (register_list_format[i] == unique_indirect_regs[j]) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); + break; + } + } + + BUG_ON(j >= unique_indirect_reg_count); + + i++; + } + /* set save/restore list size */ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; list_size = list_size >> 1; @@ -1823,14 +2006,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.starting_offsets_start); for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), - indirect_start_offsets[i]); + indirect_start_offsets[i]); /* load unique indirect regs*/ for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, - unique_indirect_regs[i] & 0x3FFFF); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, - unique_indirect_regs[i] >> 20); + if (unique_indirect_regs[i] != 0) { + WREG32(SOC15_REG_OFFSET(GC, 0, 
mmRLC_SRM_INDEX_CNTL_ADDR_0) + + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], + unique_indirect_regs[i] & 0x3FFFF); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], + unique_indirect_regs[i] >> 20); + } } kfree(register_list_format); @@ -2010,34 +2198,26 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad static void gfx_v9_0_init_pg(struct amdgpu_device *adev) { + gfx_v9_0_init_csb(adev); + + /* + * Rlc save restore list is workable since v2_1. + * And it's needed by gfxoff feature. + */ + if (adev->gfx.rlc.is_rlc_v2_1) { + gfx_v9_1_init_rlc_save_restore_list(adev); + gfx_v9_0_enable_save_restore_machine(adev); + } + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_DMG | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - gfx_v9_0_init_csb(adev); - gfx_v9_0_init_rlc_save_restore_list(adev); - gfx_v9_0_enable_save_restore_machine(adev); - - if (adev->asic_type == CHIP_RAVEN) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); - gfx_v9_0_init_gfx_power_gating(adev); - - if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); - } else { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); - } - - if (adev->pg_flags & AMD_PG_SUPPORT_CP) - gfx_v9_0_enable_cp_power_gating(adev, true); - else - gfx_v9_0_enable_cp_power_gating(adev, false); - } + WREG32(mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + gfx_v9_0_init_gfx_power_gating(adev); } } @@ -2126,9 +2306,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); - /* disable PG */ - WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); - gfx_v9_0_rlc_reset(adev); gfx_v9_0_init_pg(adev); @@ -2990,6 +3167,10 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_gpu_init(adev); + r = gfx_v9_0_csb_vram_pin(adev); + if (r) + return r; + r = gfx_v9_0_rlc_resume(adev); if (r) return r; @@ -3061,6 +3242,9 @@ static int gfx_v9_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -3095,6 +3279,8 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); gfx_v9_0_rlc_stop(adev); + gfx_v9_0_csb_vram_unpin(adev); + return 0; } @@ -3339,8 +3525,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - /* TODO: double check if we need to perform under safe mdoe */ - /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + gfx_v9_0_enter_rlc_safe_mode(adev); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -3351,7 +3536,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - /* gfx_v9_0_exit_rlc_safe_mode(adev); */ + gfx_v9_0_exit_rlc_safe_mode(adev); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -3382,8 +3567,11 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev if (enable && 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); - data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | - RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + + if (adev->asic_type != CHIP_VEGA12) + data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; + + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); @@ -3413,11 +3601,15 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev } else { /* 1 - MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); - data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | - RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + + if (adev->asic_type != CHIP_VEGA12) + data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; + + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -3453,9 +3645,11 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); - /* enable 3Dcgcg FSM(0x0020003f) */ + + /* enable 3Dcgcg FSM(0x0000363f) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | @@ -3502,9 +3696,10 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); - /* enable cgcg FSM(0x0020003F) */ + /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | @@ -3586,6 +3781,15 @@ static int gfx_v9_0_set_powergating_state(void *handle, /* update mgcg state */ gfx_v9_0_update_gfx_mg_power_gating(adev, enable); + + /* set gfx off through smu */ + if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); + break; + case CHIP_VEGA12: + /* set gfx off through smu */ + if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); break; default: break; @@ -3605,6 +3809,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); @@ -3742,7 +3947,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); -BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -3774,13 +3979,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN | + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5))); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4137,6 +4345,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4458,6 +4680,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_tmz = gfx_v9_0_ring_emit_tmz, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4492,6 +4715,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .set_priority = gfx_v9_0_ring_set_priority_compute, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -4522,6 +4746,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .emit_rreg = gfx_v9_0_ring_emit_rreg, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) @@ -4577,6 +4802,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; @@ -4686,6 +4912,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 5617cf62c566..75317f283c69 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -41,11 +41,11 @@ static void gmc_v6_0_set_gmc_funcs(struct 
amdgpu_device *adev); static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int gmc_v6_0_wait_for_idle(void *handle); -MODULE_FIRMWARE("radeon/tahiti_mc.bin"); -MODULE_FIRMWARE("radeon/pitcairn_mc.bin"); -MODULE_FIRMWARE("radeon/verde_mc.bin"); -MODULE_FIRMWARE("radeon/oland_mc.bin"); -MODULE_FIRMWARE("radeon/si58_mc.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_mc.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin"); +MODULE_FIRMWARE("amdgpu/verde_mc.bin"); +MODULE_FIRMWARE("amdgpu/oland_mc.bin"); +MODULE_FIRMWARE("amdgpu/si58_mc.bin"); #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 @@ -134,9 +134,9 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) is_58_fw = true; if (is_58_fw) - snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin"); + snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin"); else - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); err = request_firmware(&adev->gmc.fw, fw_name, adev->dev); if (err) goto out; @@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + static int gmc_v6_0_sw_init(void *handle) { int r; @@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - adev->gmc.stolen_size = 256 * 1024; - adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 
32 : 40; r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); @@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev); + r = amdgpu_bo_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80054f36e487..10920f0bd85f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -43,12 +43,14 @@ #include "amdgpu_atombios.h" +#include "ivsrcid/ivsrcid_vislands30.h" + static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); static int gmc_v7_0_wait_for_idle(void *handle); -MODULE_FIRMWARE("radeon/bonaire_mc.bin"); -MODULE_FIRMWARE("radeon/hawaii_mc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_mc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_mc.bin"); MODULE_FIRMWARE("amdgpu/topaz_mc.bin"); static const u32 golden_settings_iceland_a11[] = @@ -147,10 +149,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) default: BUG(); } - if (adev->asic_type == CHIP_TOPAZ) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); - else - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); err = request_firmware(&adev->gmc.fw, fw_name, adev->dev); if (err) @@ -958,12 +957,33 @@ static int gmc_v7_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + static int gmc_v7_0_sw_init(void *handle) { int r; @@ -978,11 +998,11 @@ static int gmc_v7_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); if (r) return r; @@ -998,8 +1018,6 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->gmc.stolen_size = 256 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. 
* IGP - can handle 40-bits @@ -1030,6 +1048,8 @@ static int gmc_v7_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d71d4cb68f9c..75f3ffb2891e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -44,6 +44,7 @@ #include "amdgpu_atombios.h" +#include "ivsrcid/ivsrcid_vislands30.h" static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); @@ -138,6 +139,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, golden_settings_polaris11_a11, ARRAY_SIZE(golden_settings_polaris11_a11)); @@ -231,6 +233,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: case CHIP_CARRIZO: case CHIP_STONEY: + case CHIP_VEGAM: return 0; default: BUG(); } @@ -567,9 +570,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { - case CHIP_POLARIS11: /* all engines support GPUVM */ case CHIP_POLARIS10: /* all engines support GPUVM */ + case CHIP_POLARIS11: /* all engines support GPUVM */ case CHIP_POLARIS12: /* all engines support GPUVM */ + case CHIP_VEGAM: /* all engines support GPUVM */ default: adev->gmc.gart_size = 256ULL << 20; break; @@ -1049,12 +1053,33 @@ static int gmc_v8_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + #define mmMC_SEQ_MISC0_FIJI 0xA71 static int gmc_v8_0_sw_init(void *handle) @@ -1068,7 +1093,8 @@ static int gmc_v8_0_sw_init(void *handle) } else { u32 tmp; - if (adev->asic_type == CHIP_FIJI) + if ((adev->asic_type == CHIP_FIJI) || + (adev->asic_type == CHIP_VEGAM)) tmp = RREG32(mmMC_SEQ_MISC0_FIJI); else tmp = RREG32(mmMC_SEQ_MISC0); @@ -1076,11 +1102,11 @@ static int gmc_v8_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); if (r) return r; @@ -1096,8 +1122,6 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - 
	adev->gmc.stolen_size = 256 * 1024;
-
	/* set DMA mask + need_dma32 flags.
	 * PCIE - can handle 40-bits.
	 * IGP - can handle 40-bits
@@ -1128,6 +1152,8 @@
	if (r)
		return r;

+	adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
+
	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
@@ -1422,8 +1448,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
		gmc_v8_0_set_fault_enable_default(adev, false);

	if (printk_ratelimit()) {
-		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
-			entry->src_id, entry->src_data[0]);
+		struct amdgpu_task_info task_info = { 0 };
+
+		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+		dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
+			entry->src_id, entry->src_data[0], task_info.process_name,
+			task_info.tgid, task_info.task_name, task_info.pid);
		dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
			addr);
		dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e687363900bb..9df94b45d17d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -43,19 +43,15 @@
 #include "gfxhub_v1_0.h"
 #include "mmhub_v1_0.h"

-#define mmDF_CS_AON0_DramBaseAddress0 0x0044
-#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0
-//DF_CS_AON0_DramBaseAddress0
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+
+/* add these here since we already include dce12 headers and these are for DCN */
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L

 /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
 #define AMDGPU_NUM_OF_VMIDS 8

@@ -263,11 +259,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
	}

	if (printk_ratelimit()) {
+		struct amdgpu_task_info task_info = { 0 };
+
+		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
		dev_err(adev->dev,
-			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
			entry->vmid_src ?
"mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, - entry->pasid); + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); dev_err(adev->dev, " at page 0x%016llx from %d\n", addr, entry->client_id); if (!amdgpu_sriov_vf(adev)) @@ -385,11 +386,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); - - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); return pd_addr; } @@ -556,8 +555,7 @@ static int gmc_v9_0_early_init(void *handle) adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = adev->gmc.shared_aperture_start + (4ULL << 30) - 1; - adev->gmc.private_aperture_start = - adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; @@ -659,6 +657,11 @@ static int gmc_v9_0_late_init(void *handle) unsigned i; int r; + /* + * TODO - Uncomment once GART corruption issue is fixed. + */ + /* amdgpu_bo_late_init(adev); */ + for(i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; unsigned vmhub = ring->funcs->vmhub; @@ -679,6 +682,7 @@ static int gmc_v9_0_late_init(void *handle) DRM_INFO("ECC is active.\n"); } else if (r == 0) { DRM_INFO("ECC is not present.\n"); + adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_ERROR("gmc_v9_0_ecc_available() failed. 
r: %d\n", r); return r; @@ -697,10 +701,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) - adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); - else - adev->vm_manager.vram_base_offset = 0; + adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); } /** @@ -714,7 +715,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { - u32 tmp; int chansize, numchan; int r; @@ -727,39 +727,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) else chansize = 128; - tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); - tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; - tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; - switch (tmp) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 0; - break; - case 3: - numchan = 4; - break; - case 4: - numchan = 0; - break; - case 5: - numchan = 8; - break; - case 6: - numchan = 0; - break; - case 7: - numchan = 16; - break; - case 8: - numchan = 2; - break; - } + numchan = adev->df_funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } @@ -792,6 +760,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: /* all engines support GPUVM */ case CHIP_VEGA12: /* all engines support GPUVM */ + case CHIP_VEGA20: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -826,6 +795,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } +static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ +#if 0 + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); +#endif + unsigned size; + + /* + * TODO Remove once GART corruption is resolved + * Check related code in gmc_v9_0_sw_fini + * */ + size = 9 * 1024 * 1024; + +#if 0 + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport; + + switch (adev->asic_type) { + case CHIP_RAVEN: + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); + size = (REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * + 4); + break; + case CHIP_VEGA10: + case CHIP_VEGA12: + default: + viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + break; + } + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + +#endif + return size; +} + static int gmc_v9_0_sw_init(void *handle) { int r; @@ -851,6 +866,7 @@ static int gmc_v9_0_sw_init(void *handle) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -863,9 +879,9 @@ static int gmc_v9_0_sw_init(void *handle) } /* This interrupt is VMC page fault.*/ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); - r = 
amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0,
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
			&adev->gmc.vm_fault);

	if (r)
@@ -877,12 +893,6 @@
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

-	/*
-	 * It needs to reserve 8M stolen memory for vega10
-	 * TODO: Figure out how to avoid that...
-	 */
-	adev->gmc.stolen_size = 8 * 1024 * 1024;
-
	/* set DMA mask + need_dma32 flags.
	 * PCIE - can handle 44-bits.
	 * IGP - can handle 44-bits
@@ -907,6 +917,8 @@
	if (r)
		return r;

+	adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
+
	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
@@ -950,6 +962,18 @@ static int gmc_v9_0_sw_fini(void *handle)
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);
	gmc_v9_0_gart_fini(adev);
+
+	/*
+	 * TODO:
+	 * Currently there is a bug where some memory client outside
+	 * of the driver writes to first 8M of VRAM on S3 resume,
+	 * this overwrites GART which by default gets placed in first 8M and
+	 * causes VM_FAULTS once GTT is accessed.
+	 * Keep the stolen memory reservation while this is not solved.
+	 * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+	 */
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+
	amdgpu_bo_fini(adev);

	return 0;
@@ -960,6 +984,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
	switch (adev->asic_type) {
	case CHIP_VEGA10:
+	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 26ba984ab2b7..3f57f6463dc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -1921,7 +1921,7 @@ static int kv_dpm_set_power_state(void *handle)
	int ret;

	if (pi->bapm_enable) {
-		ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.dpm.ac_power);
+		ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.ac_power);
		if (ret) {
			DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n");
			return ret;
@@ -2727,8 +2727,9 @@ static int kv_parse_power_table(struct amdgpu_device *adev)
		(mode_info->atom_context->bios + data_offset +
		 le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
-	adev->pm.dpm.ps = kzalloc(sizeof(struct amdgpu_ps) *
-				  state_array->ucNumEntries, GFP_KERNEL);
+	adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries,
+				  sizeof(struct amdgpu_ps),
+				  GFP_KERNEL);
	if (!adev->pm.dpm.ps)
		return -ENOMEM;
	power_state_offset = (u8 *)state_array->states;
@@ -2817,7 +2818,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
		pi->caps_tcp_ramping = true;
	}

-	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+	if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
		pi->caps_sclk_ds = true;
	else
		pi->caps_sclk_ds = false;
@@ -2974,7 +2975,7 @@ static int kv_dpm_late_init(void *handle)
	/* powerdown unused blocks for now */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-	if (!amdgpu_dpm)
+	if (!adev->pm.dpm_enabled)
		return 0;

	kv_dpm_powergate_acp(adev, true);
@@ -3305,6 +3306,19 @@ static int kv_dpm_read_sensor(void *handle, int idx,
	}
}

+static int kv_set_powergating_by_smu(void *handle,
+				uint32_t block_type, bool gate)
+{
+	switch (block_type) {
+	case AMD_IP_BLOCK_TYPE_UVD:
+		kv_dpm_powergate_uvd(handle, gate);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 static const struct amd_ip_funcs kv_dpm_ip_funcs = {
	.name = "kv_dpm",
	.early_init = kv_dpm_early_init,
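The kv_set_powergating_by_smu() wrapper above folds the old dedicated .powergate_uvd hook into the generic, IP-block-keyed .set_powergating_by_smu callback. A minimal, hypothetical caller-side sketch of that dispatch (example_kv_gate_uvd() is illustrative only, mirroring the guarded call shape this series uses for GFX and GMC elsewhere in this diff):

static void example_kv_gate_uvd(struct amdgpu_device *adev, bool gate)
{
	/* the generic hook is optional; not every dpm implementation provides it */
	if (adev->powerplay.pp_funcs->set_powergating_by_smu)
		/* routes to kv_set_powergating_by_smu() -> kv_dpm_powergate_uvd() */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, gate);
}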
@@ -3341,7 +3355,7 @@ static const struct amd_pm_funcs kv_dpm_funcs = { .print_power_state = &kv_dpm_print_power_state, .debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level, .force_performance_level = &kv_dpm_force_performance_level, - .powergate_uvd = &kv_dpm_powergate_uvd, + .set_powergating_by_smu = kv_set_powergating_by_smu, .enable_bapm = &kv_dpm_enable_bapm, .get_vce_clock_state = amdgpu_get_vce_clock_state, .check_state_equal = kv_check_state_equal, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 43f925773b57..e70a0d4d6db4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -471,8 +471,8 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, RENG_EXECUTE_ON_REG_UPDATE, 1); WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); - if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu) - amdgpu_dpm_set_mmhub_powergating_by_smu(adev); + if (adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); } else { pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, @@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: mmhub_v1_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 493348672475..078f70faedcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - if (locked) + if (locked) { + adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); + } /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_lockup_timeout == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index df34dc79d444..365517c0121e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -34,10 +34,19 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 +/* vega20 */ +#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011 +#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2 + static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + if (adev->asic_type == CHIP_VEGA20) + tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20); + else + tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); u32 doorbell_range = RREG32(reg); + u32 range = 2; + + if (adev->asic_type == CHIP_VEGA20) + range = 8; if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); @@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct 
amdgpu_device *ade
{
	uint32_t def, data;

+	if (adev->asic_type == CHIP_VEGA20)
+		return;
+
	/* NBIF_MGCG_CTRL_LCLK */
	def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 8da6da90b1c9..0cf48d26c676 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id
    GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */
    GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */
    GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialize the rings */
+   GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
+   GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
+   GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
    GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
 };

+/*-----------------------------------------------------------------------------
+   NOTE: All physical addresses used in this interface are actually
+   GPU Virtual Addresses.
+*/
+
+
 /* Control registers of the TEE Gfx interface. These are located in
 * SRBM-to-PSP mailbox registers (total 8 registers).
 */
@@ -55,8 +64,8 @@ struct psp_gfx_ctrl
    volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */
    volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */
    volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */
-   volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */
-   volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */
+   volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual address of ring buffer (VMID=0) */
+   volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual address of ring buffer (VMID=0) */
    volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */
 };

@@ -78,6 +87,8 @@ enum psp_gfx_cmd_id
    GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */
    GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */
    GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
+   GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
+   GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
 };

@@ -85,11 +96,11 @@
 /* Command to load Trusted Application binary into PSP OS. */
 struct psp_gfx_cmd_load_ta
 {
-   uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */
-   uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */
+   uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
+   uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */
    uint32_t app_len; /* length of the TA binary in bytes */
-   uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */
-   uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */
+   uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
+   uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */
    uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */

    /* Note: CmdBufLen can be set to 0.
In this case no persistent CMD buffer is provided @@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta */ struct psp_gfx_buf_desc { - uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ - uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ + uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ }; @@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd /* Command to setup TMR region. */ struct psp_gfx_cmd_setup_tmr { - uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ - uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ + uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ }; @@ -174,18 +185,32 @@ enum psp_gfx_fw_type GFX_FW_TYPE_ISP = 16, GFX_FW_TYPE_ACP = 17, GFX_FW_TYPE_SMU = 18, + GFX_FW_TYPE_MMSCH = 19, + GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, + GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, + GFX_FW_TYPE_MAX = 23 }; /* Command to load HW IP FW. */ struct psp_gfx_cmd_load_ip_fw { - uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ - uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ + uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */ + uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */ uint32_t fw_size; /* FW buffer size in bytes */ enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to save/restore HW IP FW. */ +struct psp_gfx_cmd_save_restore_ip_fw +{ + uint32_t save_fw; /* if set, command is used for saving fw otherwise for resetoring*/ + uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */ + uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */ + uint32_t buf_size; /* Size of the save/restore buffer in bytes */ + enum psp_gfx_fw_type fw_type; /* FW type */ +}; /* All GFX ring buffer commands. */ union psp_gfx_commands @@ -195,7 +220,7 @@ union psp_gfx_commands struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; - + struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; }; @@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp /* These fields are used for RBI only. 
They are all 0 in GPCOM commands */ - uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ - uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ + uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */ + uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */ uint32_t resp_offset; /* +20 offset within response buffer */ uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ @@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp /* Structure of the Ring Buffer Frame */ struct psp_gfx_rb_frame { - uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ - uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ + uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */ uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ - uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ - uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ + uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */ + uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */ uint32_t fence_value; /* +20 Fence value */ uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */ uint8_t reserved1[2]; /* +34 reserved, must be 0 */ - uint32_t reserved2[7]; /* +40 reserved, must be 0 */ - /* total 64 bytes */ + uint32_t reserved2[7]; /* +36 reserved, must be 0 */ + /* total 64 bytes */ }; #endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 8873d833a7f7..0ff136d02d9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_RLC_G: *type = GFX_FW_TYPE_RLC_G; break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 196e75def1f2..727071fee6f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -41,9 +41,14 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); +MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); +MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); + #define smnMP1_FIRMWARE_FLAGS 0x3010028 +static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; + static int 
psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) { @@ -207,12 +212,31 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) return ret; } +static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver) +{ + int i; + + if (ver == adev->psp.sos_fw_version) + return true; + + /* + * Double check if the latest four legacy versions. + * If yes, it is still the right version. + */ + for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) { + if (sos_old_versions[i] == adev->psp.sos_fw_version) + return true; + } + + return false; +} + static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) { int ret; unsigned int psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; + uint32_t sol_reg, ver; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. @@ -245,6 +269,10 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, true); + ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + if (!psp_v3_1_match_version(adev, ver)) + DRM_WARN("SOS version doesn't match\n"); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index c7190c39c4f5..15ae4bc9c072 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -44,6 +44,8 @@ #include "iceland_sdma_pkt_open.h" +#include "ivsrcid/ivsrcid_vislands30.h" + static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -202,8 +204,7 @@ static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; + u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2; return wptr; } @@ -218,9 +219,8 @@ static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2); } static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -273,7 +273,7 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring) { u32 ref_and_mask = 0; - if (ring == &ring->adev->sdma.instance[0].ring) + if (ring->me == 0) ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1); else ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); @@ -898,7 +898,7 @@ static int sdma_v2_4_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; @@ -910,7 +910,7 @@ static int sdma_v2_4_sw_init(void *handle) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1213,8 +1213,10 @@ static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } } static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index be20a387d961..1e07ff274d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -44,6 +44,8 @@ #include "tonga_sdma_pkt_open.h" +#include "ivsrcid/ivsrcid_vislands30.h" + static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -62,6 +64,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin"); MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin"); MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/vegam_sdma.bin"); +MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin"); static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = @@ -209,6 +213,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, golden_settings_polaris11_a11, ARRAY_SIZE(golden_settings_polaris11_a11)); @@ -275,15 +280,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; - case CHIP_POLARIS11: - chip_name = "polaris11"; - break; case CHIP_POLARIS10: chip_name = "polaris10"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; case CHIP_POLARIS12: chip_name = "polaris12"; break; + case CHIP_VEGAM: + chip_name = "vegam"; + break; case CHIP_CARRIZO: chip_name = "carrizo"; break; @@ -359,9 +367,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) /* XXX check if swapping is necessary on BE */ wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; } else { - int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; - - wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; + wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2; } return wptr; @@ -388,9 +394,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); } else { - int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2); } } @@ -444,7 +448,7 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { u32 ref_and_mask = 0; - if (ring == &ring->adev->sdma.instance[0].ring) + if (ring->me == 0) ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1); else ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); @@ -1173,7 +1177,7 @@ static int sdma_v3_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; @@ -1185,7 +1189,7 @@ static int sdma_v3_0_sw_init(void *handle) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1649,8 +1653,10 @@ static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } } static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 399f876f9cad..e7ca4623cfb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -38,10 +38,15 @@ #include "soc15.h" #include "vega10_sdma_pkt_open.h" +#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h" +#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h" + MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/vega20_sdma.bin"); +MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L @@ -107,6 +112,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; +static const struct soc15_reg_golden golden_settings_sdma_4_2[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, 
mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) +}; + static const struct soc15_reg_golden golden_settings_sdma_rv1[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), @@ -139,6 +166,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_vg12, ARRAY_SIZE(golden_settings_sdma_vg12)); break; + case CHIP_VEGA20: + soc15_program_register_sequence(adev, + golden_settings_sdma_4_2, + ARRAY_SIZE(golden_settings_sdma_4_2)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_sdma_4_1, @@ -182,6 +214,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA12: chip_name = "vega12"; break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; case CHIP_RAVEN: chip_name = "raven"; break; @@ -264,13 +299,12 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { u32 lowbit, highbit; - int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - me, highbit, lowbit); + ring->me, highbit, lowbit); wptr = highbit; wptr = wptr << 32; wptr |= lowbit; @@ -307,17 +341,15 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; - DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - me, + ring->me, lower_32_bits(ring->wptr << 2), - me, + ring->me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } } @@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, } +static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring, + int mem_space, int hdp, + uint32_t addr0, uint32_t addr1, + uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) | + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + if (mem_space) { + /* memory */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + } else { + /* registers */ + amdgpu_ring_write(ring, addr0 << 2); + amdgpu_ring_write(ring, addr1 << 2); + } + amdgpu_ring_write(ring, ref); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */ +} + /** * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * @@ -373,20 +430,15 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; - if (ring == &ring->adev->sdma.instance[0].ring) + if (ring->me == 0) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; else ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + sdma_v4_0_wait_reg_mem(ring, 0, 1, + adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + ref_and_mask, ref_and_mask, 10); } /** @@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint64_t addr = ring->fence_drv.gpu_addr; /* wait for idle */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ - SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); - amdgpu_ring_write(ring, addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - amdgpu_ring_write(ring, seq); /* reference */ - amdgpu_ring_write(ring, 0xffffffff); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ + sdma_v4_0_wait_reg_mem(ring, 1, 0, + addr & 0xfffffffc, + 
upper_32_bits(addr) & 0xffffffff, + seq, 0xffffffff, 4); } @@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, val); /* reference */ - amdgpu_ring_write(ring, mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); + sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); } static int sdma_v4_0_early_init(void *handle) @@ -1190,13 +1228,13 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; @@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1605,14 +1644,17 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .pad_ib = sdma_v4_0_ring_pad_ib, .emit_wreg = sdma_v4_0_ring_emit_wreg, .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } } static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index a675ec6d2811..c364ef94cc36 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev, } } +static bool si_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we support soft reset */ + return true; +} + static int si_get_pcie_lanes(struct amdgpu_device *adev) { u32 link_width_cntl; @@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_config_memsize = &si_get_config_memsize, .flush_hdp = &si_flush_hdp, .invalidate_hdp = &si_invalidate_hdp, + .need_full_reset = &si_need_full_reset, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 797d505bf9ee..db327b412562 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -56,16 +56,16 @@ #define BIOS_SCRATCH_4 0x5cd -MODULE_FIRMWARE("radeon/tahiti_smc.bin"); -MODULE_FIRMWARE("radeon/pitcairn_smc.bin"); -MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin"); -MODULE_FIRMWARE("radeon/verde_smc.bin"); -MODULE_FIRMWARE("radeon/verde_k_smc.bin"); -MODULE_FIRMWARE("radeon/oland_smc.bin"); 
-MODULE_FIRMWARE("radeon/oland_k_smc.bin"); -MODULE_FIRMWARE("radeon/hainan_smc.bin"); -MODULE_FIRMWARE("radeon/hainan_k_smc.bin"); -MODULE_FIRMWARE("radeon/banks_k_2_smc.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_smc.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_smc.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/verde_smc.bin"); +MODULE_FIRMWARE("amdgpu/verde_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/oland_smc.bin"); +MODULE_FIRMWARE("amdgpu/oland_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/hainan_smc.bin"); +MODULE_FIRMWARE("amdgpu/hainan_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/banks_k_2_smc.bin"); static const struct amd_pm_funcs si_dpm_funcs; @@ -3480,7 +3480,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, disable_sclk_switching = true; } - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -3489,7 +3489,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, if (ps->performance_levels[i].vddc > ps->performance_levels[i+1].vddc) ps->performance_levels[i].vddc = ps->performance_levels[i+1].vddc; } - if (adev->pm.dpm.ac_power == false) { + if (adev->pm.ac_power == false) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; @@ -7242,8 +7242,9 @@ static int si_parse_power_table(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); - adev->pm.dpm.ps = kzalloc(sizeof(struct amdgpu_ps) * - state_array->ucNumEntries, GFP_KERNEL); + adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries, + sizeof(struct amdgpu_ps), + GFP_KERNEL); if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; @@ -7317,8 +7318,7 @@ static int si_dpm_init(struct amdgpu_device *adev) pi = &eg_pi->rv7xx; si_pi->sys_pcie_mask = - (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >> - CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT; + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK; si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID; si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev); @@ -7346,7 +7346,9 @@ static int si_dpm_init(struct amdgpu_device *adev) return ret; adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries = - kzalloc(4 * sizeof(struct amdgpu_clock_voltage_dependency_entry), GFP_KERNEL); + kcalloc(4, + sizeof(struct amdgpu_clock_voltage_dependency_entry), + GFP_KERNEL); if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { amdgpu_free_extended_power_table(adev); return -ENOMEM; @@ -7580,7 +7582,7 @@ static int si_dpm_late_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; ret = si_set_temperature_range(adev); @@ -7664,7 +7666,7 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name); err = request_firmware(&adev->pm.fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 51cf8a30f6c2..83f2717fcf81 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -41,8 +41,6 @@ #include "sdma1/sdma1_4_0_offset.h" #include 
"hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" -#include "mp/mp_9_0_offset.h" -#include "mp/mp_9_0_sh_mask.h" #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" @@ -52,6 +50,8 @@ #include "gmc_v9_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "df_v1_7.h" +#include "df_v3_6.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" @@ -60,33 +60,6 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" -#define mmFabricConfigAccessControl 0x0410 -#define mmFabricConfigAccessControl_BASE_IDX 0 -#define mmFabricConfigAccessControl_DEFAULT 0x00000000 -//FabricConfigAccessControl -#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 -#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 -#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 -#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L -#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L -#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L - - -#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc -#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 -//DF_PIE_AON0_DfGlobalClkGater -#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 -#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL - -enum { - DF_MGCG_DISABLE = 0, - DF_MGCG_ENABLE_00_CYCLE_DELAY =1, - DF_MGCG_ENABLE_01_CYCLE_DELAY =2, - DF_MGCG_ENABLE_15_CYCLE_DELAY =13, - DF_MGCG_ENABLE_31_CYCLE_DELAY =14, - DF_MGCG_ENABLE_63_CYCLE_DELAY =15 -}; - #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba @@ -313,6 +286,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, + { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -341,6 +315,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, } else { if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) return adev->gfx.config.gb_addr_config; + else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)) + return adev->gfx.config.db_debug2; return RREG32(reg_offset); } } @@ -512,15 +488,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_RAVEN: vega10_reg_base_init(adev); break; + case CHIP_VEGA20: + vega20_reg_base_init(adev); + break; default: return -EINVAL; } if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; + else if (adev->asic_type == CHIP_VEGA20) + adev->nbio_funcs = &nbio_v7_0_funcs; else adev->nbio_funcs = &nbio_v6_1_funcs; + if (adev->asic_type == CHIP_VEGA20) + adev->df_funcs = &df_v3_6_funcs; + else + adev->df_funcs = &df_v1_7_funcs; adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) @@ -529,12 +514,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + if (adev->asic_type != CHIP_VEGA20) { + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, 
&pp_smu_ip_block); + } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -593,6 +581,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev, HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } +static bool soc15_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we implement soft reset */ + return true; +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -606,6 +600,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .flush_hdp = &soc15_flush_hdp, .invalidate_hdp = &soc15_invalidate_hdp, + .need_full_reset = &soc15_need_full_reset, }; static int soc15_common_early_init(void *handle) @@ -675,6 +670,28 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x14; break; + case CHIP_VEGA20: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_VCE_MGCG | + AMD_CG_SUPPORT_UVD_MGCG; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x28; + break; case CHIP_RAVEN: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -694,8 +711,15 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS; - adev->pg_flags = AMD_PG_SUPPORT_SDMA; + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCN_MGCG; + + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + + if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; adev->external_rev_id = 0x1; break; @@ -871,32 +895,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); } -static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t data; - - /* Put DF on broadcast mode */ - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl)); - data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; - WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - data |= DF_MGCG_ENABLE_15_CYCLE_DELAY; - WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); - } else { - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - data |= DF_MGCG_DISABLE; - WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); - } - - WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), - mmFabricConfigAccessControl_DEFAULT); -} - static int soc15_common_set_clockgating_state(void *handle, enum amd_clockgating_state state) { @@ -908,6 +906,7 @@ static int soc15_common_set_clockgating_state(void *handle, 
switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); adev->nbio_funcs->update_medium_grain_light_sleep(adev, @@ -920,7 +919,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - soc15_update_df_medium_grain_clock_gating(adev, + adev->df_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: @@ -973,10 +972,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - /* AMD_CG_SUPPORT_DF_MGCG */ - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY) - *flags |= AMD_CG_SUPPORT_DF_MGCG; + adev->df_funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index f70da8a29f86..1f714b7af520 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -55,5 +55,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); int vega10_reg_base_init(struct amdgpu_device *adev); +int vega20_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index def865067edd..0942f492d2e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -47,6 +47,21 @@ #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) +#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \ + do { \ + uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + uint32_t loop = adev->usec_timeout; \ + while ((tmp_ & (mask)) != (expected_value)) { \ + udelay(2); \ + tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + loop--; \ + if (!loop) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + } \ + } while (0) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..edfe50821cd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -53,6 +53,29 @@ #define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) +#define PACKETJ_CONDITION_CHECK0 0 +#define PACKETJ_CONDITION_CHECK1 1 +#define PACKETJ_CONDITION_CHECK2 2 +#define PACKETJ_CONDITION_CHECK3 3 +#define PACKETJ_CONDITION_CHECK4 4 +#define PACKETJ_CONDITION_CHECK5 5 +#define PACKETJ_CONDITION_CHECK6 6 +#define PACKETJ_CONDITION_CHECK7 7 + +#define PACKETJ_TYPE0 0 +#define PACKETJ_TYPE1 1 +#define PACKETJ_TYPE2 2 +#define PACKETJ_TYPE3 3 +#define PACKETJ_TYPE4 4 +#define PACKETJ_TYPE5 5 +#define PACKETJ_TYPE6 6 +#define PACKETJ_TYPE7 7 + +#define PACKETJ(reg, r, cond, type) ((reg & 0x3FFFF) | \ + ((r & 0x3F) << 18) | \ + ((cond & 0xF) << 24) | \ + ((type & 0xF) << 28)) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 @@ -159,6 +182,7 @@ #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ #define EOP_TCL1_ACTION_EN (1 << 16) #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define EOP_TC_NC_ACTION_EN 
(1 << 19) #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ #define DATA_SEL(x) ((x) << 29) @@ -268,6 +292,11 @@ * x=1: tmz_end */ +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_SET_RESOURCES 0xA0 /* 1. header * 2. CONTROL diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 948bb9437757..6fed3d7797a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -93,6 +93,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v4_2_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; uvd_v4_2_set_ring_funcs(adev); uvd_v4_2_set_irq_funcs(adev); @@ -107,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); if (r) return r; @@ -119,9 +120,9 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - ring = &adev->uvd.ring; + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); return r; } @@ -150,7 +151,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, static int uvd_v4_2_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -208,7 +209,7 @@ done: static int uvd_v4_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; if (RREG32(mmUVD_STATUS) != 0) uvd_v4_2_stop(adev); @@ -251,7 +252,7 @@ static int uvd_v4_2_resume(void *handle) */ static int uvd_v4_2_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t rb_bufsz; int i, j, r; u32 tmp; @@ -523,6 +524,18 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } +static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + /** * uvd_v4_2_mc_resume - memory controller programming * @@ -536,7 +549,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) uint32_t size; /* programm the VCPU memory controller bits 0-27 */ - addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; + addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); WREG32(mmUVD_VCPU_CACHE_SIZE0, size); @@ -553,11 +566,11 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE2, size); /* bits 28-31 */ - addr = (adev->uvd.gpu_addr >> 28) & 0xF; + addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF; WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); /* bits 32-39 */ - addr = (adev->uvd.gpu_addr >> 
32) & 0xFF; + addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF; WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); @@ -664,7 +677,7 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_DEBUG("IH: UVD TRAP\n"); - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst->ring); return 0; } @@ -688,7 +701,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); - if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { + if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { if (!(RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) { WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | @@ -699,7 +712,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, } return 0; } else { - if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { + if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { if (RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | @@ -732,7 +745,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, @@ -745,7 +757,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .emit_fence = uvd_v4_2_ring_emit_fence, .test_ring = uvd_v4_2_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v4_2_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -753,7 +765,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) { - adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs; + adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs; } static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { @@ -763,8 +775,8 @@ static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = 1; - adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs; + adev->uvd.inst->irq.num_types = 1; + adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs; } const struct amdgpu_ip_block_version uvd_v4_2_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 6445d55e7d5a..aeaa1ca46a99 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -35,6 +35,7 @@ #include "vi.h" #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h" static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev); @@ -89,6 +90,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; uvd_v5_0_set_ring_funcs(adev); uvd_v5_0_set_irq_funcs(adev); @@ -103,7 +105,7 @@ static int uvd_v5_0_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, 
&adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); if (r) return r; @@ -115,9 +117,9 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - ring = &adev->uvd.ring; + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); return r; } @@ -144,7 +146,7 @@ static int uvd_v5_0_sw_fini(void *handle) static int uvd_v5_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -204,7 +206,7 @@ done: static int uvd_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; if (RREG32(mmUVD_STATUS) != 0) uvd_v5_0_stop(adev); @@ -253,9 +255,9 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) /* programm memory controller bits 0-27 */ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr)); + lower_32_bits(adev->uvd.inst->gpu_addr)); WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr)); + upper_32_bits(adev->uvd.inst->gpu_addr)); offset = AMDGPU_UVD_FIRMWARE_OFFSET; size = AMDGPU_UVD_FIRMWARE_SIZE(adev); @@ -287,7 +289,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) */ static int uvd_v5_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; uint32_t mp_swap_cntl; @@ -540,6 +542,18 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } +static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + static bool uvd_v5_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -586,7 +600,7 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_DEBUG("IH: UVD TRAP\n"); - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst->ring); return 0; } @@ -840,7 +854,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, @@ -853,7 +866,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .emit_fence = uvd_v5_0_ring_emit_fence, .test_ring = uvd_v5_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v5_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -861,7 +874,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) { - adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs; + adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs; } static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { @@ 
-871,8 +884,8 @@ static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = 1; - adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs; + adev->uvd.inst->irq.num_types = 1; + adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs; } const struct amdgpu_ip_block_version uvd_v5_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index f26f515db2fb..598dbeaba636 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -36,6 +36,7 @@ #include "bif/bif_5_1_d.h" #include "gmc/gmc_8_1_d.h" #include "vi.h" +#include "ivsrcid/ivsrcid_vislands30.h" /* Polaris10/11/12 firmware version */ #define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8)) @@ -62,7 +63,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) { return ((adev->asic_type >= CHIP_POLARIS10) && - (adev->asic_type <= CHIP_POLARIS12) && + (adev->asic_type <= CHIP_VEGAM) && (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); } @@ -91,7 +92,7 @@ static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) + if (ring == &adev->uvd.inst->ring_enc[0]) return RREG32(mmUVD_RB_RPTR); else return RREG32(mmUVD_RB_RPTR2); @@ -121,7 +122,7 @@ static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) + if (ring == &adev->uvd.inst->ring_enc[0]) return RREG32(mmUVD_RB_WPTR); else return RREG32(mmUVD_RB_WPTR2); @@ -152,7 +153,7 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) + if (ring == &adev->uvd.inst->ring_enc[0]) WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else @@ -247,12 +248,10 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; - amdgpu_job_free(job); if (fence) *fence = dma_fence_get(f); dma_fence_put(f); @@ -311,19 +310,13 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); - if (r) - goto err; - - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, + if (direct) + r = amdgpu_job_submit_direct(job, ring, &f); + else + r = amdgpu_job_submit(job, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err; - } + if (r) + goto err; if (fence) *fence = dma_fence_get(f); @@ -375,6 +368,7 @@ error: static int uvd_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; if (!(adev->flags & AMD_IS_APU) && (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK)) @@ -399,14 +393,14 @@ static int uvd_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 
VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); if (r) return r; /* UVD ENC TRAP */ if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq); if (r) return r; } @@ -418,39 +412,29 @@ static int uvd_v6_0_sw_init(void *handle) if (!uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) - adev->uvd.ring_enc[i].funcs = NULL; + adev->uvd.inst->ring_enc[i].funcs = NULL; - adev->uvd.irq.num_types = 1; + adev->uvd.inst->irq.num_types = 1; adev->uvd.num_enc_rings = 0; DRM_INFO("UVD ENC is disabled\n"); - } else { - struct drm_sched_rq *rq; - ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD ENC run queue.\n"); - return r; - } } r = amdgpu_uvd_resume(adev); if (r) return r; - ring = &adev->uvd.ring; + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; + ring = &adev->uvd.inst->ring_enc[i]; sprintf(ring->name, "uvd_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; } @@ -469,10 +453,8 @@ static int uvd_v6_0_sw_fini(void *handle) return r; if (uvd_v6_0_enc_support(adev)) { - drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); - for (i = 0; i < adev->uvd.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->uvd.ring_enc[i]); + amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); } return amdgpu_uvd_sw_fini(adev); @@ -488,7 +470,7 @@ static int uvd_v6_0_sw_fini(void *handle) static int uvd_v6_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int i, r; @@ -532,7 +514,7 @@ static int uvd_v6_0_hw_init(void *handle) if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; + ring = &adev->uvd.inst->ring_enc[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -563,7 +545,7 @@ done: static int uvd_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; if (RREG32(mmUVD_STATUS) != 0) uvd_v6_0_stop(adev); @@ -611,9 +593,9 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) /* programm memory controller bits 0-27 */ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr)); + lower_32_bits(adev->uvd.inst->gpu_addr)); WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr)); + upper_32_bits(adev->uvd.inst->gpu_addr)); offset = AMDGPU_UVD_FIRMWARE_OFFSET; size = AMDGPU_UVD_FIRMWARE_SIZE(adev); @@ -726,7 +708,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, */ static int uvd_v6_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct 
amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; uint32_t mp_swap_cntl; @@ -866,14 +848,14 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); if (uvd_v6_0_enc_support(adev)) { - ring = &adev->uvd.ring_enc[0]; + ring = &adev->uvd.inst->ring_enc[0]; WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->uvd.ring_enc[1]; + ring = &adev->uvd.inst->ring_enc[1]; WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -964,6 +946,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** + * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing + * + * @ring: amdgpu_ring pointer + */ +static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* The firmware doesn't seem to like touching registers at this point. */ +} + +/** * uvd_v6_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1089,6 +1081,18 @@ static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xE); } +static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { uint32_t seq = ring->fence_drv.sync_seq; @@ -1148,10 +1152,10 @@ static bool uvd_v6_0_check_soft_reset(void *handle) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); if (srbm_soft_reset) { - adev->uvd.srbm_soft_reset = srbm_soft_reset; + adev->uvd.inst->srbm_soft_reset = srbm_soft_reset; return true; } else { - adev->uvd.srbm_soft_reset = 0; + adev->uvd.inst->srbm_soft_reset = 0; return false; } } @@ -1160,7 +1164,7 @@ static int uvd_v6_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst->srbm_soft_reset) return 0; uvd_v6_0_stop(adev); @@ -1172,9 +1176,9 @@ static int uvd_v6_0_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 srbm_soft_reset; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst->srbm_soft_reset) return 0; - srbm_soft_reset = adev->uvd.srbm_soft_reset; + srbm_soft_reset = adev->uvd.inst->srbm_soft_reset; if (srbm_soft_reset) { u32 tmp; @@ -1202,7 +1206,7 @@ static int uvd_v6_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst->srbm_soft_reset) return 0; mdelay(5); @@ -1228,17 +1232,17 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst->ring); break; case 119: if (likely(uvd_v6_0_enc_support(adev))) - amdgpu_fence_process(&adev->uvd.ring_enc[0]); + amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]); else int_handled = false; break; case 120: if (likely(uvd_v6_0_enc_support(adev))) - amdgpu_fence_process(&adev->uvd.ring_enc[1]); + amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]); else int_handled = 
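/*
 * The uvd_v6_0_ring_insert_nop() added above pairs with dropping the
 * .nop field from the ring-funcs tables below: padding now uses full
 * UVD_NO_OP register-write packets (PACKET0 header plus one payload
 * dword) instead of bare nop headers, so each filler costs two dwords
 * and the WARN_ON rejects odd counts.  Equivalent standalone form:
 */
static void uvd_pad_with_noops(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	WARN_ON(ring->wptr % 2 || count % 2);	/* dword pairs only */

	for (i = 0; i < count / 2; i++) {
		amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));	/* header */
		amdgpu_ring_write(ring, 0);				/* payload */
	}
}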
false; break; @@ -1521,22 +1525,22 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ .emit_ib = uvd_v6_0_ring_emit_ib, .emit_fence = uvd_v6_0_ring_emit_fence, + .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v6_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -1546,13 +1550,12 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ @@ -1561,9 +1564,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .emit_fence = uvd_v6_0_ring_emit_fence, .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, + .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v6_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -1600,10 +1604,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) { if (adev->asic_type >= CHIP_POLARIS10) { - adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs; + adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs; DRM_INFO("UVD is enabled in VM mode\n"); } else { - adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs; + adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs; DRM_INFO("UVD is enabled in physical mode\n"); } } @@ -1613,7 +1617,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->uvd.num_enc_rings; ++i) - adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; + adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; DRM_INFO("UVD ENC is enabled in VM mode\n"); } @@ -1626,11 +1630,11 @@ static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = { static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev) { if (uvd_v6_0_enc_support(adev)) - adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; + adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1; else - adev->uvd.irq.num_types = 1; + adev->uvd.inst->irq.num_types = 1; - adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs; + adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs; } const struct 
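/*
 * Accounting behind the emit_frame_size change: the budget is the
 * worst-case number of ring dwords one frame may emit, reserved up front
 * by the ring code.  With emit_hdp_flush now a stub, only the invalidate
 * half is charged; for the VM table above that means
 *
 *	6					hdp invalidate
 *	+ 10					uvd_v6_0_ring_emit_pipeline_sync
 *	+ VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8	uvd_v6_0_ring_emit_vm_flush
 *	+ 14 + 14				uvd_v6_0_ring_emit_fence x2 (vm fence)
 */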
amdgpu_ip_block_version uvd_v6_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eddc57f3b72a..db5f3d78ab12 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -39,6 +39,9 @@ #include "hdp/hdp_4_0_offset.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" +#include "ivsrcid/uvd/irqsrcs_uvd_7_0.h" + +#define UVD7_MAX_HW_INSTANCES_VEGA20 2 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -47,6 +50,11 @@ static int uvd_v7_0_start(struct amdgpu_device *adev); static void uvd_v7_0_stop(struct amdgpu_device *adev); static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); +static int amdgpu_ih_clientid_uvds[] = { + SOC15_IH_CLIENTID_UVD, + SOC15_IH_CLIENTID_UVD1 +}; + /** * uvd_v7_0_ring_get_rptr - get read pointer * @@ -58,7 +66,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); } /** @@ -72,10 +80,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); } /** @@ -89,7 +97,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); } /** @@ -106,10 +114,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; - if (ring == &adev->uvd.ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); } /** @@ -123,7 +131,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } /** @@ -144,11 +152,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) return; } - if (ring == &adev->uvd.ring_enc[0]) - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } @@ -170,8 +178,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 16); if (r) { - DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", - ring->idx, r); + DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n", + ring->me, ring->idx, r); return r; } amdgpu_ring_write(ring, HEVC_ENC_CMD_END); @@ -184,11 +192,11 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); + 
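/*
 * The uvd_v7_0 hunks below replace hard-coded instance 0 with ring->me in
 * every register access.  On SOC15 parts the *_SOC15 helpers resolve an
 * mm* offset against a per-IP, per-instance base table, so the same
 * mmUVD_* constant lands in UVD0 or UVD1 depending on the index --
 * roughly as below (see soc15_common.h for the exact definitions):
 */
#define RREG32_SOC15(ip, inst, reg) \
	RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
#define WREG32_SOC15(ip, inst, reg, value) \
	WREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, value)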
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", + ring->me, ring->idx, i); } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + DRM_ERROR("amdgpu: (%d)ring %d test failed\n", + ring->me, ring->idx); r = -ETIMEDOUT; } @@ -242,12 +250,10 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; - amdgpu_job_free(job); if (fence) *fence = dma_fence_get(f); dma_fence_put(f); @@ -305,19 +311,13 @@ int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); - if (r) - goto err; - - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, + if (direct) + r = amdgpu_job_submit_direct(job, ring, &f); + else + r = amdgpu_job_submit(job, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err; - } + if (r) + goto err; if (fence) *fence = dma_fence_get(f); @@ -342,24 +342,24 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r); goto error; } r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r); goto error; } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me); r = -ETIMEDOUT; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r); } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx); r = 0; } error: @@ -370,6 +370,10 @@ error: static int uvd_v7_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_VEGA20) + adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20; + else + adev->uvd.num_uvd_inst = 1; if (amdgpu_sriov_vf(adev)) adev->uvd.num_enc_rings = 1; @@ -385,20 +389,21 @@ static int uvd_v7_0_early_init(void *handle) static int uvd_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq); - if (r) - return r; - - /* UVD ENC TRAP */ - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq); + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + /* UVD TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], UVD_7_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->uvd.inst[j].irq); if (r) return r; + + /* UVD ENC TRAP */ + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + UVD_7_0__SRCID__UVD_ENC_GEN_PURP, &adev->uvd.inst[j].irq); + if (r) + return r; + } } r = 
amdgpu_uvd_sw_init(adev); @@ -415,43 +420,37 @@ static int uvd_v7_0_sw_init(void *handle) DRM_INFO("PSP loading UVD firmware\n"); } - ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD ENC run queue.\n"); - return r; - } - r = amdgpu_uvd_resume(adev); if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - ring = &adev->uvd.ring; - sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); - if (r) - return r; - } - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; - sprintf(ring->name, "uvd_enc%d", i); - if (amdgpu_sriov_vf(adev)) { - ring->use_doorbell = true; - - /* currently only use the first enconding ring for - * sriov, so set unused location for other unused rings. - */ - if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; - else - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + if (!amdgpu_sriov_vf(adev)) { + ring = &adev->uvd.inst[j].ring; + sprintf(ring->name, "uvd<%d>", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + if (r) + return r; + } + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.inst[j].ring_enc[i]; + sprintf(ring->name, "uvd_enc%d<%d>", i, j); + if (amdgpu_sriov_vf(adev)) { + ring->use_doorbell = true; + + /* currently only use the first enconding ring for + * sriov, so set unused location for other unused rings. + */ + if (i == 0) + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + else + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + } + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + if (r) + return r; } - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); - if (r) - return r; } r = amdgpu_virt_alloc_mm_table(adev); @@ -463,7 +462,7 @@ static int uvd_v7_0_sw_init(void *handle) static int uvd_v7_0_sw_fini(void *handle) { - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_virt_free_mm_table(adev); @@ -472,11 +471,10 @@ static int uvd_v7_0_sw_fini(void *handle) if (r) return r; - drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); - - for (i = 0; i < adev->uvd.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->uvd.ring_enc[i]); - + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); + } return amdgpu_uvd_sw_fini(adev); } @@ -490,9 +488,9 @@ static int uvd_v7_0_sw_fini(void *handle) static int uvd_v7_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring; uint32_t tmp; - int i, r; + int i, j, r; if (amdgpu_sriov_vf(adev)) r = uvd_v7_0_sriov_start(adev); @@ -501,57 +499,60 @@ static int uvd_v7_0_hw_init(void *handle) if (r) goto done; - if (!amdgpu_sriov_vf(adev)) { - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + ring = &adev->uvd.inst[j].ring; + + if (!amdgpu_sriov_vf(adev)) { + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring 
(%d).\n", j, r); + goto done; + } + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_TIMEOUT_STATUS), 0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_CNTL), 0)); + amdgpu_ring_write(ring, 3); + + amdgpu_ring_commit(ring); } - r = amdgpu_ring_alloc(ring, 10); - if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); - goto done; - } - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - /* Clear timeout status bits */ - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_TIMEOUT_STATUS), 0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_CNTL), 0)); - amdgpu_ring_write(ring, 3); - - amdgpu_ring_commit(ring); - } - - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.inst[j].ring_enc[i]; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } } } - done: if (!r) DRM_INFO("UVD and UVD ENC initialized successfully.\n"); @@ -569,7 +570,7 @@ done: static int uvd_v7_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + int i; if (!amdgpu_sriov_vf(adev)) uvd_v7_0_stop(adev); @@ -578,7 +579,8 @@ static int uvd_v7_0_hw_fini(void *handle) DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); } - ring->ready = false; + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) + adev->uvd.inst[i].ring.ready = false; return 0; } @@ -618,48 +620,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) { uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); uint32_t offset; + int i; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - offset = 0; - } else { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr)); - offset = size; - } + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + 
lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr)); + offset = size; + } - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, - AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - - WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); + + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); + } } static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, @@ -669,6 +674,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, uint64_t addr = table->gpu_addr; struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; uint32_t size; + int i; size = header->header_size + header->vce_table_size + header->uvd_table_size; @@ -688,11 +694,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, /* 4, set resp to zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); - 
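/*
 * mc_resume carves three VCPU cache windows per instance out of one
 * backing allocation; the BAR/OFFSET/SIZE writes above imply:
 *
 *	window 0: firmware image (or the PSP-managed ucode copy, in which
 *		  case "offset" starts at 0 within the UVD BO)
 *	window 1: inst[i].gpu_addr + offset, AMDGPU_UVD_HEAP_SIZE bytes
 *	window 2: heap end, AMDGPU_UVD_STACK_SIZE plus 40 session slots
 *
 * Only the base addresses become per-instance; the sizes stay global.
 */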
WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); - adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0; - adev->uvd.ring_enc[0].wptr = 0; - adev->uvd.ring_enc[0].wptr_old = 0; - + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0); + adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0; + adev->uvd.inst[i].ring_enc[0].wptr = 0; + adev->uvd.inst[i].ring_enc[0].wptr_old = 0; + } /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); @@ -725,6 +732,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) struct mmsch_v1_0_cmd_end end = { {0} }; uint32_t *init_table = adev->virt.mm_table.cpu_addr; struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; + uint8_t i = 0; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; @@ -742,120 +750,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) init_table += header->uvd_table_offset; - ring = &adev->uvd.ring; - ring->wptr = 0; - size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); - - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), - 0xFFFFFFFF, 0x00000004); - /* mc resume*/ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - offset = 0; - } else { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.gpu_addr)); - offset = size; + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + ring = &adev->uvd.inst[i].ring; + ring->wptr = 0; + size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + 0xFFFFFFFF, 0x00000004); + /* mc resume*/ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.inst[i].gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.inst[i].gpu_addr)); + offset = size; + } + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + 
MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); + /* mc resume end*/ + + /* disable clock gating */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL), + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); + + /* disable interupt */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); + + /* stall UMC and register bus before resetting VCPU */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* put LMI, VCPU, RBC etc... into reset */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), + (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); + + /* initialize UVD memory controller */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL), + (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L)); + + /* take all subblocks out of reset, except VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* enable VCPU clock */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK); + + /* enable master interrupt */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + + /* clear the bit 4 of UVD_STATUS */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); + + /* force RBC into idle state */ + size = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); + + ring = &adev->uvd.inst[i].ring_enc[0]; + ring->wptr = 0; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 
4); + + /* boot up the VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0); + + /* enable UMC */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02); } - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.gpu_addr + offset)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.gpu_addr + offset)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), - AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); - /* mc resume end*/ - - /* disable clock gating */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), - ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); - - /* disable interupt */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); - - /* stall UMC and register bus before resetting VCPU */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - /* put LMI, VCPU, RBC etc... 
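/*
 * Under SR-IOV the guest cannot program UVD registers directly, so the
 * same per-instance bring-up sequence is serialized into an MMSCH command
 * table that host firmware replays.  The INSERT macros append fixed-size
 * records and advance the table cursor; approximate expansion of a
 * direct-write entry (the real structs live in mmsch_v1_0.h):
 */
	direct_wt.cmd_header.reg_offset = reg;		/* register to write */
	direct_wt.reg_value = value;
	memcpy((void *)init_table, &direct_wt, sizeof(direct_wt));
	init_table += sizeof(direct_wt) / 4;		/* cursor in dwords */
	table_size += sizeof(direct_wt) / 4;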
into reset */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | - UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | - UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | - UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | - UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | - UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | - UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); - - /* initialize UVD memory controller */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), - (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__REQ_MODE_MASK | - 0x00100000L)); - - /* take all subblocks out of reset, except VCPU */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - - /* enable VCPU clock */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK); - - /* enable master interrupt */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), - (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); - - /* clear the bit 4 of UVD_STATUS */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); - - /* force RBC into idle state */ - size = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); - - ring = &adev->uvd.ring_enc[0]; - ring->wptr = 0; - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); - - /* boot up the VCPU */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); - - /* enable UMC */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); - - MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); - /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; @@ -874,15 +883,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) */ static int uvd_v7_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; uint32_t mp_swap_cntl; - int i, j, r; + int i, j, k, r; - /* disable DPG */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, - ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { + /* disable DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + } /* disable byte swapping */ lmi_swap_cntl = 0; @@ -890,157 +901,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) uvd_v7_0_mc_resume(adev); - /* disable clock gating */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0, - ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); - - /* disable interupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* stall UMC and register bus before resetting VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - mdelay(1); - - /* put LMI, VCPU, RBC etc... into reset */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | - UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | - UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | - UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | - UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | - UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | - UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); - mdelay(5); + for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { + ring = &adev->uvd.inst[k].ring; + /* disable clock gating */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0, + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); - /* initialize UVD memory controller */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, - (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__REQ_MODE_MASK | - 0x00100000L); + /* disable interupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + mdelay(5); + + /* initialize UVD memory controller */ + WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL, + (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L); #ifdef __BIG_ENDIAN - /* swap (8 in 32) RB and IB */ - lmi_swap_cntl = 0xa; - mp_swap_cntl = 0; + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; #endif - WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); - WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); - - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); - - /* take all subblocks out of reset, except VCPU */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(5); + WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); - /* enable VCPU clock */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, - UVD_VCPU_CNTL__CLK_EN_MASK); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88); - /* enable 
UMC */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* take all subblocks out of reset, except VCPU */ + WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); - /* boot up the VCPU */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); - mdelay(10); + /* enable VCPU clock */ + WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL, + UVD_VCPU_CNTL__CLK_EN_MASK); + + /* enable UMC */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* boot up the VCPU */ + WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0); + mdelay(10); - for (i = 0; i < 10; ++i) { - uint32_t status; + for (i = 0; i < 10; ++i) { + uint32_t status; - for (j = 0; j < 100; ++j) { - status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, k, mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; if (status & 2) break; + + DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k); + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); mdelay(10); + r = -1; } - r = 0; - if (status & 2) - break; - DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, - ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, - ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(10); - r = -1; - } - - if (r) { - DRM_ERROR("UVD not responding, giving up!!!\n"); - return r; - } - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), - ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); - - /* clear the bit 4 of UVD_STATUS */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - - /* force RBC into idle state */ - rb_bufsz = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); - - /* set the write pointer delay */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); - - /* set the wb address */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, - (upper_32_bits(ring->gpu_addr) >> 2)); - - /* programm the RB_BASE for ring buffer */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - - /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); - - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - lower_32_bits(ring->wptr)); - - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, - ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - - ring = &adev->uvd.ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, 
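/*
 * The bring-up above is the long-standing UVD recipe, now run once per
 * instance k: stall the UMC, assert soft reset, program the LMI, release
 * every block but the VCPU, enable the VCPU clock, drop VCPU reset, then
 * poll UVD_STATUS.  The "status & 2" test reads the VCPU report field;
 * each of the 10 outer attempts polls for up to a second (100 x
 * mdelay(10)) and re-pulses VCPU soft reset before retrying.
 */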
lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + if (r) { + DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k); + return r; + } + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); - ring = &adev->uvd.ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + /* clear the bit 4 of UVD_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0, + ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); + + ring = &adev->uvd.inst[k].ring_enc[0]; + WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->uvd.inst[k].ring_enc[1]; + WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4); + } return 0; } @@ -1053,26 +1066,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) */ static void uvd_v7_0_stop(struct amdgpu_device *adev) { - /* force RBC into idle state */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); - - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - mdelay(1); - - /* put VCPU into reset */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - 
UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(5); + uint8_t i = 0; + + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + /* force RBC into idle state */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101); - /* disable VCPU clock */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); - /* Unstall UMC and register bus */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* put VCPU into reset */ + WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); + + /* disable VCPU clock */ + WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + } } /** @@ -1091,26 +1108,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); amdgpu_ring_write(ring, seq); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, addr & 0xffffffff); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 2); } @@ -1136,6 +1153,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** + * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing + * + * @ring: amdgpu_ring pointer + */ +static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* The firmware doesn't seem to like touching registers at this point. 
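/*
 * Fence emission is a fixed burst of GPCOM register writes, now aimed at
 * instance ring->me: CONTEXT_ID carries the sequence number, DATA0/DATA1
 * the fence address, and VCPU_CMD kicks the firmware.  Reading the packet
 * stream above, cmd 0 appears to perform the fence write and the trailing
 * DATA0=0/DATA1=0/cmd 2 burst raises the trap interrupt.
 */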
*/ +} + +/** * uvd_v7_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1149,30 +1176,30 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); + WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", + ring->me, ring->idx, r); return r; } amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); + tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); + DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", + ring->me, ring->idx, i); } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); + DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n", + ring->me, ring->idx, tmp); r = -EINVAL; } return r; @@ -1193,17 +1220,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0)); amdgpu_ring_write(ring, ib->length_dw); } @@ -1231,13 +1258,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 8); } @@ -1247,16 +1274,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, - 
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0)); amdgpu_ring_write(ring, mask); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 12); } @@ -1277,12 +1304,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - int i; struct amdgpu_device *adev = ring->adev; + int i; - for (i = 0; i < count; i++) - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + WARN_ON(ring->wptr % 2 || count % 2); + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0)); + amdgpu_ring_write(ring, 0); + } } static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) @@ -1349,16 +1379,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle) if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || - (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & + (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) & AMDGPU_UVD_STATUS_BUSY_MASK)) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); if (srbm_soft_reset) { - adev->uvd.srbm_soft_reset = srbm_soft_reset; + adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset; return true; } else { - adev->uvd.srbm_soft_reset = 0; + adev->uvd.inst[ring->me].srbm_soft_reset = 0; return false; } } @@ -1367,7 +1397,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; uvd_v7_0_stop(adev); @@ -1379,9 +1409,9 @@ static int uvd_v7_0_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 srbm_soft_reset; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; - srbm_soft_reset = adev->uvd.srbm_soft_reset; + srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset; if (srbm_soft_reset) { u32 tmp; @@ -1409,7 +1439,7 @@ static int uvd_v7_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; mdelay(5); @@ -1431,17 +1461,32 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_UVD: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_UVD1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + DRM_DEBUG("IH: UVD TRAP\n"); + switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring); break; case 119: - amdgpu_fence_process(&adev->uvd.ring_enc[0]); + amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]); break; case 120: if (!amdgpu_sriov_vf(adev)) - amdgpu_fence_process(&adev->uvd.ring_enc[1]); + amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -1457,9 +1502,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, data2, suvd_flags; - data = RREG32_SOC15(UVD, 0, 
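/*
 * Interrupt dispatch now takes two lookups: the IH client id selects the
 * UVD instance, then src_id selects the ring, exactly as the switches in
 * uvd_v7_0_process_interrupt() below do inline.  A hypothetical helper
 * for the first step (not part of the patch):
 */
static int uvd_v7_0_client_to_inst(struct amdgpu_iv_entry *entry)
{
	switch (entry->client_id) {
	case SOC15_IH_CLIENTID_UVD:
		return 0;
	case SOC15_IH_CLIENTID_UVD1:
		return 1;
	default:
		return -EINVAL;	/* not a UVD client */
	}
}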
mmUVD_CGC_CTRL); - data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); - data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); + data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL); + data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); + data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL); data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); @@ -1503,18 +1548,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); data1 |= suvd_flags; - WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); - WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); - WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); - WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); + WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data); + WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0); + WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); + WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2); } static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, cgc_flags, suvd_flags; - data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); - data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); + data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE); + data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); cgc_flags = UVD_CGC_GATE__SYS_MASK | UVD_CGC_GATE__UDEC_MASK | @@ -1546,8 +1591,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) data |= cgc_flags; data1 |= suvd_flags; - WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); - WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); + WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data); + WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); } static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) @@ -1606,7 +1651,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) return 0; - WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); + WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); if (state == AMD_PG_STATE_GATE) { uvd_v7_0_stop(adev); @@ -1647,14 +1692,13 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 8 + /* uvd_v7_0_ring_emit_vm_flush */ @@ -1663,6 +1707,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .emit_ib = uvd_v7_0_ring_emit_ib, .emit_fence = uvd_v7_0_ring_emit_fence, .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, + .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush, .test_ring = uvd_v7_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = uvd_v7_0_ring_insert_nop, @@ -1671,6 +1716,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .end_use = amdgpu_uvd_ring_end_use, .emit_wreg = uvd_v7_0_ring_emit_wreg, .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { @@ -1702,22 +1748,32 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .end_use = amdgpu_uvd_ring_end_use, .emit_wreg = 
uvd_v7_0_enc_ring_emit_wreg, .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) { - adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs; - DRM_INFO("UVD is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs; + adev->uvd.inst[i].ring.me = i; + DRM_INFO("UVD(%d) is enabled in VM mode\n", i); + } } static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) { - int i; + int i, j; - for (i = 0; i < adev->uvd.num_enc_rings; ++i) - adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; + adev->uvd.inst[j].ring_enc[i].me = j; + } - DRM_INFO("UVD ENC is enabled in VM mode\n"); + DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j); + } } static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { @@ -1727,8 +1783,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; - adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs; + int i; + + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1; + adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs; + } } const struct amdgpu_ip_block_version uvd_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 47f70827195b..d48e877b682e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -56,7 +56,7 @@ static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) return RREG32(mmVCE_RB_RPTR); else return RREG32(mmVCE_RB_RPTR2); @@ -73,7 +73,7 @@ static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) return RREG32(mmVCE_RB_WPTR); else return RREG32(mmVCE_RB_WPTR2); @@ -90,7 +90,7 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); @@ -627,8 +627,10 @@ static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->vce.num_rings; i++) + for (i = 0; i < adev->vce.num_rings; i++) { adev->vce.ring[i].funcs = &vce_v2_0_ring_funcs; + adev->vce.ring[i].me = i; + } } static const struct amdgpu_irq_src_funcs vce_v2_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 428d1928e44e..cc6ce6cc03f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -39,6 +39,7 @@ #include "smu/smu_7_1_2_sh_mask.h" #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h" #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 @@ -86,9 +87,9 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); - if (ring == 
&adev->vce.ring[0]) + if (ring->me == 0) v = RREG32(mmVCE_RB_RPTR); - else if (ring == &adev->vce.ring[1]) + else if (ring->me == 1) v = RREG32(mmVCE_RB_RPTR2); else v = RREG32(mmVCE_RB_RPTR3); @@ -118,9 +119,9 @@ static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) v = RREG32(mmVCE_RB_WPTR); - else if (ring == &adev->vce.ring[1]) + else if (ring->me == 1) v = RREG32(mmVCE_RB_WPTR2); else v = RREG32(mmVCE_RB_WPTR3); @@ -149,9 +150,9 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); - else if (ring == &adev->vce.ring[1]) + else if (ring->me == 1) WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); @@ -388,7 +389,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) default: if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) return AMDGPU_VCE_HARVEST_VCE1; return 0; @@ -421,7 +423,7 @@ static int vce_v3_0_sw_init(void *handle) int r, i; /* VCE */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq); if (r) return r; @@ -467,8 +469,8 @@ static int vce_v3_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; vce_v3_0_override_vce_clock_gating(adev, true); - if (!(adev->flags & AMD_IS_APU)) - amdgpu_asic_set_vce_clocks(adev, 10000, 10000); + + amdgpu_asic_set_vce_clocks(adev, 10000, 10000); for (i = 0; i < adev->vce.num_rings; i++) adev->vce.ring[i].ready = false; @@ -899,7 +901,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .emit_frame_size = 4 + /* vce_v3_0_emit_pipeline_sync */ 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */ - .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */ + .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ .emit_ib = amdgpu_vce_ring_emit_ib, .emit_fence = amdgpu_vce_ring_emit_fence, .test_ring = amdgpu_vce_ring_test_ring, @@ -923,7 +925,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ 6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */ - .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ + .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */ .emit_ib = vce_v3_0_ring_emit_ib, .emit_vm_flush = vce_v3_0_emit_vm_flush, .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync, @@ -941,12 +943,16 @@ static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) int i; if (adev->asic_type >= CHIP_STONEY) { - for (i = 0; i < adev->vce.num_rings; i++) + for (i = 0; i < adev->vce.num_rings; i++) { adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs; + adev->vce.ring[i].me = i; + } DRM_INFO("VCE enabled in VM mode\n"); } else { - for (i = 0; i < adev->vce.num_rings; i++) + for (i = 0; i < adev->vce.num_rings; i++) { adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs; + adev->vce.ring[i].me = i; + } DRM_INFO("VCE enabled in physical mode\n"); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 73fd48d6c756..65f8860169e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -39,6 +39,8 @@ #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" +#include "ivsrcid/vce/irqsrcs_vce_4_0.h" + #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 #define VCE_V4_0_FW_SIZE (384 * 1024) @@ -60,9 +62,9 @@ static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR)); - else if (ring == &adev->vce.ring[1]) + else if (ring->me == 1) return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2)); else return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3)); @@ -82,9 +84,9 @@ static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR)); - else if (ring == &adev->vce.ring[1]) + else if (ring->me == 1) return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2)); else return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3)); @@ -108,10 +110,10 @@ static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring) return; } - if (ring == &adev->vce.ring[0]) + if (ring->me == 0) WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr)); - else if (ring == &adev->vce.ring[1]) + else if (ring->me == 1) WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr)); else @@ -1081,14 +1083,17 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .end_use = amdgpu_vce_ring_end_use, .emit_wreg = vce_v4_0_emit_wreg, .emit_reg_wait = vce_v4_0_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->vce.num_rings; i++) + for (i = 0; i < adev->vce.num_rings; i++) { adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; + adev->vce.ring[i].me = i; + } DRM_INFO("VCE enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 8c132673bc79..2ce91a748c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -35,11 +35,14 @@ #include "mmhub/mmhub_9_1_offset.h" #include "mmhub/mmhub_9_1_sh_mask.h" -static int vcn_v1_0_start(struct amdgpu_device *adev); +#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); /** * vcn_v1_0_early_init - set function pointers @@ -56,6 +59,7 @@ static int vcn_v1_0_early_init(void *handle) vcn_v1_0_set_dec_ring_funcs(adev); vcn_v1_0_set_enc_ring_funcs(adev); + vcn_v1_0_set_jpeg_ring_funcs(adev); vcn_v1_0_set_irq_funcs(adev); return 0; @@ -75,18 +79,23 @@ static int vcn_v1_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 
VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq); if (r) return r; /* VCN ENC TRAP */ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.irq); if (r) return r; } + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); + if (r) + return r; + r = amdgpu_vcn_sw_init(adev); if (r) return r; @@ -109,6 +118,12 @@ static int vcn_v1_0_sw_init(void *handle) return r; } + ring = &adev->vcn.ring_jpeg; + sprintf(ring->name, "vcn_jpeg"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + return r; } @@ -146,10 +161,6 @@ static int vcn_v1_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->vcn.ring_dec; int i, r; - r = vcn_v1_0_start(adev); - if (r) - goto done; - ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -167,6 +178,14 @@ static int vcn_v1_0_hw_init(void *handle) } } + ring = &adev->vcn.ring_jpeg; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + done: if (!r) DRM_INFO("VCN decode and encode initialized successfully.\n"); @@ -185,11 +204,9 @@ static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = &adev->vcn.ring_dec; - int r; - r = vcn_v1_0_stop(adev); - if (r) - return r; + if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) + vcn_v1_0_stop(adev); ring->ready = false; @@ -288,14 +305,14 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) * * Disable clock gating for VCN block */ -static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) +static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data; /* JPEG disable CGC */ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; @@ -310,7 +327,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) /* UVD disable CGC */ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; @@ -415,13 +432,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) * * Enable clock gating for VCN block */ -static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) +static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) { uint32_t data = 0; /* enable JPEG CGC */ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; @@ -435,7 +452,7 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) /* enable UVD CGC */ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; @@ -480,6 +497,94 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } +static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int 
ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret); + } else { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret); + } + + /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */ + + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); +} + +static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); + + + data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + + data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret); + } +} + /** * vcn_v1_0_start - start VCN block * @@ 
-497,10 +602,11 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) /* disable byte swapping */ lmi_swap_cntl = 0; - vcn_v1_0_mc_resume(adev); - + vcn_1_0_disable_static_power_gating(adev); /* disable clock gating */ - vcn_v1_0_disable_clock_gating(adev, true); + vcn_v1_0_disable_clock_gating(adev); + + vcn_v1_0_mc_resume(adev); /* disable interrupt */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, @@ -647,6 +753,22 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + + /* initialize wptr */ + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + + /* copy patch commands to the jpeg ring */ + vcn_v1_0_jpeg_ring_set_patch_ring(ring, + (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + return 0; } @@ -680,16 +802,45 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - /* enable clock gating */ - vcn_v1_0_enable_clock_gating(adev, true); + WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); + vcn_v1_0_enable_clock_gating(adev); + vcn_1_0_enable_static_power_gating(adev); return 0; } +static bool vcn_v1_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2); +} + +static int vcn_v1_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret); + + return ret; +} + static int vcn_v1_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { - /* needed for driver unload*/ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ?
true : false; + + if (enable) { + /* wait for STATUS to clear */ + if (vcn_v1_0_is_idle(handle)) + return -EBUSY; + vcn_v1_0_enable_clock_gating(adev); + } else { + /* disable HW gating and enable SW gating */ + vcn_v1_0_disable_clock_gating(adev); + } return 0; } @@ -1015,6 +1166,383 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } + +/** + * vcn_v1_0_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v1_0_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v1_0_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); +} + +/** + * vcn_v1_0_jpeg_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * vcn_v1_0_jpeg_ring_insert_end - insert an end command + * + * @ring: amdgpu_ring pointer + * + * Write an end command to the ring. + */ +static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * vcn_v1_0_jpeg_ring_emit_fence - emit a fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring.
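The PACKETJ() headers used by these JPEG ring helpers pack a register offset, a condition code, and a packet type into one command dword; the type selects the operation (TYPE0 register write, TYPE3 conditional register wait, TYPE6 no-op, as used by the functions around here). A rough, self-contained sketch of such an encoder follows — the field positions are an assumption for illustration only; the authoritative layout is the PACKETJ macro in soc15d.h.

#include <stdint.h>

/*
 * Illustrative JPEG command-header encoder: register offset in the low
 * bits, condition and type in the high bits.  Field widths are assumed
 * for this sketch; consult soc15d.h for the real PACKETJ definition.
 */
static inline uint32_t packetj(uint32_t reg, uint32_t cond, uint32_t type)
{
	return (reg & 0x3ffff) | ((cond & 0xf) << 24) | ((type & 0xf) << 28);
}

Under this model a header like PACKETJ(0, 0, 0, PACKETJ_TYPE6) carries no register at all, which is consistent with vcn_v1_0_jpeg_ring_nop() further down emitting it paired with a zero data dword as padding.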
+ */ +static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0xffffffff); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); +} + +/** + * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. 
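The emit_fence sequence above has the engine write seq to the GPCOM data registers and, through the LMI memory-write path, to the fence address addr; the host side then treats a fence as signaled once the word at that address has reached the awaited sequence number. A minimal sketch of that host-side comparison (hypothetical helper, not the driver's actual fence-processing code):

#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical host-side companion to an emit_fence hook: the engine
 * stores the most recently signaled sequence number at cpu_addr, and
 * signed 32-bit distance keeps the comparison correct across
 * wrap-around of the sequence counter.
 */
static bool fence_seq_signaled(const volatile uint32_t *cpu_addr,
			       uint32_t wait_seq)
{
	uint32_t hw_seq = *cpu_addr;	/* value written by the engine */

	return (int32_t)(hw_seq - wait_seq) >= 0;
}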
+ */ +static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vmid, bool ctx_switch) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = 
ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[(*ptr)++] = 0; + ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); + } else { + ring->ring[(*ptr)++] = reg_offset; + ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); + } + ring->ring[(*ptr)++] = val; +} + +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) +{ + struct amdgpu_device *adev = ring->adev; + + uint32_t reg, reg_offset, val, mask, i; + + // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); + reg_offset = (reg << 2); + val = lower_32_bits(ring->gpu_addr); + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); + reg_offset = (reg << 2); + val = upper_32_bits(ring->gpu_addr); + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 3rd to 5th: issue MEM_READ commands + for (i = 0; i <= 2; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); + ring->ring[ptr++] = 0; + } + + // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x13; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 7th: program mmUVD_JRBC_RB_REF_DATA + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA); + reg_offset = (reg << 2); + val = 0x1; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x1; + mask = 0x1; + + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = 0x01400200; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = val; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[ptr++] = 0; + ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); + } else { + 
ring->ring[ptr++] = reg_offset; + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); + } + ring->ring[ptr++] = mask; + + //9th to 21st: insert no-op + for (i = 0; i <= 12; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ring->ring[ptr++] = 0; + } + + //22nd: reset mmUVD_JRBC_RB_RPTR + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR); + reg_offset = (reg << 2); + val = 0; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x12; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); +} + static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -1039,6 +1567,9 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, case 120: amdgpu_fence_process(&adev->vcn.ring_enc[1]); break; + case 126: + amdgpu_fence_process(&adev->vcn.ring_jpeg); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -1048,16 +1579,36 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - int i; struct amdgpu_device *adev = ring->adev; + int i; - for (i = 0; i < count; i++) - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + WARN_ON(ring->wptr % 2 || count % 2); + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + amdgpu_ring_write(ring, 0); + } } +static int vcn_v1_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the VCN block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. 
We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (state == AMD_PG_STATE_GATE) + return vcn_v1_0_stop(adev); + else + return vcn_v1_0_start(adev); +} static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", @@ -1069,20 +1620,19 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .hw_fini = vcn_v1_0_hw_fini, .suspend = vcn_v1_0_suspend, .resume = vcn_v1_0_resume, - .is_idle = NULL /* vcn_v1_0_is_idle */, - .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */, + .is_idle = vcn_v1_0_is_idle, + .wait_for_idle = vcn_v1_0_wait_for_idle, .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */, .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */, .soft_reset = NULL /* vcn_v1_0_soft_reset */, .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, - .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */, + .set_powergating_state = vcn_v1_0_set_powergating_state, }; static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_dec_ring_get_rptr, @@ -1101,7 +1651,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, .test_ring = amdgpu_vcn_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, - .insert_nop = vcn_v1_0_ring_insert_nop, + .insert_nop = vcn_v1_0_dec_ring_insert_nop, .insert_start = vcn_v1_0_dec_ring_insert_start, .insert_end = vcn_v1_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, @@ -1109,6 +1659,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { @@ -1139,6 +1690,40 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .nop = PACKET0(0x81ff, 0), + .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, + .extra_dw = 64, + .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, + .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, + .set_wptr = vcn_v1_0_jpeg_ring_set_wptr, + .emit_frame_size = + 6 + 6 + /* hdp invalidate / flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */ + .emit_ib = vcn_v1_0_jpeg_ring_emit_ib, + .emit_fence = vcn_v1_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v1_0_jpeg_ring_nop, + .insert_start = vcn_v1_0_jpeg_ring_insert_start, + .insert_end = vcn_v1_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use =
amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg, + .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait, }; static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -1157,6 +1742,12 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) DRM_INFO("VCN encode is enabled in VM mode\n"); } +static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; + DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); +} + static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { .set = vcn_v1_0_set_interrupt_state, .process = vcn_v1_0_process_interrupt, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 45aafca7f315..c5c9b2bc190d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -51,6 +51,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i])); adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c new file mode 100644 index 000000000000..52778de93ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
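For context on the reg-base tables here: the SOC15 register accessors resolve a register by adding its relative offset to a per-IP, per-instance segment base recorded in adev->reg_offset[], which is exactly what vega10_reg_base_init() above and vega20_reg_base_init() below fill in. A simplified model of that lookup, with stand-in types for illustration only:

#include <stdint.h>

#define HWIP_MAX_SEGMENT 5	/* stand-in segment count for the sketch */

/* Per-instance register segment bases, as the *_reg_base_init()
 * functions record them for each hardware IP block. */
struct hwip_instance {
	uint32_t segment[HWIP_MAX_SEGMENT];
};

/* Simplified shape of the SOC15_REG_OFFSET() lookup: segment base
 * plus the register's relative dword offset. */
static inline uint32_t soc15_reg_offset(const struct hwip_instance *inst,
					uint32_t base_idx, uint32_t reg)
{
	return inst->segment[base_idx] + reg;
}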
+ * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "vega20_ip_offset.h" + +int vega20_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialize the blocks needed by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 126f1276d347..42c8ad105b05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) stoney_mgcg_cgcg_init, ARRAY_SIZE(stoney_mgcg_cgcg_init)); break; - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: default: break; } @@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, return r; tmp = RREG32_SMC(cntl_reg); - tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | - CG_DCLK_CNTL__DCLK_DIVIDER_MASK); + + if (adev->flags & AMD_IS_APU) + tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK; + else + tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | + CG_DCLK_CNTL__DCLK_DIVIDER_MASK); tmp |= dividers.post_divider; WREG32_SMC(cntl_reg, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK) - break; + tmp = RREG32_SMC(status_reg); + if (adev->flags & AMD_IS_APU) { + if (tmp & 0x10000) + break; + } else { + if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK) + break; + } mdelay(10); } if (i == 100) return -ETIMEDOUT; - return 0; } +#define ixGNB_CLK1_DFS_CNTL 0xD82200F0 +#define ixGNB_CLK1_STATUS 0xD822010C +#define ixGNB_CLK2_DFS_CNTL 0xD8220110 +#define ixGNB_CLK2_STATUS 0xD822012C +#define ixGNB_CLK3_DFS_CNTL 0xD8220130 +#define ixGNB_CLK3_STATUS 0xD822014C + static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) { int r; - r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); - if (r) - return r; + if (adev->flags & AMD_IS_APU) { + r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS); + if (r) + return r; - r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); - if (r) - return r; + r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS); + if (r) + return r; + } else { + r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); + if (r) + return r; + + r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); + if (r) +
return r; + } return 0; } @@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) int r, i; struct atom_clock_dividers dividers; u32 tmp; + u32 reg_ctrl; + u32 reg_status; + u32 status_mask; + u32 reg_mask; + + if (adev->flags & AMD_IS_APU) { + reg_ctrl = ixGNB_CLK3_DFS_CNTL; + reg_status = ixGNB_CLK3_STATUS; + status_mask = 0x00010000; + reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } else { + reg_ctrl = ixCG_ECLK_CNTL; + reg_status = ixCG_ECLK_STATUS; + status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK; + reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } r = amdgpu_atombios_get_clock_dividers(adev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, @@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) return r; for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; - tmp = RREG32_SMC(ixCG_ECLK_CNTL); - tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | - CG_ECLK_CNTL__ECLK_DIVIDER_MASK); + tmp = RREG32_SMC(reg_ctrl); + tmp &= ~reg_mask; tmp |= dividers.post_divider; - WREG32_SMC(ixCG_ECLK_CNTL, tmp); + WREG32_SMC(reg_ctrl, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; @@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev, } } +static bool vi_need_full_reset(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + /* CZ has hang issues with full reset at the moment */ + return false; + case CHIP_FIJI: + case CHIP_TONGA: + /* XXX: soft reset should work on fiji and tonga */ + return true; + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + default: + /* change this when we support soft reset */ + return true; + } +} + static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_config_memsize = &vi_get_config_memsize, .flush_hdp = &vi_flush_hdp, .invalidate_hdp = &vi_invalidate_hdp, + .need_full_reset = &vi_need_full_reset, }; #define CZ_REV_BRISTOL(rev) \ @@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x64; break; + case CHIP_VEGAM: + adev->cg_flags = 0; + /*AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_UVD_MGCG | + AMD_CG_SUPPORT_VCE_MGCG;*/ + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x6E; + break; case CHIP_CARRIZO: adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | AMD_CG_SUPPORT_GFX_MGCG | @@ -1273,11 +1363,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle, if (adev->cg_flags & (AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_MC_MGCG)) { if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) { - pp_support_state = 
AMD_CG_SUPPORT_MC_LS; + pp_support_state = PP_STATE_SUPPORT_LS; pp_state = PP_STATE_LS; } if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) { - pp_support_state |= AMD_CG_SUPPORT_MC_MGCG; + pp_support_state |= PP_STATE_SUPPORT_CG; pp_state |= PP_STATE_CG; } if (state == AMD_CG_STATE_UNGATE) @@ -1292,11 +1382,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle, if (adev->cg_flags & (AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_SDMA_MGCG)) { if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS) { - pp_support_state = AMD_CG_SUPPORT_SDMA_LS; + pp_support_state = PP_STATE_SUPPORT_LS; pp_state = PP_STATE_LS; } if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG) { - pp_support_state |= AMD_CG_SUPPORT_SDMA_MGCG; + pp_support_state |= PP_STATE_SUPPORT_CG; pp_state |= PP_STATE_CG; } if (state == AMD_CG_STATE_UNGATE) @@ -1311,11 +1401,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle, if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_MGCG)) { if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { - pp_support_state = AMD_CG_SUPPORT_HDP_LS; + pp_support_state = PP_STATE_SUPPORT_LS; pp_state = PP_STATE_LS; } if (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG) { - pp_support_state |= AMD_CG_SUPPORT_HDP_MGCG; + pp_support_state |= PP_STATE_SUPPORT_CG; pp_state |= PP_STATE_CG; } if (state == AMD_CG_STATE_UNGATE) @@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: vi_common_set_clockgating_state_by_smu(adev, state); default: break; @@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); |
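The vi_set_uvd_clock() and vi_set_vce_clocks() hunks above both reduce to the same pattern once the APU/dGPU register pair is chosen: rewrite the divider field in the control register, then poll a status bit a bounded number of times before giving up. A condensed sketch of that pattern — reg_read()/reg_write() are hypothetical stand-ins for the driver's SMC register accessors, not driver API:

#include <errno.h>
#include <stdint.h>

/* Hypothetical MMIO accessors standing in for RREG32_SMC/WREG32_SMC. */
static uint32_t reg_read(const volatile uint32_t *reg) { return *reg; }
static void reg_write(volatile uint32_t *reg, uint32_t val) { *reg = val; }

/*
 * Program a new post-divider, then poll the status bit up to 100
 * times (the driver sleeps 10 ms between polls) before reporting a
 * timeout, mirroring the control flow of vi_set_uvd_clock().
 */
static int set_clock_divider(volatile uint32_t *cntl,
			     const volatile uint32_t *status,
			     uint32_t div_mask, uint32_t status_mask,
			     uint32_t post_div)
{
	uint32_t tmp = reg_read(cntl) & ~div_mask;
	int i;

	reg_write(cntl, tmp | post_div);

	for (i = 0; i < 100; i++)
		if (reg_read(status) & status_mask)
			return 0;	/* divider change latched */

	return -ETIMEDOUT;
}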