diff options
Diffstat (limited to 'drivers/gpu/drm')
384 files changed, 16034 insertions, 9900 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f42e8d467c12..b1bb10625cd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -90,6 +90,7 @@ #include "amdgpu_mes.h" #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" +#include "amdgpu_df.h" #define MAX_GPU_INSTANCE 16 @@ -664,29 +665,6 @@ struct amdgpu_mmio_remap { resource_size_t bus_addr; }; -struct amdgpu_df_funcs { - void (*sw_init)(struct amdgpu_device *adev); - void (*sw_fini)(struct amdgpu_device *adev); - void (*enable_broadcast_mode)(struct amdgpu_device *adev, - bool enable); - u32 (*get_fb_channel_number)(struct amdgpu_device *adev); - u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, - bool enable); - int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, - int is_enable); - int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, - int is_disable); - void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, - uint64_t *count); - uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); - void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, - uint32_t ficadl_val, uint32_t ficadh_val); -}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -930,6 +908,9 @@ struct amdgpu_device { bool enable_mes; struct amdgpu_mes mes; + /* df */ + struct amdgpu_df df; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -943,8 +924,6 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - const struct amdgpu_df_funcs *df_funcs; - /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..8609287620ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -613,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (is_support_sw_smu(adev)) - smu_switch_power_profile(&adev->smu, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) @@ -634,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3c119407dc34..4bcc175a149d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -71,32 +71,56 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { - SOC15_REG_OFFSET(SDMA0, 0, - mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, - mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA2, 0, - mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA3, 0, - mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA4, 0, - mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA5, 0, - mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA6, 0, - mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA7, 0, - mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL - }; - - uint32_t retval = sdma_engine_reg_base[engine_id] + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + /* fall through */ + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, @@ -281,6 +305,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -296,7 +321,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 61cd707158e4..a7b17c8deb00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -107,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -268,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -332,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uint64_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uint64_t)wptr)); - pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -350,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v10_compute_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + static int kgd_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -686,71 +725,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); - return 0; -} - static int kgd_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -817,6 +791,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, + .hiq_mqd_load = kgd_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -832,7 +807,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6e6f0a99ec06..8f052e98a3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid\n"); - return 0; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - /** * read_vmid_from_vmfault_reg - read vmid from register * @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index bfbddedb2380..19a10db93d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return -EINVAL; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e7861f0ef415..8562afe5b761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -103,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -258,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -323,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -339,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v9_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -617,100 +655,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid, i; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - uint32_t flush_type = 0; - - if (adev->in_gpu_reset) - return -EIO; - if (adev->gmc.xgmi.num_physical_nodes && - adev->asic_type == CHIP_VEGA20) - flush_type = 2; - - if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid, flush_type); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; - } - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int i; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. - * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - - return 0; -} - int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -778,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -793,7 +738,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 02b1426d17d1..63d3e6683dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); @@ -57,7 +60,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, uint8_t vmid, uint16_t *p_pasid); -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5b330f69194b..a52a084158b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -909,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (parser->entity && parser->entity != entity) return -EINVAL; + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + parser->entity = entity; ring = to_amdgpu_ring(entity->rq->sched); @@ -1229,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, goto error_abort; } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 63343bb43049..f24ed9a1a3e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -26,6 +26,7 @@ #include <linux/kthread.h> #include <linux/pci.h> #include <linux/uaccess.h> +#include <linux/pm_runtime.h> #include <drm/drm_debugfs.h> @@ -144,10 +145,17 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, *pos &= (1UL << 22) - 1; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, sh_bank, instance_bank); @@ -193,6 +201,9 @@ end: if (pm_pg_lock) mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_PCIE(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_PCIE(*pos >> 2, value); @@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_DIDT(*pos >> 2, value); @@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_SMC(*pos, value); @@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, wave = (*pos & GENMASK_ULL(36, 31)) >> 31; simd = (*pos & GENMASK_ULL(44, 37)) >> 37; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (!x) return -EINVAL; @@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (!data) return -ENOMEM; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + while (size) { uint32_t value; @@ -859,6 +953,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; + /* Avoid accidently unparking the sched thread during GPU reset */ mutex_lock(&adev->lock_reset); @@ -889,6 +987,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) mutex_unlock(&adev->lock_reset); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -907,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -917,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9b4c18b3546f..53d882000101 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2345,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; /* handle putting the SMC in the appropriate state */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - if (is_support_sw_smu(adev)) { - r = smu_set_mp1_state(&adev->smu, adev->mp1_state); - } else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_mp1_state) { - r = adev->powerplay.pp_funcs->set_mp1_state( - adev->powerplay.pp_handle, - adev->mp1_state); - } + r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) { DRM_ERROR("SMC failed to set mp1 state %d, %d\n", adev->mp1_state, r); @@ -2855,6 +2848,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); mutex_init(&adev->psp.mutex); + mutex_init(&adev->notifier_lock); r = amdgpu_device_check_arguments(adev); if (r) @@ -3765,6 +3759,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_ARCTURUS: break; default: goto disabled; @@ -4359,55 +4354,21 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_enter(smu); - if (ret) - return ret; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - } - - return 0; + return amdgpu_dpm_baco_enter(adev); } int amdgpu_device_baco_exit(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; if (!amdgpu_device_supports_baco(adev->ddev)) return -ENOTSUPP; - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_exit(smu); - if (ret) - return ret; - - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_exit(adev); + if (ret) + return ret; if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h new file mode 100644 index 000000000000..61a26c15c8dd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -0,0 +1,62 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DF_H__ +#define __AMDGPU_DF_H__ + +struct amdgpu_df_hash_status { + bool hash_64k; + bool hash_2m; + bool hash_1g; +}; + +struct amdgpu_df_funcs { + void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); +}; + +struct amdgpu_df { + struct amdgpu_df_hash_status hash_status; + const struct amdgpu_df_funcs *funcs; +}; + +#endif /* __AMDGPU_DF_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 4e699071d144..6d520a3eec40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -513,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * will not allow USWC mappings. * Also, don't allow GTT domain if the BO doens't have USWC falg set. */ - if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN && - (adev->flags & AMD_IS_APU) && - (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && - amdgpu_device_asic_has_dc_support(adev->asic_type)) - domain |= AMDGPU_GEM_DOMAIN_GTT; + amdgpu_device_asic_has_dc_support(adev->asic_type)) { + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + case CHIP_RAVEN: + /* enable S/G on PCO and RV2 */ + if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + default: + break; + } + } #endif return domain; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index cd76fbf4385d..a2e8c3dfb4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -946,23 +946,54 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block bool swsmu = is_support_sw_smu(adev); switch (block_type) { - case AMD_IP_BLOCK_TYPE_GFX: case AMD_IP_BLOCK_TYPE_UVD: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: - case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) { ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); - } else { - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) { - mutex_lock(&adev->pm.mutex); - ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( - (adev)->powerplay.pp_handle, block_type, gate)); - mutex_unlock(&adev->pm.mutex); - } + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) { + /* + * TODO: need a better lock mechanism + * + * Here adev->pm.mutex lock protection is enforced on + * UVD and VCE cases only. Since for other cases, there + * may be already lock protection in amdgpu_pm.c. + * This is a quick fix for the deadlock issue below. + * NFO: task ocltst:2028 blocked for more than 120 seconds. + * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu + * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + * cltst D 0 2028 2026 0x00000000 + * all Trace: + * __schedule+0x2c0/0x870 + * schedule+0x2c/0x70 + * schedule_preempt_disabled+0xe/0x10 + * __mutex_lock.isra.9+0x26d/0x4e0 + * __mutex_lock_slowpath+0x13/0x20 + * ? __mutex_lock_slowpath+0x13/0x20 + * mutex_lock+0x2f/0x40 + * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] + * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] + * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] + * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] + * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] + * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] + */ + mutex_lock(&adev->pm.mutex); + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + mutex_unlock(&adev->pm.mutex); } break; + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_SDMA: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; case AMD_IP_BLOCK_TYPE_JPEG: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); @@ -970,12 +1001,9 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) { - mutex_lock(&adev->pm.mutex); + adev->powerplay.pp_funcs->set_powergating_by_smu) ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); - mutex_unlock(&adev->pm.mutex); - } break; default: break; @@ -983,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block return ret; } + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + } + + return ret; +} + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_exit(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + } + + return ret; +} + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_set_mp1_state(&adev->smu, mp1_state); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + ret = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + mp1_state); + } + + return ret; +} + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + bool baco_cap; + + if (is_support_sw_smu(adev)) { + return smu_baco_is_support(smu); + } else { + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) + return false; + + if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap)) + return false; + + return baco_cap ? true : false; + } +} + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) { + return smu_mode2_reset(smu); + } else { + if (!pp_funcs || !pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return pp_funcs->asic_reset_mode_2(pp_handle); + } +} + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + dev_info(adev->dev, "GPU BACO reset\n"); + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + if (ret) + return ret; + + ret = smu_baco_exit(smu); + if (ret) + return ret; + } else { + if (!pp_funcs + || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + if (ret) + return ret; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) + ret = smu_switch_power_profile(&adev->smu, type, en); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) + ret = adev->powerplay.pp_funcs->switch_power_profile( + adev->powerplay.pp_handle, type, en); + + return ret; +} + +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate) +{ + int ret = 0; + + if (is_support_sw_smu_xgmi(adev)) + ret = smu_set_xgmi_pstate(&adev->smu, pstate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 2cfb677272af..902ca6c00cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,10 +341,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_switch_power_profile(adev, type, en) \ - ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type, en)) - #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) @@ -517,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate); + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en); + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state); + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e9efee04ca23..3c01252b1e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -741,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return 0; seq_printf(m, "gpu recover\n"); amdgpu_device_gpu_recover(adev, NULL); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index db7b2b3f9966..b88b8b82bb64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -543,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; - if (!is_support_sw_smu(adev) && - (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->set_powergating_by_smu)) - return; - - mutex_lock(&adev->gfx.gfx_off_mutex); if (!enable) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8e88e0411662..af4bd279f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index c91dd602d5f1..86267baca07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -60,6 +60,11 @@ */ #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL +/* + * Default stolen memory size, 1024 * 768 * 4 + */ +#define AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE 0x300000ULL + struct firmware; /* @@ -92,6 +97,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +224,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 73328d0c741d..d42be880a236 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; - job->owner = owner; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); priority = job->base.s_priority; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index dc7ee9358dcd..3f7b8433d179 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -49,7 +49,6 @@ struct amdgpu_job { uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; - void *owner; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 285d460624c8..b03b1eb7ba04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -37,6 +37,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> +#include <linux/pm_runtime.h> #include "hwmgr.h" #define WIDTH_4K 3840 @@ -158,10 +159,15 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) pm = smu_get_current_power_state(&adev->smu); @@ -173,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); @@ -186,6 +195,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -196,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, state = POWER_STATE_TYPE_BALANCED; else if (strncmp("performance", buf, strlen("performance")) == 0) state = POWER_STATE_TYPE_PERFORMANCE; - else { - count = -EINVAL; - goto fail; - } + else + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); @@ -212,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, adev->pm.dpm.user_state = state; mutex_unlock(&adev->pm.mutex); - /* Can't set dpm state when the card is off */ - if (!(adev->flags & AMD_IS_PX) || - (ddev->switch_power_state == DRM_SWITCH_POWER_ON)) - amdgpu_pm_compute_clocks(adev); + amdgpu_pm_compute_clocks(adev); } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -288,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return snprintf(buf, PAGE_SIZE, "off\n"); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); @@ -303,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : @@ -329,11 +344,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; - /* Can't force performance level when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -353,17 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) { level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; } else { - count = -EINVAL; - goto fail; + return -EINVAL; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) current_level = amdgpu_dpm_get_performance_level(adev); - if (current_level == level) + if (current_level == level) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; + } /* profile_exit setting is valid only when current mode is in profile mode */ if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | @@ -372,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { pr_err("Currently not in any profile mode!\n"); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } if (is_support_sw_smu(adev)) { ret = smu_force_performance_level(&adev->smu, level); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { - count = -EINVAL; mutex_unlock(&adev->pm.mutex); - goto fail; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); - if (ret) - count = -EINVAL; - else + if (ret) { + mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } else { adev->pm.dpm.forced_level = level; + } mutex_unlock(&adev->pm.mutex); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); -fail: return count; } @@ -407,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_get_power_num_states(&adev->smu, &data); if (ret) @@ -414,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, } else if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); for (i = 0; i < data.nums; i++) buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, @@ -439,6 +473,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pm = smu_get_current_power_state(smu); ret = smu_get_power_num_states(smu, &data); @@ -450,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, amdgpu_dpm_get_pp_num_states(adev, &data); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + for (i = 0; i < data.nums; i++) { if (pm == data.states[i]) break; @@ -500,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct pp_states_info data; ret = kstrtoul(buf, 0, &idx); - if (ret || idx >= ARRAY_SIZE(data.states)) { - count = -EINVAL; - goto fail; - } + if (ret || idx >= ARRAY_SIZE(data.states)) + return -EINVAL; + idx = array_index_nospec(idx, ARRAY_SIZE(data.states)); amdgpu_dpm_get_pp_num_states(adev, &data); state = data.states[idx]; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -515,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); } -fail: + return count; } @@ -538,20 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size; + int size, ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (size < 0) return size; - } - else if (adev->powerplay.pp_funcs->get_pp_table) + } else if (adev->powerplay.pp_funcs->get_pp_table) { size = amdgpu_dpm_get_pp_table(adev, &table); - else + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (size < 0) + return size; + } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return 0; + } if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; @@ -573,13 +632,23 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return ret; + } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -703,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } if (type == PP_OD_COMMIT_DPM_TABLE) { @@ -722,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -738,27 +823,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - uint32_t size = 0; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); - return size; } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); - return size; } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return size; } /** @@ -796,15 +887,27 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -815,16 +918,27 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; - if (is_support_sw_smu(adev)) { - return smu_sys_get_pp_feature_mask(&adev->smu, buf); - } else if (adev->powerplay.pp_funcs->get_ppfeature_status) - return amdgpu_dpm_get_ppfeature_status(adev, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; - return snprintf(buf, PAGE_SIZE, "\n"); + if (is_support_sw_smu(adev)) + size = smu_sys_get_pp_feature_mask(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_ppfeature_status) + size = amdgpu_dpm_get_ppfeature_status(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /** @@ -863,16 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /* @@ -928,11 +1053,18 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -945,16 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, @@ -964,8 +1107,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - int ret; uint32_t mask = 0; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -974,11 +1117,18 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -991,16 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, @@ -1020,10 +1181,19 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1037,16 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, @@ -1066,10 +1247,19 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1083,16 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, @@ -1112,10 +1313,19 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1129,16 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); + size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, @@ -1158,10 +1379,19 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1176,15 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1203,10 +1441,12 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); @@ -1222,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1233,15 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1260,10 +1510,12 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); @@ -1279,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1309,16 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_get_power_profile_mode(&adev->smu, buf); + size = smu_get_power_profile_mode(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_power_profile_mode) - return amdgpu_dpm_get_power_profile_mode(adev, buf); + size = amdgpu_dpm_get_power_profile_mode(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); - return snprintf(buf, PAGE_SIZE, "\n"); + return size; } @@ -1343,7 +1608,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, tmp[1] = '\0'; ret = kstrtol(tmp, 0, &profile_mode); if (ret) - goto fail; + return -EINVAL; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -1358,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, while (tmp_str[0]) { sub_str = strsep(&tmp_str, delimiter); ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; parameter_size++; while (isspace(*tmp_str)) tmp_str++; } } parameter[parameter_size] = profile_mode; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (!ret) return count; -fail: + return -EINVAL; } @@ -1397,10 +1669,17 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1426,10 +1705,17 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1455,11 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint64_t count0, count1; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", count0, count1, pcie_get_mps(adev->pdev)); } @@ -1547,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - /* Can't get temperature when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (channel >= PP_TEMP_MAX) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_EDGE: /* get current edge temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_MEM: /* get current memory temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, (void *)&temp, &size); - if (r) - return r; + break; + default: + r = -EINVAL; break; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (r) + return r; + return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1678,16 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode); } @@ -1697,27 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, size_t count) { struct amdgpu_device *adev = dev_get_drvdata(dev); - int err; + int err, ret; int value; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - err = kstrtoint(buf, 10, &value); if (err) return err; + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, value); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1744,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } value = (value * 100) / 255; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_percent(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->set_fan_speed_percent) err = amdgpu_dpm_set_fan_speed_percent(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1784,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->get_fan_speed_percent) err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; speed = (speed * 255) / 100; @@ -1812,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", speed); } @@ -1839,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1856,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1872,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", rpm); } @@ -1899,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -ENODATA; - - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_rpm(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) err = amdgpu_dpm_set_fan_speed_rpm(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1935,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); } @@ -1957,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - - err = kstrtoint(buf, 10, &value); if (err) return err; @@ -1974,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1990,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddgfx; int r, size = sizeof(vddgfx); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2020,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddnb; int r, size = sizeof(vddnb); @@ -2028,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, if (!(adev->flags & AMD_IS_APU)) return -EINVAL; - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2054,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 query = 0; int r, size = sizeof(u32); unsigned uw; - /* Can't get power when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2089,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, true, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, @@ -2107,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, false, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } @@ -2138,13 +2533,20 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, value = value / 1000000; /* convert to Watt */ - if (is_support_sw_smu(adev)) { + + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + + if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); - } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - } else { + else err = -EINVAL; - } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); if (err) return err; @@ -2157,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t sclk; int r, size = sizeof(sclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2187,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t mclk; int r, size = sizeof(mclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -3220,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_device *ddev = adev->ddev; u32 flags = 0; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); @@ -3230,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if (!adev->pm.dpm_enabled) { seq_printf(m, "dpm not enabled\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return 0; } - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { - seq_printf(m, "PX asic powered off\n"); - } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + + if (!is_support_sw_smu(adev) && + adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); else seq_printf(m, "Debugfs support not implemented for this asic\n"); mutex_unlock(&adev->pm.mutex); + r = 0; } else { - return amdgpu_debugfs_pm_info_pp(m, adev); + r = amdgpu_debugfs_pm_info_pp(m, adev); } - return 0; + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return r; } static const struct drm_info_list amdgpu_pm_info_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index cf21ad0cad9a..07914e34bc25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: if (!(flags & PERF_EF_RELOAD)) - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0); break; default: break; @@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf, &count); break; default: @@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0); break; default: break; @@ -156,7 +156,7 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); break; default: return 0; @@ -184,7 +184,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 281d89640344..3a1570dafe34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -529,6 +529,11 @@ static int psp_xgmi_unload(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + struct amdgpu_device *adev = psp->adev; + + /* XGMI TA unload currently is not supported on Arcturus */ + if (adev->asic_type == CHIP_ARCTURUS) + return 0; /* * TODO: bypass the unloading in sriov for now diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 96fc538ec824..766be7f18282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -686,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; + int i; if (!obj) return -EINVAL; @@ -700,6 +701,13 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, if (adev->umc.funcs->query_ras_error_address) adev->umc.funcs->query_ras_error_address(adev, &err_data); break; + case AMDGPU_RAS_BLOCK__SDMA: + if (adev->sdma.funcs->query_ras_error_count) { + for (i = 0; i < adev->sdma.num_instances; i++) + adev->sdma.funcs->query_ras_error_count(adev, i, + &err_data); + } + break; case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->query_ras_error_count) adev->gfx.funcs->query_ras_error_count(adev, &err_data); @@ -1345,7 +1353,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras, recovery_work); - amdgpu_device_gpu_recover(ras->adev, 0); + if (amdgpu_device_should_recover_gpu(ras->adev)) + amdgpu_device_gpu_recover(ras->adev, 0); atomic_set(&ras->in_recovery, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 346dcb1f7146..485335267d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,14 @@ struct amdgpu_sdma_instance { bool burst_nop; }; +struct amdgpu_sdma_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev, + void *ras_ih_info); + void (*ras_fini)(struct amdgpu_device *adev); + int (*query_ras_error_count)(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status); +}; + struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES]; @@ -61,6 +69,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; + const struct amdgpu_sdma_ras_funcs *funcs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3114d8a47e88..dee446278417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -41,6 +41,7 @@ #include <linux/swap.h> #include <linux/swiotlb.h> #include <linux/dma-buf.h> +#include <linux/sizes.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ed106d99d078..f96464e2c157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -75,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case CHIP_ARCTURUS: fw_name = FIRMWARE_ARCTURUS; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case CHIP_RENOIR: fw_name = FIRMWARE_RENOIR; @@ -165,15 +168,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); return r; } - } - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); - return r; + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } } } @@ -186,15 +189,14 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.indirect_sram) { - amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); - } - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, + &adev->vcn.inst[j].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + } kvfree(adev->vcn.inst[j].saved_bo); amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, @@ -298,7 +300,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, j, &new_state); } fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); @@ -341,7 +343,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) new_state.fw_based = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index e6dee8224d33..c4984c5fb2db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -57,6 +57,11 @@ #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -104,27 +109,27 @@ internal_reg_offset >>= 2; \ }) -#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ - ({ \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ +#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, inst, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - *adev->vcn.dpg_sram_curr_addr++ = offset; \ - *adev->vcn.dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } \ } while (0) enum engine_status_constants { @@ -173,6 +178,10 @@ struct amdgpu_vcn_inst { struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_irq_src irq; struct amdgpu_vcn_reg external; + struct amdgpu_bo *dpg_sram_bo; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; }; struct amdgpu_vcn { @@ -184,10 +193,6 @@ struct amdgpu_vcn { struct dpg_pause_state pause_state; bool indirect_sram; - struct amdgpu_bo *dpg_sram_bo; - void *dpg_sram_cpu_addr; - uint64_t dpg_sram_gpu_addr; - uint32_t *dpg_sram_curr_addr; uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; @@ -199,7 +204,7 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4dc75eda1d91..d16231d6a790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -83,6 +83,32 @@ struct amdgpu_prt_cb { }; /** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(&vm->eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(&vm->eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(&vm->eviction_lock); +} + +/** * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); vm->evicting = false; - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return 0; } @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; goto error_unlock; @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, fence); error_unlock: - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return r; } @@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Try to block ongoing updates */ - if (!mutex_trylock(&bo_base->vm->eviction_lock)) + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) return false; /* Don't evict VM page tables while they are updated */ if (!dma_fence_is_signaled(bo_base->vm->last_direct) || !dma_fence_is_signaled(bo_base->vm->last_delayed)) { - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return false; } bo_base->vm->evicting = true; - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fade4f45320c..b4640ab38c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> #include <drm/ttm/ttm_bo_driver.h> +#include <linux/sched/mm.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -239,9 +240,12 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* Lock to prevent eviction while we are updating page tables */ + /* Lock to prevent eviction while we are updating page tables + * use vm_eviction_lock/unlock(vm) + */ struct mutex eviction_lock; bool evicting; + unsigned int saved_flags; /* BOs who needs a validation */ struct list_head evicted; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 5cf920d9358b..a97af422575a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -146,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); if (fica_out != 0x1f) pr_err("xGMI error counters not enabled!\n"); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in); if ((fica_out & 0xffff) == 2) error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); - adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); return snprintf(buf, PAGE_SIZE, "%d\n", error_count); } @@ -291,13 +291,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu_xgmi(adev)) - ret = smu_set_xgmi_pstate(&adev->smu, pstate); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_xgmi_pstate) - ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, - pstate); - + ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e9822ea8bb19..006f21ef7ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1312,19 +1312,13 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) static bool cik_asic_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: - smu7_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); default: - baco_support = false; - break; + return false; } - - return baco_support; } static enum amd_reset_method @@ -1366,7 +1360,7 @@ static int cik_asic_reset(struct amdgpu_device *adev) if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = cik_asic_pci_config_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 9870bf27870e..f91ab4c246b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,7 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6221298b477..d6aca1c08068 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -31,6 +31,9 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; static void df_v1_7_sw_init(struct amdgpu_device *adev) { + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; } static void df_v1_7_sw_fini(struct amdgpu_device *adev) @@ -66,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); return df_v1_7_channel_number[fb_channel_number]; } @@ -77,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, u32 tmp; /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); @@ -92,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit boradcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 2f884d941e8d..f51326598a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -262,6 +262,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, /* device attr for available perfmon counters */ static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); +static void df_v3_6_query_hashes(struct amdgpu_device *adev) +{ + u32 tmp; + + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + /* encoding for hash-enabled on Arcturus */ + if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); + adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl64K); + adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl2M); + adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl1G); + } +} + /* init perfmons */ static void df_v3_6_sw_init(struct amdgpu_device *adev) { @@ -273,6 +299,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) adev->df_perfmon_config_assign_mask[i] = 0; + + df_v3_6_query_hashes(adev); } static void df_v3_6_sw_fini(struct amdgpu_device *adev) @@ -311,7 +339,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) fb_channel_number = 0; @@ -325,7 +353,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable) { tmp = RREG32_SOC15(DF, 0, @@ -344,7 +372,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 6bc3b937fba2..874f641de281 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -120,7 +121,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -168,7 +169,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -345,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) @@ -807,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - + if (info->fw) { + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } if (adev->gfx.rlc.is_rlc_v2_1 && adev->gfx.rlc.save_restore_list_cntl_size_bytes && adev->gfx.rlc.save_restore_list_gpm_size_bytes && @@ -3321,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index cfc1403fc855..46f0533ba43f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4558,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a5492e375f29..46ab46757b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,15 +48,6 @@ #include "amdgpu_ras.h" -#include "sdma0/sdma0_4_2_offset.h" -#include "sdma1/sdma1_4_2_offset.h" -#include "sdma2/sdma2_4_2_2_offset.h" -#include "sdma3/sdma3_4_2_2_offset.h" -#include "sdma4/sdma4_4_2_2_offset.h" -#include "sdma5/sdma5_4_2_2_offset.h" -#include "sdma6/sdma6_4_2_2_offset.h" -#include "sdma7/sdma7_4_2_2_offset.h" - #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -748,6 +739,134 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 12, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; +} + static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -3115,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3319,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3589,7 +3643,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3646,6 +3700,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { if (adev->asic_type != CHIP_ARCTURUS) @@ -3663,6 +3734,8 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); + gfx_v9_0_init_tcp_config(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3674,36 +3747,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3715,7 +3758,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -4021,14 +4064,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4092,7 +4127,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; int sgpr_work_group_size = 5; int gpr_reg_size = compute_dim_x / 16 + 6; - int sec_ded_counter_reg_size = adev->sdma.num_instances + 34; /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) @@ -4232,7 +4266,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* read back registers to clear the counters */ mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < sec_ded_counter_reg_size; i++) { + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0x0, k); @@ -4259,6 +4293,7 @@ static int gfx_v9_0_early_init(void *handle) else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index da9765ff45d6..bbede09983e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, (!amdgpu_sriov_vf(adev))); } +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -380,6 +396,63 @@ error_alloc: DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); } +/** + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v10_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; +} + static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, .map_mtype = gmc_v10_0_map_mtype, @@ -566,6 +640,13 @@ static int gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + /* + * Can't free the stolen VGA memory when it might be used for memory + * training again. + */ + if (!adev->fw_vram_usage.mem_train_support) + amdgpu_bo_late_init(adev); + r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; @@ -720,6 +801,10 @@ static int gmc_v10_0_sw_init(void *handle) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -732,15 +817,6 @@ static int gmc_v10_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* - * Reserve 8M stolen memory for navi10 like vega10 - * TODO: will check if it's really needed on asic. - */ - if (amdgpu_emu_mode == 1) - adev->gmc.stolen_size = 0; - else - adev->gmc.stolen_size = 9 * 1024 *1024; - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); @@ -753,6 +829,19 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev); + /* + * In dual GPUs scenario, stolen_size is assigned to zero on the + * secondary GPU, since there is no pre-OS console using that memory. + * Then the bottom region of VRAM was allocated as GTT, unfortunately a + * small region of bottom VRAM was encroached by UMC firmware during + * GDDR6 BIST training, this cause page fault. + * The page fault can be fixed by forcing stolen_size to 3MB, then the + * bottom region of VRAM was allocated as stolen memory, GTT corruption + * avoid. + */ + adev->gmc.stolen_size = max(adev->gmc.stolen_size, + AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -792,6 +881,13 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) static int gmc_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + void *stolen_vga_buf; + + /* + * Free the stolen memory if it wasn't already freed in late_init + * because of memory training. + */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_vm_manager_fini(adev); gmc_v10_0_gart_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index f08e5330642d..19d5b133e1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6d96d40fbcb8..27d83204fa2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; + +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 26194ac9af98..40a496804356 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -38,10 +38,12 @@ #include "dce/dce_12_0_sh_mask.h" #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" #include "athub/athub_1_0_offset.h" #include "oss/osssys_4_0_offset.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "umc/umc_6_0_sh_mask.h" @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->pdev->device == 0x15d8))); } +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } +/** + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (adev->in_gpu_reset) + return -EIO; + + if (ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v9_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; + +} + static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, .map_mtype = gmc_v9_0_map_mtype, @@ -817,8 +893,8 @@ static int gmc_v9_0_late_init(void *handle) r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); - if (adev->df_funcs->enable_ecc_force_par_wr_rmw) - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_INFO("ECC is active.\n"); } @@ -1023,7 +1099,7 @@ static int gmc_v9_0_sw_init(void *handle) else chansize = 128; - numchan = adev->df_funcs->get_hbm_channel_number(adev); + numchan = adev->df.funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b0229543e887..2e0f8933410e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -478,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -489,7 +489,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -502,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -513,7 +513,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -726,6 +726,12 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB; + /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * as a consequence, the rev_id and external_rev_id are wrong. + * workaround it by hardcoding rev_id to 0 (default value). + */ + if (amdgpu_sriov_vf(adev)) + adev->rev_id = 0; adev->external_rev_id = adev->rev_id + 0xa; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index a57f3d737677..685dd9754c67 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -43,10 +43,13 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi10_ta.bin"); MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ta.bin"); MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ce0753a9d241..27c7001be1ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev); static const struct soc15_reg_golden golden_settings_sdma_4[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), @@ -254,7 +255,106 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) +}; + +static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = { + { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED), + 0, 0, + }, + { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED), + 0, 0, + }, + { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED), + 0, 0, + }, + { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED), + 0, 0, + }, + { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED), + 0, 0, + }, + { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED), + 0, 0, + }, }; static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, @@ -1686,6 +1786,7 @@ static int sdma_v4_0_early_init(void *handle) sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); sdma_v4_0_set_irq_funcs(adev); + sdma_v4_0_set_ras_funcs(adev); return 0; } @@ -1700,8 +1801,18 @@ static int sdma_v4_0_late_init(void *handle) struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; + int i; - return amdgpu_sdma_ras_late_init(adev, &ih_info); + /* read back edc counter registers to clear the counters */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); + } + + if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) + return adev->sdma.funcs->ras_late_init(adev, &ih_info); + else + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -1773,7 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - amdgpu_sdma_ras_fini(adev); + if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) + adev->sdma.funcs->ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -2414,6 +2526,70 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } +static void sdma_v4_0_get_ras_error_count(uint32_t value, + uint32_t instance, + uint32_t *sec_count) +{ + uint32_t i; + uint32_t sec_cnt; + + /* double bits error (multiple bits) error detection is not supported */ + for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) { + /* the SDMA_EDC_COUNTER register in each sdma instance + * shares the same sed shift_mask + * */ + sec_cnt = (value & + sdma_v4_0_ras_fields[i].sec_count_mask) >> + sdma_v4_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("Detected %s in SDMA%d, SED %d\n", + sdma_v4_0_ras_fields[i].name, + instance, sec_cnt); + *sec_count += sec_cnt; + } + } +} + +static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0; + uint32_t reg_value = 0; + + reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER); + /* double bit error is not supported */ + if (reg_value) + sdma_v4_0_get_ras_error_count(reg_value, + instance, &sec_count); + /* err_data->ce_count should be initialized to 0 + * before calling into this function */ + err_data->ce_count += sec_count; + /* double bit error is not supported + * set ue count to 0 */ + err_data->ue_count = 0; + + return 0; +}; + +static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { + .ras_late_init = amdgpu_sdma_ras_late_init, + .ras_fini = amdgpu_sdma_ras_fini, + .query_ras_error_count = sdma_v4_0_query_ras_error_count, +}; + +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + adev->sdma.funcs = &sdma_v4_0_ras_funcs; + break; + default: + break; + } +} + const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 714cf4dfd0a7..317803f6a561 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -479,62 +479,18 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) return ret; } -static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - - *cap = smu_baco_is_support(smu); - return 0; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); - } -} - static int soc15_asic_baco_reset(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - dev_info(adev->dev, "GPU BACO reset\n"); - - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_enter(smu); - if (ret) - return ret; - - ret = smu_baco_exit(smu); - if (ret) - return ret; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_reset(adev); + if (ret) + return ret; /* re-enable doorbell interrupt after BACO exit */ if (ras && ras->supported) @@ -543,17 +499,6 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return 0; } -static int soc15_mode2_reset(struct amdgpu_device *adev) -{ - if (is_support_sw_smu(adev)) - return smu_mode2_reset(&adev->smu); - if (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->asic_reset_mode_2) - return -ENOENT; - - return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); -} - static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { @@ -567,11 +512,11 @@ soc15_asic_reset_method(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_ARCTURUS: - soc15_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); /* * 1. PMFW version > 0x284300: all cases use baco @@ -598,7 +543,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: - return soc15_mode2_reset(adev); + return amdgpu_dpm_mode2_reset(adev); default: if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); @@ -608,25 +553,18 @@ static int soc15_asic_reset(struct amdgpu_device *adev) static bool soc15_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_ARCTURUS: - soc15_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_support); - else - baco_support = false; - break; + return amdgpu_dpm_is_baco_supported(adev); + return false; default: return false; } - - return baco_support; } /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -739,9 +677,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) - adev->df_funcs = &df_v3_6_funcs; + adev->df.funcs = &df_v3_6_funcs; else - adev->df_funcs = &df_v1_7_funcs; + adev->df.funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); adev->nbio.funcs->detect_hw_virt(adev); @@ -834,8 +772,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); } else { - if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); } if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block); @@ -846,8 +783,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); - if (is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) @@ -1311,7 +1247,7 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - adev->df_funcs->sw_init(adev); + adev->df.funcs->sw_init(adev); return 0; } @@ -1321,7 +1257,7 @@ static int soc15_common_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_nbio_ras_fini(adev); - adev->df_funcs->sw_fini(adev); + adev->df.funcs->sw_fini(adev); return 0; } @@ -1542,7 +1478,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->df_funcs->update_medium_grain_clock_gating(adev, + adev->df.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: @@ -1600,7 +1536,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - adev->df_funcs->get_clockgating_state(adev, flags); + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 11e924dd88ff..793bf70e64b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -30,8 +30,6 @@ #include "umc/umc_6_1_1_sh_mask.h" #include "umc/umc_6_1_2_offset.h" -#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 - #define UMC_6_INST_DIST 0x40000 /* @@ -56,12 +54,30 @@ const uint32_t {9, 25, 0, 16}, {15, 31, 6, 22} }; +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); +} + static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) { WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 0); } +static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + + return REG_GET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN); +} + static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) @@ -165,6 +181,11 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, umc_inst, @@ -177,6 +198,9 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, umc_reg_offset, &(err_data->ue_count)); } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, @@ -186,7 +210,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page; + uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; @@ -194,10 +218,14 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* UMC 6_1_2 registers */ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); } else { /* UMC 6_1_1 registers */ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); } /* skip error address process if -ENOMEM */ @@ -214,8 +242,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); @@ -255,6 +283,11 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, umc_inst, @@ -267,6 +300,8 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, umc_inst); } + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, @@ -313,7 +348,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; - umc_v6_1_disable_umc_index_mode(adev); + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, @@ -322,6 +360,9 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3b025a3f8c7d..e654938f6cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -39,10 +39,10 @@ #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" #include "jpeg_v1_0.h" -#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x05ac -#define mmUVD_REG_XX_MASK_BASE_IDX 1 +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1 +#define mmUVD_REG_XX_MASK_1_0 0x05ac +#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); @@ -50,7 +50,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); static void vcn_v1_0_idle_work_handler(struct work_struct *work); @@ -835,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) vcn_v1_0_mc_resume_spg_mode(adev); - WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10); - WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, - RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3); + WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10); + WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0, + RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3); /* enable VCPU clock */ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); @@ -1199,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) } static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { int ret_code; uint32_t reg_data = 0; @@ -1786,7 +1786,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, 0, &new_state); } fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); @@ -1840,7 +1840,7 @@ void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, 0, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d76ece38c97b..f4db8af6536b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -47,18 +47,13 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 -#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x026c -#define mmUVD_REG_XX_MASK_BASE_IDX 1 - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); /** * vcn_v2_0_early_init - set function pointers @@ -356,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -583,19 +578,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } @@ -759,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); @@ -768,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -784,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -813,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) vcn_v2_0_mc_resume_dpg_mode(adev, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* release VCPU reset to boot */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL2), 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - - (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); @@ -1135,7 +1130,7 @@ power_off: } static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { struct amdgpu_ring *ring; uint32_t reg_data = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 4ea8e20ed15d..c8b63d57a541 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,8 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { @@ -212,6 +214,9 @@ static int vcn_v2_5_sw_init(void *handle) return r; } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + return 0; } @@ -286,7 +291,8 @@ static int vcn_v2_5_hw_init(void *handle) done: if (!r) - DRM_INFO("VCN decode and encode initialized successfully.\n"); + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -309,7 +315,9 @@ static int vcn_v2_5_hw_fini(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); ring->sched.ready = false; @@ -384,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { @@ -418,6 +426,99 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) } } +static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + /** * vcn_v2_5_disable_clock_gating - disable VCN clock gating * @@ -536,6 +637,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) } } +static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + /** * vcn_v2_5_enable_clock_gating - enable VCN clock gating * @@ -598,6 +747,138 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) } } +static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect); + + /* unblock VCPU register access */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + + ring = &adev->vcn.inst[inst_idx].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + static int vcn_v2_5_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; @@ -610,6 +891,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -973,6 +1257,35 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table); } +static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + static int vcn_v2_5_stop(struct amdgpu_device *adev) { uint32_t tmp; @@ -981,6 +1294,12 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_stop_dpg_mode(adev, i); + goto power_off; + } + /* wait for vcn idle */ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); if (r) @@ -1030,12 +1349,74 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } +power_off: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); return 0; } +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, + 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + /** * vcn_v2_5_dec_ring_get_rptr - get read pointer * @@ -1078,6 +1459,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index e4f4201b3c34..78b35901643b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -689,40 +689,6 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) return -EINVAL; } -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); -} - -int smu7_asic_baco_reset(struct amdgpu_device *adev) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - - dev_info(adev->dev, "GPU BACO reset\n"); - - return 0; -} - /** * vi_asic_pci_config_reset - soft reset GPU * @@ -747,8 +713,6 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) static bool vi_asic_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: @@ -756,14 +720,10 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); default: - baco_support = false; - break; + return false; } - - return baco_support; } static enum amd_reset_method @@ -778,7 +738,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: baco_reset = false; @@ -807,7 +767,7 @@ static int vi_asic_reset(struct amdgpu_device *adev) if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = vi_asic_pci_config_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 40d4174913a4..defb4aaf929a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -31,7 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev, int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d7eb6ac37f62..2870553a2ce0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -934,6 +934,7 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + pr_info("SW scheduler is used"); init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 7832ec6e480b..d1d68a51bfb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -153,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, return r; } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -409,7 +417,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; + mqd->load_mqd = hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index aa9010995eaf..436b7f518979 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, wptr_shift, 0, mms); } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -449,7 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; + mqd->load_mqd = hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 536a153ac9a4..25b90f70aecd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -32,6 +32,7 @@ #include <linux/mman.h> #include <linux/file.h> #include "amdgpu_amdkfd.h" +#include "amdgpu.h" struct mm_struct; @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; - const struct kfd2kgd_calls *f2g = dev->kfd2kgd; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. */ if (pdd->qpd.vmid) - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + pdd->qpd.vmid); } else { - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + pdd->process->pasid); } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 76673c7234ed..9402374d2466 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -906,13 +906,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.dce_environment = DCE_ENV_PRODUCTION_DRV; - /* - * TODO debug why this doesn't work on Raven - */ - if (adev->flags & AMD_IS_APU && - adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN) + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_RAVEN: + case CHIP_RENOIR: init_data.flags.gpu_vm_support = true; + break; + default: + break; + } if (amdgpu_dc_feature_mask & DC_FBC_MASK) init_data.flags.fbc_support = true; @@ -8390,17 +8393,37 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) { struct dc_link *link = stream->link; - struct dc_static_screen_events triggers = {0}; + unsigned int vsync_rate_hz = 0; + struct dc_static_screen_params params = {0}; + /* Calculate number of static frames before generating interrupt to + * enter PSR. + */ + unsigned int frame_time_microsec = 1000000 / vsync_rate_hz; + // Init fail safe of 2 frames static + unsigned int num_frames_static = 2; DRM_DEBUG_DRIVER("Enabling psr...\n"); - triggers.cursor_update = true; - triggers.overlay_update = true; - triggers.surface_update = true; + vsync_rate_hz = div64_u64(div64_u64(( + stream->timing.pix_clk_100hz * 100), + stream->timing.v_total), + stream->timing.h_total); + + /* Round up + * Calculate number of frames such that at least 30 ms of time has + * passed. + */ + if (vsync_rate_hz != 0) + num_frames_static = (30000 / frame_time_microsec) + 1; + + params.triggers.cursor_update = true; + params.triggers.overlay_update = true; + params.triggers.surface_update = true; + params.num_frames = num_frames_static; - dc_stream_set_static_screen_events(link->ctx->dc, + dc_stream_set_static_screen_params(link->ctx->dc, &stream, 1, - &triggers); + ¶ms); return dc_link_set_psr_allow_active(link, true, false); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 52fb207393ef..96b391e4b3e7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -534,7 +534,7 @@ static int kbps_to_peak_pbn(int kbps) peak_kbps *= 1006; peak_kbps = div_u64(peak_kbps, 1000); - return (int) DIV_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); + return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params, diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index e6c22345f0ea..a27d84ca15a5 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -705,8 +705,8 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev) { - /* for dali, the highest voltage level we want is 0 */ - if (ASICREV_IS_DALI(hw_internal_rev)) + /* for dali & pollock, the highest voltage level we want is 0 */ + if (ASICREV_IS_POLLOCK(hw_internal_rev) || ASICREV_IS_DALI(hw_internal_rev)) return 0; /* we are ok with all levels */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 6d60ef822619..a78e5c74c79c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -134,13 +134,13 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p #if defined(CONFIG_DRM_AMD_DC_DCN) case FAMILY_RV: - if (ASICREV_IS_DALI(asic_id.hw_internal_rev)) { + if (ASICREV_IS_DALI(asic_id.hw_internal_rev) || + ASICREV_IS_POLLOCK(asic_id.hw_internal_rev)) { /* TEMP: this check has to come before ASICREV_IS_RENOIR */ - /* which also incorrectly returns true for Dali */ + /* which also incorrectly returns true for Dali/Pollock*/ rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu); break; } - if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev)) { rn_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); break; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index ffed7207c099..7ae4c06232dd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -59,14 +59,16 @@ int rn_get_active_display_cnt_wa( struct dc_state *context) { int i, display_count; - bool hdmi_present = false; + bool tmds_present = false; display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; - if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) - hdmi_present = true; + if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A || + stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK || + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) + tmds_present = true; } for (i = 0; i < dc->link_count; i++) { @@ -85,7 +87,7 @@ int rn_get_active_display_cnt_wa( } /* WA for hang on HDMI after display off back back on*/ - if (display_count == 0 && hdmi_present) + if (display_count == 0 && tmds_present) display_count = 1; return display_count; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3d89904003f0..6c797fac189d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -287,7 +287,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->stream == stream && pipe->stream_res.tg) { - pipe->stream->adjust = *adjust; dc->hwss.set_drr(&pipe, 1, adjust->v_total_min, @@ -511,10 +510,10 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream) return ret; } -void dc_stream_set_static_screen_events(struct dc *dc, +void dc_stream_set_static_screen_params(struct dc *dc, struct dc_stream_state **streams, int num_streams, - const struct dc_static_screen_events *events) + const struct dc_static_screen_params *params) { int i = 0; int j = 0; @@ -533,7 +532,7 @@ void dc_stream_set_static_screen_events(struct dc *dc, } } - dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, events); + dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, params); } static void dc_destruct(struct dc *dc) @@ -1319,6 +1318,12 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context) return (result == DC_OK); } +bool dc_is_hw_initialized(struct dc *dc) +{ + struct dc_bios *dcb = dc->ctx->dc_bios; + return dcb->funcs->is_accelerated_mode(dcb); +} + bool dc_post_update_surfaces_to_stream(struct dc *dc) { int i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cef8c1ba9797..260c0b62d37d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -45,6 +45,7 @@ #include "dpcd_defs.h" #include "dmcu.h" #include "hw/clk_mgr.h" +#include "../dce/dmub_psr.h" #define DC_LOGGER_INIT(logger) @@ -817,8 +818,8 @@ static bool dc_link_detect_helper(struct dc_link *link, } case SIGNAL_TYPE_EDP: { - read_current_link_settings_on_detect(link); detect_edp_sink_caps(link); + read_current_link_settings_on_detect(link); sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX; sink_caps.signal = SIGNAL_TYPE_EDP; break; @@ -2404,10 +2405,11 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool { struct dc *dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; + struct dmub_psr *psr = dc->res_pool->psr; - - - if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) + if ((psr != NULL) && link->psr_feature_enabled) + psr->funcs->set_psr_enable(psr, allow_active); + else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) dmcu->funcs->set_psr_enable(dmcu, allow_active, wait); link->psr_allow_active = allow_active; @@ -2419,8 +2421,11 @@ bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state) { struct dc *dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; + struct dmub_psr *psr = dc->res_pool->psr; - if (dmcu != NULL && link->psr_feature_enabled) + if (psr != NULL && link->psr_feature_enabled) + psr->funcs->get_psr_state(psr_state); + else if (dmcu != NULL && link->psr_feature_enabled) dmcu->funcs->get_psr_state(dmcu, psr_state); return true; @@ -2467,6 +2472,7 @@ bool dc_link_setup_psr(struct dc_link *link, { struct dc *dc; struct dmcu *dmcu; + struct dmub_psr *psr; int i; /* updateSinkPsrDpcdConfig*/ union dpcd_psr_configuration psr_configuration; @@ -2478,8 +2484,9 @@ bool dc_link_setup_psr(struct dc_link *link, dc = link->ctx->dc; dmcu = dc->res_pool->dmcu; + psr = dc->res_pool->psr; - if (!dmcu) + if (!dmcu && !psr) return false; @@ -2535,7 +2542,7 @@ bool dc_link_setup_psr(struct dc_link *link, transmitter_to_phy_id(link->link_enc->transmitter); psr_context->crtcTimingVerticalTotal = stream->timing.v_total; - psr_context->vsyncRateHz = div64_u64(div64_u64((stream-> + psr_context->vsync_rate_hz = div64_u64(div64_u64((stream-> timing.pix_clk_100hz * 100), stream->timing.v_total), stream->timing.h_total); @@ -2588,7 +2595,10 @@ bool dc_link_setup_psr(struct dc_link *link, */ psr_context->frame_delay = 0; - link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); + if (psr) + link->psr_feature_enabled = psr->funcs->setup_psr(psr, link, psr_context); + else + link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); /* psr_enabled == 0 indicates setup_psr did not succeed, but this * should not happen since firmware should be running at this point @@ -2863,6 +2873,39 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) return DC_OK; } + +enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link) +{ + int i; + struct pipe_ctx *pipe_ctx; + + // Clear all of MST payload then reallocate + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + pipe_ctx->stream->dpms_off == false && + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + deallocate_mst_payload(pipe_ctx); + } + } + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + pipe_ctx->stream->dpms_off == false && + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + /* enable/disable PHY will clear connection between BE and FE + * need to restore it. + */ + link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc, + pipe_ctx->stream_res.stream_enc->id, true); + dc_link_allocate_mst_payload(pipe_ctx); + } + } + + return DC_OK; +} + #if defined(CONFIG_DRM_AMD_DC_HDCP) static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) { @@ -3361,3 +3404,10 @@ const struct dc_link_settings *dc_link_get_link_cap( return &link->preferred_link_setting; return &link->verified_link_cap; } + +void dc_link_overwrite_extended_receiver_cap( + struct dc_link *link) +{ + dp_overwrite_extended_receiver_cap(link); +} + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 38b0f4347383..6ab298c65247 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1217,24 +1217,33 @@ static void configure_lttpr_mode(struct dc_link *link) uint8_t repeater_cnt; uint32_t aux_interval_address; uint8_t repeater_id; + enum dc_status result = DC_ERROR_UNEXPECTED; uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT; DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__); - core_link_write_dpcd(link, + result = core_link_write_dpcd(link, DP_PHY_REPEATER_MODE, (uint8_t *)&repeater_mode, sizeof(repeater_mode)); + if (result == DC_OK) { + link->dpcd_caps.lttpr_caps.mode = repeater_mode; + } + if (!link->is_lttpr_mode_transparent) { DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Non Transparent Mode\n", __func__); repeater_mode = DP_PHY_REPEATER_MODE_NON_TRANSPARENT; - core_link_write_dpcd(link, + result = core_link_write_dpcd(link, DP_PHY_REPEATER_MODE, (uint8_t *)&repeater_mode, sizeof(repeater_mode)); + if (result == DC_OK) { + link->dpcd_caps.lttpr_caps.mode = repeater_mode; + } + repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) { aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 + @@ -1883,6 +1892,16 @@ bool dp_verify_link_cap( /* disable PHY done possible by BIOS, will be done by driver itself */ dp_disable_link_phy(link, link->connector_signal); + /* Temporary Renoir-specific workaround for SWDEV-215184; + * PHY will sometimes be in bad state on hotplugging display from certain USB-C dongle, + * so add extra cycle of enabling and disabling the PHY before first link training. + */ + if (link->link_enc->features.flags.bits.DP_IS_USB_C && + link->dc->debug.usbc_combo_phy_reset_wa) { + dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur); + dp_disable_link_phy(link, link->connector_signal); + } + dp_cs_id = get_clock_source_id(link); /* link training starts with the maximum common settings @@ -2876,18 +2895,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd return false; previous_link_settings = link->cur_link_settings; - dp_disable_link_phy(link, pipe_ctx->stream->signal); perform_link_training_with_retries(&previous_link_settings, true, LINK_TRAINING_ATTEMPTS, pipe_ctx, pipe_ctx->stream->signal); - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && - pipe_ctx->stream->dpms_off == false && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - dc_link_allocate_mst_payload(pipe_ctx); - } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + dc_link_reallocate_mst_payload(link); status = false; if (out_link_loss) @@ -3269,7 +3284,7 @@ static bool retrieve_link_cap(struct dc_link *link) dpcd_data[DP_TRAINING_AUX_RD_INTERVAL]; link->dpcd_caps.ext_receiver_cap_field_present = - aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1 ? true:false; + aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1; if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) { uint8_t ext_cap_data[16]; @@ -3428,6 +3443,68 @@ static bool retrieve_link_cap(struct dc_link *link) return true; } +bool dp_overwrite_extended_receiver_cap(struct dc_link *link) +{ + uint8_t dpcd_data[16]; + uint32_t read_dpcd_retry_cnt = 3; + enum dc_status status = DC_ERROR_UNEXPECTED; + union dp_downstream_port_present ds_port = { 0 }; + union down_stream_port_count down_strm_port_count; + union edp_configuration_cap edp_config_cap; + + int i; + + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( + link, + DP_DPCD_REV, + dpcd_data, + sizeof(dpcd_data)); + if (status == DC_OK) + break; + } + + link->dpcd_caps.dpcd_rev.raw = + dpcd_data[DP_DPCD_REV - DP_DPCD_REV]; + + if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0) + return false; + + ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT - + DP_DPCD_REV]; + + get_active_converter_info(ds_port.byte, link); + + down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT - + DP_DPCD_REV]; + + link->dpcd_caps.allow_invalid_MSA_timing_param = + down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM; + + link->dpcd_caps.max_ln_count.raw = dpcd_data[ + DP_MAX_LANE_COUNT - DP_DPCD_REV]; + + link->dpcd_caps.max_down_spread.raw = dpcd_data[ + DP_MAX_DOWNSPREAD - DP_DPCD_REV]; + + link->reported_link_cap.lane_count = + link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT; + link->reported_link_cap.link_rate = dpcd_data[ + DP_MAX_LINK_RATE - DP_DPCD_REV]; + link->reported_link_cap.link_spread = + link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ? + LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED; + + edp_config_cap.raw = dpcd_data[ + DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV]; + link->dpcd_caps.panel_mode_edp = + edp_config_cap.bits.ALT_SCRAMBLER_RESET; + link->dpcd_caps.dpcd_display_control_capable = + edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE; + + return true; +} + bool detect_dp_sink_caps(struct dc_link *link) { return retrieve_link_cap(link); @@ -3603,6 +3680,7 @@ static void set_crtc_test_pattern(struct dc_link *link, struct pipe_ctx *odm_pipe; enum controller_dp_color_space controller_color_space; int opp_cnt = 1; + uint8_t count = 0; switch (test_pattern_color_space) { case DP_TEST_PATTERN_COLOR_SPACE_RGB: @@ -3646,6 +3724,12 @@ static void set_crtc_test_pattern(struct dc_link *link, NULL, width, height); + /* wait for dpg to blank pixel data with test pattern */ + for (count = 0; count < 1000; count++) + if (opp->funcs->dpg_is_blanked(opp)) + break; + else + udelay(100); } } break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 64a0e08fd019..a0eb9e533a61 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2066,6 +2066,13 @@ void dc_resource_state_construct( dst_ctx->clk_mgr = dc->clk_mgr; } + +bool dc_resource_is_dsc_encoding_supported(const struct dc *dc) +{ + return dc->res_pool->res_cap->num_dsc > 0; +} + + /** * dc_validate_global_state() - Determine if HW can support a given state * Checks HW resource availability and bandwidth requirement. @@ -2897,6 +2904,3 @@ void get_audio_check(struct audio_info *aud_modes, } } - - - diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 039004344dc6..3fa85a54360f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.64" +#define DC_VER "3.2.68" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -157,11 +157,14 @@ struct dc_surface_dcc_cap { bool const_color_support; }; -struct dc_static_screen_events { - bool force_trigger; - bool cursor_update; - bool surface_update; - bool overlay_update; +struct dc_static_screen_params { + struct { + bool force_trigger; + bool cursor_update; + bool surface_update; + bool overlay_update; + } triggers; + unsigned int num_frames; }; @@ -420,6 +423,8 @@ struct dc_debug_options { bool nv12_iflip_vm_wa; bool disable_dram_clock_change_vactive_support; bool validate_dml_output; + bool enable_dmcub_surface_flip; + bool usbc_combo_phy_reset_wa; }; struct dc_debug_data { @@ -910,6 +915,8 @@ void dc_resource_state_copy_construct_current( void dc_resource_state_destruct(struct dc_state *context); +bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); + /* * TODO update to make it about validation sets * Set up streams and links associated to drive sinks @@ -1067,6 +1074,7 @@ unsigned int dc_get_current_backlight_pwm(struct dc *dc); unsigned int dc_get_target_backlight_pwm(struct dc *dc); bool dc_is_dmcu_initialized(struct dc *dc); +bool dc_is_hw_initialized(struct dc *dc); enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping); void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 02a63e9cb62f..737048d8a96c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -552,6 +552,36 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx, return value; } +uint32_t generic_indirect_reg_get(const struct dc_context *ctx, + uint32_t addr_index, uint32_t addr_data, + uint32_t index, int n, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + ...) +{ + uint32_t shift, mask, *field_value; + uint32_t value = 0; + int i = 1; + + va_list ap; + + va_start(ap, field_value1); + + value = generic_read_indirect_reg(ctx, addr_index, addr_data, index); + *field_value1 = get_reg_field_value_ex(value, mask1, shift1); + + while (i < n) { + shift = va_arg(ap, uint32_t); + mask = va_arg(ap, uint32_t); + field_value = va_arg(ap, uint32_t *); + + *field_value = get_reg_field_value_ex(value, mask, shift); + i++; + } + + va_end(ap); + + return value; +} uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f420aeac7fbd..d25603128394 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -205,6 +205,7 @@ enum dc_detect_reason { bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); bool dc_link_get_hpd_state(struct dc_link *dc_link); enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); +enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link); /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt). * Return: @@ -301,6 +302,9 @@ uint32_t dc_link_bandwidth_kbps( const struct dc_link_settings *dc_link_get_link_cap( const struct dc_link *link); +void dc_link_overwrite_extended_receiver_cap( + struct dc_link *link); + bool dc_submit_i2c( struct dc *dc, uint32_t link_index, diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 37c10dbf269e..92096de79dec 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -439,10 +439,10 @@ bool dc_stream_get_crc(struct dc *dc, uint32_t *g_y, uint32_t *b_cb); -void dc_stream_set_static_screen_events(struct dc *dc, +void dc_stream_set_static_screen_params(struct dc *dc, struct dc_stream_state **stream, int num_streams, - const struct dc_static_screen_events *events); + const struct dc_static_screen_params *params); void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream, enum dc_dynamic_expansion option); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b1a372c8df83..e59532d98cb4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -229,6 +229,7 @@ struct dc_panel_patch { unsigned int extra_t12_ms; unsigned int extra_delay_backlight_off; unsigned int extra_t7_ms; + unsigned int manage_secondary_link; }; struct dc_edid_caps { @@ -728,7 +729,7 @@ struct psr_context { /* The VSync rate in Hz used to calculate the * step size for smooth brightness feature */ - unsigned int vsyncRateHz; + unsigned int vsync_rate_hz; unsigned int skipPsrWaitForPllLock; unsigned int numberOfControllers; /* Unused, for future use. To indicate that first changed frame from diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index e619e67e6b51..30d953acd016 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -537,9 +537,6 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait) if (dmcu->dmcu_state != DMCU_RUNNING) return; - dcn10_get_dmcu_psr_state(dmcu, &psr_state); - if (psr_state == 0 && !enable) - return; /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, dmcu_wait_reg_ready_interval, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c new file mode 100644 index 000000000000..225955ec6d39 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -0,0 +1,220 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dmub_psr.h" +#include "dc.h" +#include "dc_dmub_srv.h" +#include "../../dmub/inc/dmub_srv.h" +#include "dmub_fw_state.h" +#include "core_types.h" +#include "ipp.h" + +#define MAX_PIPES 6 + +/** + * Get PSR state from firmware. + */ +static void dmub_get_psr_state(uint32_t *psr_state) +{ + // Not yet implemented + // Trigger GPINT interrupt from firmware +} + +/** + * Enable/Disable PSR. + */ +static void dmub_set_psr_enable(struct dmub_psr *dmub, bool enable) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + + cmd.psr_enable.header.type = DMUB_CMD__PSR; + + if (enable) + cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_ENABLE; + else + cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_DISABLE; + + cmd.psr_enable.header.payload_bytes = 0; // Send header only + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); +} + +/** + * Set PSR level. + */ +static void dmub_set_psr_level(struct dmub_psr *dmub, uint16_t psr_level) +{ + union dmub_rb_cmd cmd; + uint32_t psr_state = 0; + struct dc_context *dc = dmub->ctx; + + dmub_get_psr_state(&psr_state); + + if (psr_state == 0) + return; + + cmd.psr_set_level.header.type = DMUB_CMD__PSR; + cmd.psr_set_level.header.sub_type = DMUB_CMD__PSR_SET_LEVEL; + cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data); + cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); +} + +/** + * Setup PSR by programming phy registers and sending psr hw context values to firmware. + */ +static bool dmub_setup_psr(struct dmub_psr *dmub, + struct dc_link *link, + struct psr_context *psr_context) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + struct dmub_cmd_psr_copy_settings_data *copy_settings_data + = &cmd.psr_copy_settings.psr_copy_settings_data; + struct pipe_ctx *pipe_ctx = NULL; + struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; + + for (int i = 0; i < MAX_PIPES; i++) { + if (res_ctx && + res_ctx->pipe_ctx[i].stream && + res_ctx->pipe_ctx[i].stream->link && + res_ctx->pipe_ctx[i].stream->link == link && + res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { + pipe_ctx = &res_ctx->pipe_ctx[i]; + break; + } + } + + if (!pipe_ctx || + !&pipe_ctx->plane_res || + !&pipe_ctx->stream_res) + return false; + + // Program DP DPHY fast training registers + link->link_enc->funcs->psr_program_dp_dphy_fast_training(link->link_enc, + psr_context->psrExitLinkTrainingRequired); + + // Program DP_SEC_CNTL1 register to set transmission GPS0 line num and priority to high + link->link_enc->funcs->psr_program_secondary_packet(link->link_enc, + psr_context->sdpTransmitLineNumDeadline); + + cmd.psr_copy_settings.header.type = DMUB_CMD__PSR; + cmd.psr_copy_settings.header.sub_type = DMUB_CMD__PSR_COPY_SETTINGS; + cmd.psr_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_psr_copy_settings_data); + + // Hw insts + copy_settings_data->dpphy_inst = psr_context->phyType; + copy_settings_data->aux_inst = psr_context->channel; + copy_settings_data->digfe_inst = psr_context->engineId; + copy_settings_data->digbe_inst = psr_context->transmitterId; + + copy_settings_data->mpcc_inst = pipe_ctx->plane_res.mpcc_inst; + + if (pipe_ctx->plane_res.hubp) + copy_settings_data->hubp_inst = pipe_ctx->plane_res.hubp->inst; + else + copy_settings_data->hubp_inst = 0; + if (pipe_ctx->plane_res.dpp) + copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; + else + copy_settings_data->dpp_inst = 0; + if (pipe_ctx->stream_res.opp) + copy_settings_data->opp_inst = pipe_ctx->stream_res.opp->inst; + else + copy_settings_data->opp_inst = 0; + if (pipe_ctx->stream_res.tg) + copy_settings_data->otg_inst = pipe_ctx->stream_res.tg->inst; + else + copy_settings_data->otg_inst = 0; + + // Misc + copy_settings_data->psr_level = psr_context->psr_level.u32all; + copy_settings_data->hyst_frames = psr_context->timehyst_frames; + copy_settings_data->hyst_lines = psr_context->hyst_lines; + copy_settings_data->phy_type = psr_context->phyType; + copy_settings_data->aux_repeat = psr_context->aux_repeats; + copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations; + copy_settings_data->skip_wait_for_pll_lock = psr_context->skipPsrWaitForPllLock; + copy_settings_data->frame_delay = psr_context->frame_delay; + copy_settings_data->smu_phy_id = psr_context->smuPhyId; + copy_settings_data->num_of_controllers = psr_context->numberOfControllers; + copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq; + copy_settings_data->phy_num = psr_context->frame_delay & 0x7; + copy_settings_data->link_rate = psr_context->frame_delay & 0xF; + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_copy_settings.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); + + return true; +} + +static const struct dmub_psr_funcs psr_funcs = { + .set_psr_enable = dmub_set_psr_enable, + .setup_psr = dmub_setup_psr, + .get_psr_state = dmub_get_psr_state, + .set_psr_level = dmub_set_psr_level, +}; + +/** + * Construct PSR object. + */ +static void dmub_psr_construct(struct dmub_psr *psr, struct dc_context *ctx) +{ + psr->ctx = ctx; + psr->funcs = &psr_funcs; +} + +/** + * Allocate and initialize PSR object. + */ +struct dmub_psr *dmub_psr_create(struct dc_context *ctx) +{ + struct dmub_psr *psr = kzalloc(sizeof(struct dmub_psr), GFP_KERNEL); + + if (psr == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dmub_psr_construct(psr, ctx); + + return psr; +} + +/** + * Deallocate PSR object. + */ +void dmub_psr_destroy(struct dmub_psr **dmub) +{ + kfree(dmub); + *dmub = NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h new file mode 100644 index 000000000000..229958de3035 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h @@ -0,0 +1,47 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DMUB_PSR_H_ +#define _DMUB_PSR_H_ + +#include "os_types.h" + +struct dmub_psr { + struct dc_context *ctx; + const struct dmub_psr_funcs *funcs; +}; + +struct dmub_psr_funcs { + void (*set_psr_enable)(struct dmub_psr *dmub, bool enable); + bool (*setup_psr)(struct dmub_psr *dmub, struct dc_link *link, struct psr_context *psr_context); + void (*get_psr_state)(uint32_t *psr_state); + void (*set_psr_level)(struct dmub_psr *dmub, uint16_t psr_level); +}; + +struct dmub_psr *dmub_psr_create(struct dc_context *ctx); +void dmub_psr_destroy(struct dmub_psr **dmub); + + +#endif /* _DCE_DMUB_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 4939cf3b316f..5b689273ff44 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1373,9 +1373,13 @@ static enum dc_status apply_single_controller_ctx_to_hw( // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) event_triggers = 0x80; + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. + */ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); + pipe_ctx->stream_res.tg, event_triggers, 2); if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( @@ -1706,6 +1710,8 @@ static void set_drr(struct pipe_ctx **pipe_ctx, struct drr_params params = {0}; // DRR should set trigger event to monitor surface update event unsigned int event_triggers = 0x80; + // Note DRR trigger events are generated regardless of whether num frames met. + unsigned int num_frames = 2; params.vertical_total_max = vmax; params.vertical_total_min = vmin; @@ -1721,7 +1727,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx, if (vmax != 0 && vmin != 0) pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( pipe_ctx[i]->stream_res.tg, - event_triggers); + event_triggers, num_frames); } } @@ -1738,30 +1744,31 @@ static void get_position(struct pipe_ctx **pipe_ctx, } static void set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events) + int num_pipes, const struct dc_static_screen_params *params) { unsigned int i; - unsigned int value = 0; + unsigned int triggers = 0; - if (events->overlay_update) - value |= 0x100; - if (events->surface_update) - value |= 0x80; - if (events->cursor_update) - value |= 0x2; - if (events->force_trigger) - value |= 0x1; + if (params->triggers.overlay_update) + triggers |= 0x100; + if (params->triggers.surface_update) + triggers |= 0x80; + if (params->triggers.cursor_update) + triggers |= 0x2; + if (params->triggers.force_trigger) + triggers |= 0x1; if (num_pipes) { struct dc *dc = pipe_ctx[0]->stream->ctx->dc; if (dc->fbc_compressor) - value |= 0x84; + triggers |= 0x84; } for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> - set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); } /* diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 5f7c2c5641c4..1ea7db8eeb98 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -469,22 +469,27 @@ void dce110_timing_generator_set_drr( void dce110_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); uint32_t static_screen_cntl = 0; uint32_t addr = 0; + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + addr = CRTC_REG(mmCRTC_STATIC_SCREEN_CONTROL); static_screen_cntl = dm_read_reg(tg->ctx, addr); set_reg_field_value(static_screen_cntl, - value, + event_triggers, CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_EVENT_MASK); set_reg_field_value(static_screen_cntl, - 2, + num_frames, CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_FRAME_COUNT); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 768ccf27ada9..d8a5ed7b485d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -231,7 +231,8 @@ void dce110_timing_generator_set_drr( void dce110_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void dce110_timing_generator_get_crtc_scanoutpos( struct timing_generator *tg, diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 098e56962f2a..82bc4e192bbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -819,13 +819,18 @@ void dce120_tg_set_colors(struct timing_generator *tg, static void dce120_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + CRTC_REG_UPDATE_2(CRTC0_CRTC_STATIC_SCREEN_CONTROL, - CRTC_STATIC_SCREEN_EVENT_MASK, value, - CRTC_STATIC_SCREEN_FRAME_COUNT, 2); + CRTC_STATIC_SCREEN_EVENT_MASK, event_triggers, + CRTC_STATIC_SCREEN_FRAME_COUNT, num_frames); } void dce120_timing_generator_set_test_pattern( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 935c892622a0..4d3f7d5e1473 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -88,26 +88,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { - {COLOR_SPACE_SRGB, - {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - {COLOR_SPACE_SRGB_LIMITED, - {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - {COLOR_SPACE_YCBCR601, - {0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef, - 0, 0x2000, 0x38b4, 0xe3a6} }, - {COLOR_SPACE_YCBCR601_LIMITED, - {0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108, - 0, 0x2568, 0x40de, 0xdd3a} }, - {COLOR_SPACE_YCBCR709, - {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, - 0x2000, 0x3b61, 0xe24f} }, - - {COLOR_SPACE_YCBCR709_LIMITED, - {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, - 0x2568, 0x43ee, 0xdbb2} } -}; - static void program_gamut_remap( struct dcn10_dpp *dpp, const uint16_t *regval, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index a02c10e23e0d..f36a0d8cedfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -930,6 +930,9 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; break; + default: + ASSERT(false); + break; } output->capable = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 2baff3cd0ae5..f2127afb37b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1188,8 +1188,14 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (can_apply_seamless_boot && pipe_ctx->stream != NULL && pipe_ctx->stream_res.tg->funcs->is_tg_enabled( - pipe_ctx->stream_res.tg)) + pipe_ctx->stream_res.tg)) { + // Enable double buffering for OTG_BLANK no matter if + // seamless boot is enabled or not to suppress global sync + // signals when OTG blanked. This is to prevent pipe from + // requesting data while in PSR. + tg->funcs->tg_init(tg); continue; + } /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -2704,6 +2710,8 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, struct drr_params params = {0}; // DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow unsigned int event_triggers = 0x800; + // Note DRR trigger events are generated regardless of whether num frames met. + unsigned int num_frames = 2; params.vertical_total_max = vmax; params.vertical_total_min = vmin; @@ -2720,7 +2728,7 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, if (vmax != 0 && vmin != 0) pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( pipe_ctx[i]->stream_res.tg, - event_triggers); + event_triggers, num_frames); } } @@ -2737,21 +2745,22 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx, } void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events) + int num_pipes, const struct dc_static_screen_params *params) { unsigned int i; - unsigned int value = 0; + unsigned int triggers = 0; - if (events->surface_update) - value |= 0x80; - if (events->cursor_update) - value |= 0x2; - if (events->force_trigger) - value |= 0x1; + if (params->triggers.surface_update) + triggers |= 0x80; + if (params->triggers.cursor_update) + triggers |= 0x2; + if (params->triggers.force_trigger) + triggers |= 0x1; for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> - set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); } static void dcn10_config_stereo_parameters( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 55b8f3b2fc4e..4d20f6586bb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -132,7 +132,7 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx, int num_pipes, struct crtc_position *position); void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events); + int num_pipes, const struct dc_static_screen_params *params); void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc); void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable); void dcn10_log_hw_state(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index cd7412dc42d1..a9a43b397db9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -789,21 +789,26 @@ void optc1_set_early_control( void optc1_set_static_screen_control( struct timing_generator *optc, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + /* Bit 8 is no longer applicable in RV for PSR case, * set bit 8 to 0 if given */ - if ((value & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) + if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) != 0) - value = value & + event_triggers = event_triggers & ~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN; REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0, - OTG_STATIC_SCREEN_EVENT_MASK, value, - OTG_STATIC_SCREEN_FRAME_COUNT, 2); + OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, + OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); } void optc1_setup_manual_trigger(struct timing_generator *optc) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 3afeb1a30f21..f277656d5464 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -625,7 +625,8 @@ void optc1_set_drr( void optc1_set_static_screen_control( struct timing_generator *optc, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void optc1_program_stereo(struct timing_generator *optc, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index 4d7e45892f08..13e057d7ee93 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -104,7 +104,7 @@ static void dpp2_cnv_setup ( uint32_t pixel_format = 0; uint32_t alpha_en = 1; enum dc_color_space color_space = COLOR_SPACE_SRGB; - enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS; + enum dcn20_input_csc_select select = DCN2_ICSC_SELECT_BYPASS; bool force_disable_cursor = false; struct out_csc_color_matrix tbl_entry; uint32_t is_2bit = 0; @@ -145,25 +145,25 @@ static void dpp2_cnv_setup ( force_disable_cursor = false; pixel_format = 65; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: force_disable_cursor = true; pixel_format = 64; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: force_disable_cursor = true; pixel_format = 67; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: force_disable_cursor = true; pixel_format = 66; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: pixel_format = 22; @@ -177,7 +177,7 @@ static void dpp2_cnv_setup ( case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: pixel_format = 12; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: pixel_format = 112; @@ -188,13 +188,13 @@ static void dpp2_cnv_setup ( case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: pixel_format = 114; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; is_2bit = 1; break; case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102: pixel_format = 115; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; is_2bit = 1; break; case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: @@ -227,13 +227,13 @@ static void dpp2_cnv_setup ( tbl_entry.color_space = input_color_space; if (color_space >= COLOR_SPACE_YCBCR601) - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; else - select = INPUT_CSC_SELECT_BYPASS; + select = DCN2_ICSC_SELECT_BYPASS; - dpp1_program_input_csc(dpp_base, color_space, select, &tbl_entry); + dpp2_program_input_csc(dpp_base, color_space, select, &tbl_entry); } else - dpp1_program_input_csc(dpp_base, color_space, select, NULL); + dpp2_program_input_csc(dpp_base, color_space, select, NULL); if (force_disable_cursor) { REG_UPDATE(CURSOR_CONTROL, @@ -458,7 +458,7 @@ static struct dpp_funcs dcn20_dpp_funcs = { .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, .dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps, - .dpp_set_gamut_remap = dpp1_cm_set_gamut_remap, + .dpp_set_gamut_remap = dpp2_cm_set_gamut_remap, .dpp_set_csc_adjustment = NULL, .dpp_set_csc_default = NULL, .dpp_program_regamma_pwl = oppn20_dummy_program_regamma_pwl, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h index 5b03b737b1d6..27610251c57f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h @@ -150,6 +150,16 @@ SRI(CM_SHAPER_RAMA_REGION_32_33, CM, id), \ SRI(CM_SHAPER_LUT_INDEX, CM, id) +#define TF_REG_LIST_DCN20_COMMON_APPEND(id) \ + SRI(CM_GAMUT_REMAP_B_C11_C12, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C13_C14, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C21_C22, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C23_C24, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C31_C32, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C33_C34, CM, id),\ + SRI(CM_ICSC_B_C11_C12, CM, id), \ + SRI(CM_ICSC_B_C33_C34, CM, id) + #define TF_REG_LIST_DCN20(id) \ TF_REG_LIST_DCN(id), \ TF_REG_LIST_DCN20_COMMON(id), \ @@ -572,10 +582,29 @@ TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\ TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh) +/* DPP CM debug status register: + * + * Status index including current ICSC, Gamut Remap Mode is 9 + * ICSC Mode: [4..3] + * Gamut Remap Mode: [10..9] + */ +#define CM_TEST_DEBUG_DATA_STATUS_IDX 9 + +#define TF_DEBUG_REG_LIST_SH_DCN20 \ + TF_DEBUG_REG_LIST_SH_DCN10, \ + .CM_TEST_DEBUG_DATA_ICSC_MODE = 3, \ + .CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 9 + +#define TF_DEBUG_REG_LIST_MASK_DCN20 \ + TF_DEBUG_REG_LIST_MASK_DCN10, \ + .CM_TEST_DEBUG_DATA_ICSC_MODE = 0x18, \ + .CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 0x600 #define TF_REG_FIELD_LIST_DCN2_0(type) \ TF_REG_FIELD_LIST(type) \ type CM_BLNDGAM_LUT_DATA; \ + type CM_TEST_DEBUG_DATA_ICSC_MODE; \ + type CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE; \ type FORMAT_CNV16; \ type CNVC_BYPASS_MSB_ALIGN; \ type CLAMP_POSITIVE; \ @@ -630,11 +659,22 @@ struct dcn2_dpp_mask { uint32_t COLOR_KEYER_RED; \ uint32_t COLOR_KEYER_GREEN; \ uint32_t COLOR_KEYER_BLUE; \ - uint32_t OBUF_MEM_PWR_CTRL;\ + uint32_t OBUF_MEM_PWR_CTRL; \ uint32_t DSCL_MEM_PWR_CTRL +#define DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND \ + uint32_t CM_GAMUT_REMAP_B_C11_C12; \ + uint32_t CM_GAMUT_REMAP_B_C13_C14; \ + uint32_t CM_GAMUT_REMAP_B_C21_C22; \ + uint32_t CM_GAMUT_REMAP_B_C23_C24; \ + uint32_t CM_GAMUT_REMAP_B_C31_C32; \ + uint32_t CM_GAMUT_REMAP_B_C33_C34; \ + uint32_t CM_ICSC_B_C11_C12; \ + uint32_t CM_ICSC_B_C33_C34 + struct dcn2_dpp_registers { DPP_DCN2_REG_VARIABLE_LIST; + DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND; }; struct dcn20_dpp { @@ -656,6 +696,18 @@ struct dcn20_dpp { struct pwl_params pwl_data; }; +enum dcn20_input_csc_select { + DCN2_ICSC_SELECT_BYPASS = 0, + DCN2_ICSC_SELECT_ICSC_A = 1, + DCN2_ICSC_SELECT_ICSC_B = 2 +}; + +enum dcn20_gamut_remap_select { + DCN2_GAMUT_REMAP_BYPASS = 0, + DCN2_GAMUT_REMAP_COEF_A = 1, + DCN2_GAMUT_REMAP_COEF_B = 2 +}; + void dpp20_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s); @@ -667,6 +719,16 @@ void dpp2_set_degamma( struct dpp *dpp_base, enum ipp_degamma_mode mode); +void dpp2_cm_set_gamut_remap( + struct dpp *dpp_base, + const struct dpp_grph_csc_adjustment *adjust); + +void dpp2_program_input_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + enum dcn20_input_csc_select input_select, + const struct out_csc_color_matrix *tbl_entry); + bool dpp20_program_blnd_lut( struct dpp *dpp_base, const struct pwl_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c index 05a3e7f97ef0..8dc3d1f73984 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c @@ -36,6 +36,9 @@ #define REG(reg)\ dpp->tf_regs->reg +#define IND_REG(index) \ + (index) + #define CTX \ dpp->base.ctx @@ -44,9 +47,6 @@ dpp->tf_shift->field_name, dpp->tf_mask->field_name - - - static void dpp2_enable_cm_block( struct dpp *dpp_base) { @@ -158,6 +158,155 @@ void dpp2_set_degamma( } } +static void program_gamut_remap( + struct dcn20_dpp *dpp, + const uint16_t *regval, + enum dcn20_gamut_remap_select select) +{ + uint32_t cur_select = 0; + struct color_matrices_reg gam_regs; + + if (regval == NULL || select == DCN2_GAMUT_REMAP_BYPASS) { + REG_SET(CM_GAMUT_REMAP_CONTROL, 0, + CM_GAMUT_REMAP_MODE, 0); + return; + } + + /* determine which gamut_remap coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the update so gamut_remap is updated on frame boundary + */ + IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX, + CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE, &cur_select); + + /* value stored in dbg reg will be 1 greater than mode we want */ + if (cur_select != DCN2_GAMUT_REMAP_COEF_A) + select = DCN2_GAMUT_REMAP_COEF_A; + else + select = DCN2_GAMUT_REMAP_COEF_B; + + gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11; + gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11; + gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12; + gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12; + + if (select == DCN2_GAMUT_REMAP_COEF_A) { + gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12); + gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34); + } else { + gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12); + gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34); + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET( + CM_GAMUT_REMAP_CONTROL, 0, + CM_GAMUT_REMAP_MODE, select); + +} + +void dpp2_cm_set_gamut_remap( + struct dpp *dpp_base, + const struct dpp_grph_csc_adjustment *adjust) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + int i = 0; + + if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) + /* Bypass if type is bypass or hw */ + program_gamut_remap(dpp, NULL, DCN2_GAMUT_REMAP_BYPASS); + else { + struct fixed31_32 arr_matrix[12]; + uint16_t arr_reg_val[12]; + + for (i = 0; i < 12; i++) + arr_matrix[i] = adjust->temperature_matrix[i]; + + convert_float_matrix( + arr_reg_val, arr_matrix, 12); + + program_gamut_remap(dpp, arr_reg_val, DCN2_GAMUT_REMAP_COEF_A); + } +} + +void dpp2_program_input_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + enum dcn20_input_csc_select input_select, + const struct out_csc_color_matrix *tbl_entry) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + int i; + int arr_size = sizeof(dpp_input_csc_matrix)/sizeof(struct dpp_input_csc_matrix); + const uint16_t *regval = NULL; + uint32_t cur_select = 0; + enum dcn20_input_csc_select select; + struct color_matrices_reg icsc_regs; + + if (input_select == DCN2_ICSC_SELECT_BYPASS) { + REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0); + return; + } + + if (tbl_entry == NULL) { + for (i = 0; i < arr_size; i++) + if (dpp_input_csc_matrix[i].color_space == color_space) { + regval = dpp_input_csc_matrix[i].regval; + break; + } + + if (regval == NULL) { + BREAK_TO_DEBUGGER(); + return; + } + } else { + regval = tbl_entry->regval; + } + + /* determine which CSC coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX, + CM_TEST_DEBUG_DATA_ICSC_MODE, &cur_select); + + if (cur_select != DCN2_ICSC_SELECT_ICSC_A) + select = DCN2_ICSC_SELECT_ICSC_A; + else + select = DCN2_ICSC_SELECT_ICSC_B; + + icsc_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11; + icsc_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11; + icsc_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12; + icsc_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12; + + if (select == DCN2_ICSC_SELECT_ICSC_A) { + + icsc_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12); + icsc_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34); + + } else { + + icsc_regs.csc_c11_c12 = REG(CM_ICSC_B_C11_C12); + icsc_regs.csc_c33_c34 = REG(CM_ICSC_B_C33_C34); + + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &icsc_regs); + + REG_SET(CM_ICSC_CONTROL, 0, + CM_ICSC_MODE, select); +} + static void dpp20_power_on_blnd_lut( struct dpp *dpp_base, bool power_on) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 8b8438566101..9235f7d29454 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -293,6 +293,9 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; break; + default: + ASSERT(false); + break; } output->capable = true; output->const_color_support = true; @@ -601,7 +604,8 @@ static const struct hubbub_funcs hubbub2_funcs = { .wm_read_state = hubbub2_wm_read_state, .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, .program_watermarks = hubbub2_program_watermarks, - .allow_self_refresh_control = hubbub1_allow_self_refresh_control + .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .allow_self_refresh_control = hubbub1_allow_self_refresh_control, }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5b9cbedaa0de..cfbbaffa8654 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -686,9 +686,13 @@ enum dc_status dcn20_enable_stream_timing( // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) event_triggers = 0x80; + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. + */ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); + pipe_ctx->stream_res.tg, event_triggers, 2); /* TODO program crtc source select for non-virtual signal*/ /* TODO program FMT */ @@ -941,6 +945,9 @@ void dcn20_blank_pixel_data( int width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; int height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + if (stream->link->test_pattern_enabled) + return; + /* get opp dpg blank color */ color_space_to_black_color(dc, color_space, &black_color); @@ -1638,9 +1645,9 @@ void dcn20_program_front_end_for_ctx( struct hubp *hubp = pipe->plane_res.hubp; int j = 0; - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS + for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000 && hubp->funcs->hubp_is_flip_pending(hubp); j++) - msleep(1); + mdelay(1); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index f90031ed58a6..de9c857ab3e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -33,6 +33,9 @@ #define REG(reg)\ mpc20->mpc_regs->reg +#define IND_REG(index) \ + (index) + #define CTX \ mpc20->base.ctx @@ -132,19 +135,33 @@ void mpc2_set_output_csc( const uint16_t *regval, enum mpc_output_csc_mode ocsc_mode) { + uint32_t cur_mode; struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); struct color_matrices_reg ocsc_regs; - REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); - - if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) + if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) { + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); return; + } if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } + /* determine which CSC coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode); + + if (cur_mode != MPC_OUTPUT_CSC_COEF_A) + ocsc_mode = MPC_OUTPUT_CSC_COEF_A; + else + ocsc_mode = MPC_OUTPUT_CSC_COEF_B; + ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A; ocsc_regs.masks.csc_c11 = mpc20->mpc_mask->MPC_OCSC_C11_A; ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A; @@ -157,10 +174,13 @@ void mpc2_set_output_csc( ocsc_regs.csc_c11_c12 = REG(CSC_C11_C12_B[opp_id]); ocsc_regs.csc_c33_c34 = REG(CSC_C33_C34_B[opp_id]); } + cm_helper_program_color_matrices( mpc20->base.ctx, regval, &ocsc_regs); + + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); } void mpc2_set_ocsc_default( @@ -169,14 +189,16 @@ void mpc2_set_ocsc_default( enum dc_color_space color_space, enum mpc_output_csc_mode ocsc_mode) { + uint32_t cur_mode; struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); uint32_t arr_size; struct color_matrices_reg ocsc_regs; const uint16_t *regval = NULL; - REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); - if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) + if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) { + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); return; + } regval = find_color_matrix(color_space, &arr_size); @@ -185,6 +207,19 @@ void mpc2_set_ocsc_default( return; } + /* determine which CSC coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode); + + if (cur_mode != MPC_OUTPUT_CSC_COEF_A) + ocsc_mode = MPC_OUTPUT_CSC_COEF_A; + else + ocsc_mode = MPC_OUTPUT_CSC_COEF_B; + ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A; ocsc_regs.masks.csc_c11 = mpc20->mpc_mask->MPC_OCSC_C11_A; ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A; @@ -203,6 +238,8 @@ void mpc2_set_ocsc_default( mpc20->base.ctx, regval, &ocsc_regs); + + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); } static void mpc2_ogam_get_reg_field( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h index 9f53192da2dc..c78fd5123497 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h @@ -80,6 +80,10 @@ SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst),\ SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst) +#define MPC_DBG_REG_LIST_DCN2_0() \ + SR(MPC_OCSC_TEST_DEBUG_DATA),\ + SR(MPC_OCSC_TEST_DEBUG_INDEX) + #define MPC_REG_VARIABLE_LIST_DCN2_0 \ MPC_COMMON_REG_VARIABLE_LIST \ uint32_t MPCC_TOP_GAIN[MAX_MPCC]; \ @@ -118,6 +122,8 @@ uint32_t MPCC_OGAM_LUT_RAM_CONTROL[MAX_MPCC];\ uint32_t MPCC_OGAM_LUT_DATA[MAX_MPCC];\ uint32_t MPCC_OGAM_MODE[MAX_MPCC];\ + uint32_t MPC_OCSC_TEST_DEBUG_DATA;\ + uint32_t MPC_OCSC_TEST_DEBUG_INDEX;\ uint32_t CSC_MODE[MAX_OPP]; \ uint32_t CSC_C11_C12_A[MAX_OPP]; \ uint32_t CSC_C33_C34_A[MAX_OPP]; \ @@ -134,6 +140,7 @@ SF(MPCC0_MPCC_TOP_GAIN, MPCC_TOP_GAIN, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_INSIDE, MPCC_BOT_GAIN_INSIDE, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_OUTSIDE, MPCC_BOT_GAIN_OUTSIDE, mask_sh),\ + SF(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_INDEX, mask_sh),\ SF(MPC_OUT0_CSC_MODE, MPC_OCSC_MODE, mask_sh),\ SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C11_A, mask_sh),\ SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C12_A, mask_sh),\ @@ -174,6 +181,19 @@ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh) +/* + * DCN2 MPC_OCSC debug status register: + * + * Status index including current OCSC Mode is 1 + * OCSC Mode: [1..0] + */ +#define MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX 1 + +#define MPC_DEBUG_REG_LIST_SH_DCN20 \ + .MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0 + +#define MPC_DEBUG_REG_LIST_MASK_DCN20 \ + .MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0x3 #define MPC_REG_FIELD_LIST_DCN2_0(type) \ MPC_REG_FIELD_LIST(type)\ @@ -182,6 +202,8 @@ type MPCC_TOP_GAIN;\ type MPCC_BOT_GAIN_INSIDE;\ type MPCC_BOT_GAIN_OUTSIDE;\ + type MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE;\ + type MPC_OCSC_TEST_DEBUG_INDEX;\ type MPC_OCSC_MODE;\ type MPC_OCSC_C11_A;\ type MPC_OCSC_C12_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2dafa20d769d..85f90f3e24cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -638,6 +638,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = { #define tf_regs(id)\ [id] = {\ TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ } static const struct dcn2_dpp_registers tf_regs[] = { @@ -651,12 +652,12 @@ static const struct dcn2_dpp_registers tf_regs[] = { static const struct dcn2_dpp_shift tf_shift = { TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN10 + TF_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn2_dpp_mask tf_mask = { TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN10 + TF_DEBUG_REG_LIST_MASK_DCN20 }; #define dwbc_regs_dcn2(id)\ @@ -706,14 +707,17 @@ static const struct dcn20_mpc_registers mpc_regs = { MPC_OUT_MUX_REG_LIST_DCN2_0(3), MPC_OUT_MUX_REG_LIST_DCN2_0(4), MPC_OUT_MUX_REG_LIST_DCN2_0(5), + MPC_DBG_REG_LIST_DCN2_0() }; static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 }; #define tg_regs(id)\ @@ -1857,6 +1861,22 @@ void dcn20_populate_dml_writeback_from_context( } +static int get_num_odm_heads(struct pipe_ctx *pipe) +{ + int odm_head_count = 0; + struct pipe_ctx *next_pipe = pipe->next_odm_pipe; + while (next_pipe) { + odm_head_count++; + next_pipe = next_pipe->next_odm_pipe; + } + pipe = pipe->prev_odm_pipe; + while (pipe) { + odm_head_count++; + pipe = pipe->prev_odm_pipe; + } + return odm_head_count ? odm_head_count + 1 : 0; +} + int dcn20_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes) { @@ -1883,17 +1903,21 @@ int dcn20_populate_dml_pipes_from_context( for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; unsigned int v_total; + unsigned int front_porch; int output_bpc; if (!res_ctx->pipe_ctx[i].stream) continue; v_total = timing->v_total; + front_porch = timing->v_front_porch; /* todo: pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; pipes[pipe_cnt].pipe.src.dcc = 0; pipes[pipe_cnt].pipe.src.vm = 0;*/ + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; /* todo: rotation?*/ pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; @@ -1915,7 +1939,7 @@ int dcn20_populate_dml_pipes_from_context( - timing->h_addressable - timing->h_border_left - timing->h_border_right; - pipes[pipe_cnt].pipe.dest.vblank_start = v_total - timing->v_front_porch; + pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch; pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start - timing->v_addressable - timing->v_border_top @@ -1932,8 +1956,13 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.dp_lanes = 4; pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; - pipes[pipe_cnt].pipe.dest.odm_combine = res_ctx->pipe_ctx[i].prev_odm_pipe - || res_ctx->pipe_ctx[i].next_odm_pipe; + switch (get_num_odm_heads(&res_ctx->pipe_ctx[i])) { + case 2: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1; + break; + default: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled; + } pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == res_ctx->pipe_ctx[i].plane_state) @@ -2043,6 +2072,9 @@ int dcn20_populate_dml_pipes_from_context( if (pipes[pipe_cnt].pipe.src.viewport_height > 1080) pipes[pipe_cnt].pipe.src.viewport_height = 1080; pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width; + pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* linear sw only */ pipes[pipe_cnt].pipe.src.source_format = dm_444_32; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ @@ -2076,7 +2108,10 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width; pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height; pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height; + pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width; pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height; + pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width; + pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height; if (pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch; pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; @@ -2490,7 +2525,7 @@ int dcn20_validate_apply_pipe_split_flags( split[i] = true; if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { split[i] = true; - context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true; + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = dm_odm_combine_mode_2to1; } context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] = context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx]; @@ -2915,7 +2950,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || (voltage_supported && full_pstate_supported)) { - context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported; goto restore_dml_state; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 216ae170bc50..da63fc53cc4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -29,6 +29,8 @@ #include "dm_services.h" #include "reg_helper.h" +#include "dc_dmub_srv.h" + #define DC_LOGGER_INIT(logger) #define REG(reg)\ @@ -251,10 +253,20 @@ static void hubp21_apply_PLAT_54186_wa( ROTATION_ANGLE, &rotation_angle, H_MIRROR_EN, &h_mirror_en); - /* apply wa only for NV12 surface with scatter gather enabled with view port > 512 */ + /* reset persistent cached data */ + hubp21->PLAT_54186_wa_chroma_addr_offset = 0; + /* apply wa only for NV12 surface with scatter gather enabled with viewport > 512 along + * the vertical direction*/ if (address->type != PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - address->video_progressive.luma_addr.high_part == 0xf4 - || viewport_c_height <= 512) + address->video_progressive.luma_addr.high_part == 0xf4) + return; + + if ((rotation_angle == 0 || rotation_angle == 180) + && viewport_c_height <= 512) + return; + + if ((rotation_angle == 90 || rotation_angle == 270) + && viewport_c_width <= 512) return; switch (rotation_angle) { @@ -678,123 +690,167 @@ void hubp21_validate_dml_output(struct hubp *hubp, dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l); } -bool hubp21_program_surface_flip_and_addr( - struct hubp *hubp, - const struct dc_plane_address *address, - bool flip_immediate) +static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip_registers *flip_regs) { struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); - struct dc_debug_options *debug = &hubp->ctx->dc->debug; - //program flip type - REG_UPDATE(DCSURF_FLIP_CONTROL, - SURFACE_FLIP_TYPE, flip_immediate); + REG_UPDATE_3(DCSURF_FLIP_CONTROL, + SURFACE_FLIP_TYPE, flip_regs->immediate, + SURFACE_FLIP_MODE_FOR_STEREOSYNC, flip_regs->grph_stereo, + SURFACE_FLIP_IN_STEREOSYNC, flip_regs->grph_stereo); - // Program VMID reg REG_UPDATE(VMID_SETTINGS_0, - VMID, address->vmid); + VMID, flip_regs->vmid); - if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); + REG_UPDATE_8(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, flip_regs->tmz_surface, + PRIMARY_SURFACE_TMZ_C, flip_regs->tmz_surface, + PRIMARY_META_SURFACE_TMZ, flip_regs->tmz_surface, + PRIMARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface, + SECONDARY_SURFACE_TMZ, flip_regs->tmz_surface, + SECONDARY_SURFACE_TMZ_C, flip_regs->tmz_surface, + SECONDARY_META_SURFACE_TMZ, flip_regs->tmz_surface, + SECONDARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface); - } else { - // turn off stereo if not in stereo - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0); - } + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_META_SURFACE_ADDRESS_HIGH_C, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C); + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, + PRIMARY_META_SURFACE_ADDRESS_C, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_C); + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_META_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, + PRIMARY_META_SURFACE_ADDRESS, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS); + + REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_META_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, + SECONDARY_META_SURFACE_ADDRESS, + flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS); + + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, + SECONDARY_SURFACE_ADDRESS, + flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS); + + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_SURFACE_ADDRESS_HIGH_C, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, + PRIMARY_SURFACE_ADDRESS_C, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS); +} + +void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs) +{ + struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 }; + + PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo; + PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst; + PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate; + PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface; + PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; + + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_execute(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_wait_idle(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. +} + +bool hubp21_program_surface_flip_and_addr( + struct hubp *hubp, + const struct dc_plane_address *address, + bool flip_immediate) +{ + struct dc_debug_options *debug = &hubp->ctx->dc->debug; + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct surface_flip_registers flip_regs = { 0 }; + + flip_regs.vmid = address->vmid; - /* HW automatically latch rest of address register on write to - * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used - * - * program high first and then the low addr, order matters! - */ switch (address->type) { case PLN_ADDR_TYPE_GRAPHICS: - /* DCN1.0 does not support const color - * TODO: program DCHUBBUB_RET_PATH_DCC_CFGx_0/1 - * base on address->grph.dcc_const_color - * x = 0, 2, 4, 6 for pipe 0, 1, 2, 3 for rgb and luma - * x = 1, 3, 5, 7 for pipe 0, 1, 2, 3 for chroma - */ - - if (address->grph.addr.quad_part == 0) + if (address->grph.addr.quad_part == 0) { + BREAK_TO_DEBUGGER(); break; - - REG_UPDATE_2(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface); + } if (address->grph.meta_addr.quad_part != 0) { - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->grph.meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->grph.meta_addr.low_part); + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->grph.meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->grph.meta_addr.high_part; } - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->grph.addr.high_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->grph.addr.low_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->grph.addr.low_part; + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->grph.addr.high_part; break; case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: if (address->video_progressive.luma_addr.quad_part == 0 || address->video_progressive.chroma_addr.quad_part == 0) break; - REG_UPDATE_4(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_SURFACE_TMZ_C, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface); - if (address->video_progressive.luma_meta_addr.quad_part != 0) { - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH_C, - address->video_progressive.chroma_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, - PRIMARY_META_SURFACE_ADDRESS_C, - address->video_progressive.chroma_meta_addr.low_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->video_progressive.luma_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->video_progressive.luma_meta_addr.low_part); + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->video_progressive.luma_meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->video_progressive.luma_meta_addr.high_part; + + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C = + address->video_progressive.chroma_meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C = + address->video_progressive.chroma_meta_addr.high_part; } - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, - PRIMARY_SURFACE_ADDRESS_HIGH_C, - address->video_progressive.chroma_addr.high_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->video_progressive.luma_addr.low_part; + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->video_progressive.luma_addr.high_part; if (debug->nv12_iflip_vm_wa) { - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, - PRIMARY_SURFACE_ADDRESS_C, - address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset); - } else { - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, - PRIMARY_SURFACE_ADDRESS_C, - address->video_progressive.chroma_addr.low_part); - } + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset; + } else + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part; - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->video_progressive.luma_addr.high_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = + address->video_progressive.chroma_addr.high_part; - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->video_progressive.luma_addr.low_part); break; case PLN_ADDR_TYPE_GRPH_STEREO: if (address->grph_stereo.left_addr.quad_part == 0) @@ -802,58 +858,46 @@ bool hubp21_program_surface_flip_and_addr( if (address->grph_stereo.right_addr.quad_part == 0) break; - REG_UPDATE_8(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_SURFACE_TMZ_C, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface, - SECONDARY_SURFACE_TMZ, address->tmz_surface, - SECONDARY_SURFACE_TMZ_C, address->tmz_surface, - SECONDARY_META_SURFACE_TMZ, address->tmz_surface, - SECONDARY_META_SURFACE_TMZ_C, address->tmz_surface); + flip_regs.grph_stereo = true; if (address->grph_stereo.right_meta_addr.quad_part != 0) { - - REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, - SECONDARY_META_SURFACE_ADDRESS_HIGH, - address->grph_stereo.right_meta_addr.high_part); - - REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, - SECONDARY_META_SURFACE_ADDRESS, - address->grph_stereo.right_meta_addr.low_part); + flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS = + address->grph_stereo.right_meta_addr.low_part; + flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH = + address->grph_stereo.right_meta_addr.high_part; } - if (address->grph_stereo.left_meta_addr.quad_part != 0) { - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->grph_stereo.left_meta_addr.high_part); - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->grph_stereo.left_meta_addr.low_part); + if (address->grph_stereo.left_meta_addr.quad_part != 0) { + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->grph_stereo.left_meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->grph_stereo.left_meta_addr.high_part; } - REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, - SECONDARY_SURFACE_ADDRESS_HIGH, - address->grph_stereo.right_addr.high_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->grph_stereo.left_addr.low_part; + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->grph_stereo.left_addr.high_part; - REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, - SECONDARY_SURFACE_ADDRESS, - address->grph_stereo.right_addr.low_part); + flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS = + address->grph_stereo.right_addr.low_part; + flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH = + address->grph_stereo.right_addr.high_part; - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->grph_stereo.left_addr.high_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->grph_stereo.left_addr.low_part); break; default: BREAK_TO_DEBUGGER(); break; } + flip_regs.tmz_surface = address->tmz_surface; + flip_regs.immediate = flip_immediate; + + if (hubp->ctx->dc->debug.enable_dmcub_surface_flip && address->type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) + dmcub_PLAT_54186_wa(hubp, &flip_regs); + else + program_surface_flip_and_addr(hubp, &flip_regs); + hubp->request_address = *address; return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index c76449f58064..1d741bca2211 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -83,6 +83,7 @@ #include "dcn21_resource.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "../dce/dmub_psr.h" #define SOC_BOUNDING_BOX_VALID false #define DC_LOGGER_INIT(logger) @@ -465,15 +466,18 @@ static const struct dcn20_mpc_registers mpc_regs = { MPC_OUT_MUX_REG_LIST_DCN2_0(0), MPC_OUT_MUX_REG_LIST_DCN2_0(1), MPC_OUT_MUX_REG_LIST_DCN2_0(2), - MPC_OUT_MUX_REG_LIST_DCN2_0(3) + MPC_OUT_MUX_REG_LIST_DCN2_0(3), + MPC_DBG_REG_LIST_DCN2_0() }; static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 }; #define hubp_regs(id)\ @@ -608,6 +612,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = { #define tf_regs(id)\ [id] = {\ TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ } static const struct dcn2_dpp_registers tf_regs[] = { @@ -618,11 +623,13 @@ static const struct dcn2_dpp_registers tf_regs[] = { }; static const struct dcn2_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN20(__SHIFT) + TF_REG_LIST_SH_MASK_DCN20(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn2_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN20(_MASK) + TF_REG_LIST_SH_MASK_DCN20(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN20 }; #define stream_enc_regs(id)\ @@ -828,7 +835,8 @@ static const struct dc_debug_options debug_defaults_drv = { .scl_reset_length10 = true, .sanity_checks = true, .disable_48mhz_pwrdwn = false, - .nv12_iflip_vm_wa = true + .nv12_iflip_vm_wa = true, + .usbc_combo_phy_reset_wa = true }; static const struct dc_debug_options debug_defaults_diags = { @@ -1341,6 +1349,10 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - 1]; dcn2_1_soc.num_states = i; + + // diags does not retrieve proper values from SMU, do not update DML instance for diags + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } /* Temporary Place holder until we can get them from fuse */ @@ -1740,6 +1752,10 @@ static bool dcn21_resource_construct( goto create_fail; } + // Leave as NULL to not affect current dmcu psr programming sequence + // Will be uncommented when functionality is confirmed to be working + pool->base.psr = NULL; + pool->base.abm = dce_abm_create(ctx, &abm_regs, &abm_shift, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 3b224b155e8c..e7a8ac7a1f22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -937,7 +937,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -1335,11 +1335,11 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer else mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2848,12 +2848,12 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3348,7 +3348,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] @@ -3446,10 +3446,10 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->FabricAndDRAMBandwidthPerState[i] * 1000) * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; - locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3460,7 +3460,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3472,7 +3472,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3483,7 +3483,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3521,12 +3521,12 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] - + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3903,7 +3903,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -3992,16 +3992,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4183,8 +4183,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4207,7 +4206,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4249,7 +4248,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4292,7 +4291,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); else locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; @@ -4345,28 +4344,28 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * - locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesChroma), locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * - dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); } } } @@ -4406,14 +4405,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.PixelClock[k] / 16.0); if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4423,9 +4422,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4436,9 +4435,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } else { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4448,9 +4447,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4460,9 +4459,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.RequiredDPPCLK[i][j][k]); } if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4473,9 +4472,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4511,7 +4510,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], &mode_lib->vba.dpte_row_height[k], &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4550,7 +4549,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], &mode_lib->vba.dpte_row_height_chroma[k], &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k] / 2.0, mode_lib->vba.VTAPsChroma[k], @@ -4564,14 +4563,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4598,14 +4597,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; if (mode_lib->vba.GPUVMEnable == true) { mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + mode_lib->vba.TotalNumberOfActiveDPP[i][j] * mode_lib->vba.PTEGroupSize - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { @@ -4655,7 +4654,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); } @@ -4700,7 +4699,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], @@ -4718,7 +4717,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l - mode_lib->vba.VActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, mode_lib->vba.DynamicMetadataEnable[k], @@ -4728,15 +4727,15 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], mode_lib->vba.SwathWidthYPerState[i][j][k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.PrefillY[k], mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.PrefetchLinesC[0][0][k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.PrefillC[k], mode_lib->vba.MaxNumSwC[k], @@ -4767,19 +4766,19 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->prefetch_vm_bw_valid = true; locals->prefetch_row_bw_valid = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) locals->prefetch_vm_bw[k] = 0; else if (locals->LinesForMetaPTE[k] > 0) - locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_vm_bw[k] = 0; locals->prefetch_vm_bw_valid = false; } - if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) locals->prefetch_row_bw[k] = 0; else if (locals->LinesForMetaAndDPTERow[k] > 0) - locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_row_bw[k] = 0; @@ -4798,13 +4797,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { locals->PrefetchSupported[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4829,7 +4828,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.PrefetchSupported[i][j] == true && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; + mode_lib->vba.ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip @@ -4843,9 +4842,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; } } mode_lib->vba.TotImmediateFlipBytes = 0.0; @@ -4873,9 +4872,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.PixelClock[k], mode_lib->vba.VRatio[k], mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], mode_lib->vba.DCCEnable[k], mode_lib->vba.dpte_row_height[k], mode_lib->vba.meta_row_height[k], @@ -4900,7 +4899,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { + > mode_lib->vba.ReturnBWPerState[i][0]) { mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4919,13 +4918,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) mode_lib->vba.MaxTotalVActiveRDBandwidth = mode_lib->vba.MaxTotalVActiveRDBandwidth + mode_lib->vba.ReadBandwidth[k]; for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; - if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; else - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; } /*PTE Buffer Size Check*/ @@ -5013,7 +5012,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5023,7 +5022,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_DSC_CLK_REQUIRED; } else if (locals->UrgentLatencySupport[i][j] != true) { status = DML_FAIL_URGENT_LATENCY; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5043,7 +5042,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_PITCH_SUPPORT; } else if (locals->PrefetchSupported[i][j] != true) { status = DML_FAIL_PREFETCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->VRatioInPrefetchSupported[i][j] != true) { status = DML_FAIL_V_RATIO_PREFETCH; @@ -5089,7 +5088,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 6482d7b99bae..22f3b5a4b3b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -997,7 +997,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -1395,11 +1395,11 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP else mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2885,12 +2885,12 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3385,7 +3385,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] @@ -3483,10 +3483,10 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->FabricAndDRAMBandwidthPerState[i] * 1000) * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; - locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3497,7 +3497,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3509,7 +3509,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3520,7 +3520,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3558,12 +3558,12 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] - + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3946,7 +3946,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -4035,16 +4035,16 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4226,8 +4226,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4250,7 +4249,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4292,7 +4291,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4335,7 +4334,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); else locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; @@ -4388,28 +4387,28 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * - locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesChroma), locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * - dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); } } } @@ -4454,14 +4453,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.PixelClock[k] / 16.0); if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4471,9 +4470,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4484,9 +4483,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } } else { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4496,9 +4495,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4508,9 +4507,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.RequiredDPPCLK[i][j][k]); } if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4521,9 +4520,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4559,7 +4558,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], &mode_lib->vba.dpte_row_height[k], &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4598,7 +4597,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], &mode_lib->vba.dpte_row_height_chroma[k], &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k] / 2.0, mode_lib->vba.VTAPsChroma[k], @@ -4612,14 +4611,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4646,14 +4645,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; if (mode_lib->vba.GPUVMEnable == true) { mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + mode_lib->vba.TotalNumberOfActiveDPP[i][j] * mode_lib->vba.PTEGroupSize - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { @@ -4703,7 +4702,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); } @@ -4743,7 +4742,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; } - CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i][0], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, @@ -4757,14 +4756,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.NumberOfCursors[k], mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, mode_lib->vba.DynamicMetadataEnable[k], @@ -4774,15 +4773,15 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], mode_lib->vba.SwathWidthYPerState[i][j][k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.PrefillY[k], mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.PrefetchLinesC[0][0][k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.PrefillC[k], mode_lib->vba.MaxNumSwC[k], @@ -4812,19 +4811,19 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->prefetch_vm_bw_valid = true; locals->prefetch_row_bw_valid = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) locals->prefetch_vm_bw[k] = 0; else if (locals->LinesForMetaPTE[k] > 0) - locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_vm_bw[k] = 0; locals->prefetch_vm_bw_valid = false; } - if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) locals->prefetch_row_bw[k] = 0; else if (locals->LinesForMetaAndDPTERow[k] > 0) - locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_row_bw[k] = 0; @@ -4843,13 +4842,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { locals->PrefetchSupported[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4874,7 +4873,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode if (mode_lib->vba.PrefetchSupported[i][j] == true && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; + mode_lib->vba.ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip @@ -4888,9 +4887,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; } } mode_lib->vba.TotImmediateFlipBytes = 0.0; @@ -4918,9 +4917,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode / mode_lib->vba.PixelClock[k], mode_lib->vba.VRatio[k], mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], mode_lib->vba.DCCEnable[k], mode_lib->vba.dpte_row_height[k], mode_lib->vba.meta_row_height[k], @@ -4945,7 +4944,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { + > mode_lib->vba.ReturnBWPerState[i][0]) { mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4961,13 +4960,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode /*Vertical Active BW support*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; - if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; else - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; } /*PTE Buffer Size Check*/ @@ -5055,7 +5054,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5065,7 +5064,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_DSC_CLK_REQUIRED; } else if (locals->UrgentLatencySupport[i][j] != true) { status = DML_FAIL_URGENT_LATENCY; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5085,7 +5084,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_PITCH_SUPPORT; } else if (locals->PrefetchSupported[i][j] != true) { status = DML_FAIL_PREFETCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->VRatioInPrefetchSupported[i][j] != true) { status = DML_FAIL_V_RATIO_PREFETCH; @@ -5131,7 +5130,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index b6d34669cddf..af35b3bea909 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -197,7 +197,7 @@ static unsigned int CalculateVMAndRowBytes( unsigned int *meta_row_width, unsigned int *meta_row_height, unsigned int *vm_group_bytes, - long *dpte_group_bytes, + unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, @@ -295,7 +295,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double UrgentOutOfOrderReturn, double ReturnBW, bool GPUVMEnable, - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, @@ -309,13 +309,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( int DPPPerPlane[], bool DCCEnable[], double DPPCLK[], - unsigned int SwathWidthSingleDPPY[], + double SwathWidthSingleDPPY[], unsigned int SwathHeightY[], double ReadBandwidthPlaneLuma[], unsigned int SwathHeightC[], double ReadBandwidthPlaneChroma[], unsigned int LBBitPerPixel[], - unsigned int SwathWidthY[], + double SwathWidthY[], double HRatio[], unsigned int vtaps[], unsigned int VTAPsChroma[], @@ -344,7 +344,7 @@ static void CalculateDCFCLKDeepSleep( double BytePerPixelDETY[], double BytePerPixelDETC[], double VRatio[], - unsigned int SwathWidthY[], + double SwathWidthY[], int DPPPerPlane[], double HRatio[], double PixelClock[], @@ -435,7 +435,7 @@ static void CalculateMetaAndPTETimes( unsigned int meta_row_height[], unsigned int meta_req_width[], unsigned int meta_req_height[], - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int PTERequestSizeY[], unsigned int PTERequestSizeC[], unsigned int PixelPTEReqWidthY[], @@ -477,7 +477,7 @@ static double CalculateExtraLatency( bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], - long dpte_group_bytes[], + int dpte_group_bytes[], double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, int HostVMMaxPageTableLevels, @@ -1280,7 +1280,7 @@ static unsigned int CalculateVMAndRowBytes( unsigned int *meta_row_width, unsigned int *meta_row_height, unsigned int *vm_group_bytes, - long *dpte_group_bytes, + unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, @@ -1338,7 +1338,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -1683,11 +1683,11 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman else locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2940,12 +2940,12 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3453,7 +3453,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] @@ -3542,17 +3542,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->IdealSDPPortBandwidthPerState[i] = dml_min3( + locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3( mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, mode_lib->vba.FabricClockPerState[i] * mode_lib->vba.FabricDatapathToDCNDataReturn); if (mode_lib->vba.HostVMEnable == false) { - locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] + locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0; } else { - locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] + locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0; } } @@ -3589,12 +3589,12 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly) - * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3982,7 +3982,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -4071,16 +4071,16 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4269,8 +4269,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4293,7 +4292,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4335,7 +4334,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4399,7 +4398,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { locals->SwathWidthYThisState[k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])); } else { @@ -4451,7 +4450,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PSCL_FACTOR, locals->PSCL_FACTOR_CHROMA, locals->RequiredDPPCLKThisState, - &mode_lib->vba.ProjectedDCFCLKDeepSleep); + &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]); for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 @@ -4496,7 +4495,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PTERequestSizeC, locals->dpde0_bytes_per_frame_ub_c, locals->meta_pte_bytes_per_frame_ub_c); - locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines( + locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k]/2, mode_lib->vba.VTAPsChroma[k], @@ -4511,7 +4510,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } @@ -4552,7 +4551,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PTERequestSizeY, locals->dpde0_bytes_per_frame_ub_l, locals->meta_pte_bytes_per_frame_ub_l); - locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines( + locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4562,10 +4561,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.ViewportYStartY[k], &locals->PrefillY[k], &locals->MaxNumSwY[k]); - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4591,7 +4590,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PixelChunkSizeInKByte, locals->TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, - locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActivePlanes, @@ -4602,7 +4601,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.HostVMMaxPageTableLevels, mode_lib->vba.HostVMCachedPageTableLevels); - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { if (mode_lib->vba.WritebackEnable[k] == true) { @@ -4644,15 +4643,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } } - mode_lib->vba.MaxMaxVStartup = 0; + mode_lib->vba.MaxMaxVStartup[0][0] = 0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); - mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]); + mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]); } mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; - mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0]; do { mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; @@ -4693,7 +4692,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k]; myPipe.DISPCLK = locals->RequiredDISPCLK[i][j]; myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep; + myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k]; myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; myPipe.SourceScan = mode_lib->vba.SourceScan[k]; @@ -4727,8 +4726,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.MaxInterDCNTileRepeaters, - dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]), - locals->MaximumVStartup[k], + dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]), + locals->MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, &myHostVM, @@ -4739,15 +4738,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentLatency, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - locals->PDEAndMetaPTEBytesPerFrame[k], - locals->MetaRowBytes[k], - locals->DPTEBytesPerRow[k], - locals->PrefetchLinesY[k], + locals->PDEAndMetaPTEBytesPerFrame[0][0][k], + locals->MetaRowBytes[0][0][k], + locals->DPTEBytesPerRow[0][0][k], + locals->PrefetchLinesY[0][0][k], locals->SwathWidthYThisState[k], locals->BytePerPixelInDETY[k], locals->PrefillY[k], locals->MaxNumSwY[k], - locals->PrefetchLinesC[k], + locals->PrefetchLinesC[0][0][k], locals->BytePerPixelInDETC[k], locals->PrefillC[k], locals->MaxNumSwC[k], @@ -4836,14 +4835,14 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i] + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0] || locals->NotEnoughUrgentLatencyHiding == 1) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i] + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0] || locals->NotEnoughUrgentLatencyHiding == 1 || locals->NotEnoughUrgentLatencyHidingPre == 1) { locals->PrefetchSupported[i][j] = false; @@ -4872,17 +4871,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { - mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0]; mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) - && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup + && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { - mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i]; + mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] @@ -4895,7 +4894,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotImmediateFlipBytes = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes - + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]; + + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4910,9 +4909,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.HostVMMaxPageTableLevels, mode_lib->vba.HostVMCachedPageTableLevels, mode_lib->vba.GPUVMEnable, - locals->PDEAndMetaPTEBytesPerFrame[k], - locals->MetaRowBytes[k], - locals->DPTEBytesPerRow[k], + locals->PDEAndMetaPTEBytesPerFrame[0][0][k], + locals->MetaRowBytes[0][0][k], + locals->DPTEBytesPerRow[0][0][k], mode_lib->vba.BandwidthAvailableForImmediateFlip, mode_lib->vba.TotImmediateFlipBytes, mode_lib->vba.SourcePixelFormat[k], @@ -4943,7 +4942,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } locals->ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > locals->ReturnBWPerState[i]) { + > locals->ReturnBWPerState[i][0]) { locals->ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4970,7 +4969,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.WritebackInterfaceChromaBufferSize, mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, - locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0], mode_lib->vba.GPUVMEnable, locals->dpte_group_bytes, mode_lib->vba.MetaChunkSize, @@ -4982,7 +4981,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMClockChangeLatency, mode_lib->vba.SRExitTime, mode_lib->vba.SREnterPlusExitTime, - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], locals->NoOfDPPThisState, mode_lib->vba.DCCEnable, locals->RequiredDPPCLKThisState, @@ -5025,8 +5024,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; } for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { - locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min( - locals->IdealSDPPortBandwidthPerState[i] * + locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min( + locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100.0, mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels * @@ -5034,10 +5033,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100.0); - if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) { - locals->TotalVerticalActiveBandwidthSupport[i] = true; + if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) { + locals->TotalVerticalActiveBandwidthSupport[i][0] = true; } else { - locals->TotalVerticalActiveBandwidthSupport[i] = false; + locals->TotalVerticalActiveBandwidthSupport[i][0] = false; } } } @@ -5116,7 +5115,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5124,7 +5123,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_NOT_ENOUGH_DSC; } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { status = DML_FAIL_DSC_CLK_REQUIRED; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5142,7 +5141,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_CURSOR_SUPPORT; } else if (mode_lib->vba.PitchSupport != true) { status = DML_FAIL_PITCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { status = DML_FAIL_PTE_BUFFER_SIZE; @@ -5198,7 +5197,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { mode_lib->vba.ODMCombineEnabled[k] = @@ -5227,7 +5226,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double UrgentOutOfOrderReturn, double ReturnBW, bool GPUVMEnable, - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, @@ -5241,13 +5240,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( int DPPPerPlane[], bool DCCEnable[], double DPPCLK[], - unsigned int SwathWidthSingleDPPY[], + double SwathWidthSingleDPPY[], unsigned int SwathHeightY[], double ReadBandwidthPlaneLuma[], unsigned int SwathHeightC[], double ReadBandwidthPlaneChroma[], unsigned int LBBitPerPixel[], - unsigned int SwathWidthY[], + double SwathWidthY[], double HRatio[], unsigned int vtaps[], unsigned int VTAPsChroma[], @@ -5503,7 +5502,7 @@ static void CalculateDCFCLKDeepSleep( double BytePerPixelDETY[], double BytePerPixelDETC[], double VRatio[], - unsigned int SwathWidthY[], + double SwathWidthY[], int DPPPerPlane[], double HRatio[], double PixelClock[], @@ -5831,7 +5830,7 @@ static void CalculateMetaAndPTETimes( unsigned int meta_row_height[], unsigned int meta_req_width[], unsigned int meta_req_height[], - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int PTERequestSizeY[], unsigned int PTERequestSizeC[], unsigned int PixelPTEReqWidthY[], @@ -6087,7 +6086,7 @@ static double CalculateExtraLatency( bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], - long dpte_group_bytes[], + int dpte_group_bytes[], double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, int HostVMMaxPageTableLevels, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 55d4cb23a073..bfc2f39bd1ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -85,7 +85,7 @@ enum dm_swizzle_mode { dm_sw_var_s_x = 29, dm_sw_var_d_x = 30, dm_sw_64kb_r_x, - dm_sw_gfx7_2d_thin_lvp, + dm_sw_gfx7_2d_thin_l_vp, dm_sw_gfx7_2d_thin_gl, }; enum lb_depth { @@ -119,6 +119,10 @@ enum mpc_combine_affinity { dm_mpc_never }; +enum RequestType { + REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA +}; + enum self_refresh_affinity { dm_try_to_allow_self_refresh_and_mclk_switch, dm_allow_self_refresh_and_mclk_switch, @@ -165,4 +169,16 @@ enum odm_combine_mode { dm_odm_combine_mode_4to1, }; +enum odm_combine_policy { + dm_odm_combine_policy_dal, + dm_odm_combine_policy_none, + dm_odm_combine_policy_2to1, + dm_odm_combine_policy_4to1, +}; + +enum immediate_flip_requirement { + dm_immediate_flip_not_required, + dm_immediate_flip_required, +}; + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index dbf6a021d0d8..658f81e757e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -63,6 +63,7 @@ struct _vcs_dpi_voltage_scaling_st { double dispclk_mhz; double phyclk_mhz; double dppclk_mhz; + double dtbclk_mhz; }; struct _vcs_dpi_soc_bounding_box_st { @@ -214,6 +215,7 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_format; unsigned char dcc; unsigned int dcc_rate; + unsigned int dcc_rate_chroma; unsigned char dcc_use_global; unsigned char vm; bool gpuvm; // gpuvm enabled @@ -225,7 +227,10 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_scan; int sw_mode; int macro_tile_size; + unsigned int surface_width_y; unsigned int surface_height_y; + unsigned int surface_width_c; + unsigned int surface_height_c; unsigned int viewport_width; unsigned int viewport_height; unsigned int viewport_y_y; @@ -324,7 +329,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; unsigned char otg_inst; - unsigned char odm_combine; + unsigned int odm_combine; unsigned char use_maximum_vstartup; unsigned int vtotal_max; unsigned int vtotal_min; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 66ca014a6b92..b3c96d9b472f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -264,7 +264,10 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib) mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts; //mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz; mode_lib->vba.MaxDispclk[i] = soc->clock_limits[i].dispclk_mhz; + mode_lib->vba.DTBCLKPerState[i] = soc->clock_limits[i].dtbclk_mhz; } + mode_lib->vba.MinVoltageLevel = 0; + mode_lib->vba.MaxVoltageLevel = mode_lib->vba.soc.num_states; mode_lib->vba.DoUrgentLatencyAdjustment = soc->do_urgent_latency_adjustment; @@ -306,8 +309,6 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib) mode_lib->vba.WritebackInterfaceBufferSize = ip->writeback_interface_buffer_size_kbytes; mode_lib->vba.WritebackLineBufferSize = ip->writeback_line_buffer_buffer_size; - mode_lib->vba.MinVoltageLevel = 0; - mode_lib->vba.MaxVoltageLevel = 5; mode_lib->vba.WritebackChromaLineBufferWidth = ip->writeback_chroma_line_buffer_width_pixels; @@ -423,8 +424,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) ip->dcc_supported : src->dcc && ip->dcc_supported; mode_lib->vba.DCCRate[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; /* TODO: Needs to be set based on src->dcc_rate_luma/chroma */ - mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = 0; - mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = 0; + mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; + mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate_chroma; mode_lib->vba.SourcePixelFormat[mode_lib->vba.NumberOfActivePlanes] = (enum source_format_class) (src->source_format); @@ -436,8 +437,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) dst->recout_width; // TODO: or should this be full_recout_width???...maybe only when in hsplit mode? mode_lib->vba.ODMCombineEnabled[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine; - mode_lib->vba.ODMCombineTypeEnabled[mode_lib->vba.NumberOfActivePlanes] = - dst->odm_combine; mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] = (enum output_format_class) (dout->output_format); mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] = @@ -590,6 +589,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { display_pipe_source_params_st *src_k = &pipes[k].pipe.src; display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest; + display_output_params_st *dout_k = &pipes[j].dout; if (src_k->is_hsplit && !visited[k] && src->hsplit_grp == src_k->hsplit_grp) { @@ -600,12 +600,18 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) == dm_horz) { mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_width; + mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_width; mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] += dst_k->recout_width; } else { mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_height; + mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_height; } + mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] += + dout_k->dsc_slices; visited[k] = true; } @@ -811,7 +817,9 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib) unsigned int total_pipes = 0; mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage; - mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; + if (mode_lib->vba.ReturnBW == 0) + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = mode_lib->vba.FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; fetch_socbb_params(mode_lib); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 3eb657ed5714..e7a44df676ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -157,6 +157,7 @@ struct vba_vars_st { bool DummyPStateCheck; bool DRAMClockChangeSupportsVActive; bool PrefetchModeSupported; + bool PrefetchAndImmediateFlipSupported; enum self_refresh_affinity AllowDRAMSelfRefreshOrDRAMClockChangeInVblank; // Mode Support only double XFCRemoteSurfaceFlipDelay; double TInitXFill; @@ -318,8 +319,7 @@ struct vba_vars_st { unsigned int DynamicMetadataTransmittedBytes[DC__NUM_DPP__MAX]; double DCCRate[DC__NUM_DPP__MAX]; double AverageDCCCompressionRate; - bool ODMCombineEnabled[DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineTypeEnabled[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnabled[DC__NUM_DPP__MAX]; double OutputBpp[DC__NUM_DPP__MAX]; bool DSCEnabled[DC__NUM_DPP__MAX]; unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX]; @@ -347,6 +347,7 @@ struct vba_vars_st { unsigned int EffectiveLBLatencyHidingSourceLinesChroma; double BandwidthAvailableForImmediateFlip; unsigned int PrefetchMode[DC__VOLTAGE_STATES + 1][2]; + unsigned int PrefetchModePerState[DC__VOLTAGE_STATES + 1][2]; unsigned int MinPrefetchMode; unsigned int MaxPrefetchMode; bool AnyLinesForVMOrRowTooLarge; @@ -396,6 +397,7 @@ struct vba_vars_st { bool WritebackLumaAndChromaScalingSupported; bool Cursor64BppSupport; double DCFCLKPerState[DC__VOLTAGE_STATES + 1]; + double DCFCLKState[DC__VOLTAGE_STATES + 1][2]; double FabricClockPerState[DC__VOLTAGE_STATES + 1]; double SOCCLKPerState[DC__VOLTAGE_STATES + 1]; double PHYCLKPerState[DC__VOLTAGE_STATES + 1]; @@ -444,7 +446,7 @@ struct vba_vars_st { double OutputLinkDPLanes[DC__NUM_DPP__MAX]; double ForcedOutputLinkBPP[DC__NUM_DPP__MAX]; // Mode Support only double ImmediateFlipBW[DC__NUM_DPP__MAX]; - double MaxMaxVStartup; + double MaxMaxVStartup[DC__VOLTAGE_STATES + 1][2]; double WritebackLumaVExtra; double WritebackChromaVExtra; @@ -471,7 +473,7 @@ struct vba_vars_st { double RoundedUpMaxSwathSizeBytesC; double EffectiveDETLBLinesLuma; double EffectiveDETLBLinesChroma; - double ProjectedDCFCLKDeepSleep; + double ProjectedDCFCLKDeepSleep[DC__VOLTAGE_STATES + 1][2]; double PDEAndMetaPTEBytesPerFrameY; double PDEAndMetaPTEBytesPerFrameC; unsigned int MetaRowBytesY; @@ -489,12 +491,11 @@ struct vba_vars_st { double FractionOfUrgentBandwidthImmediateFlip; // Mode Support debugging output /* ms locals */ - double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1]; + double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1][2]; unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; int NoOfDPPThisState[DC__NUM_DPP__MAX]; - bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineTypeEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - unsigned int SwathWidthYThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; + double SwathWidthYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCPerState[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int SwathHeightYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCThisState[DC__NUM_DPP__MAX]; @@ -506,7 +507,7 @@ struct vba_vars_st { double RequiredDPPCLKThisState[DC__NUM_DPP__MAX]; bool PTEBufferSizeNotExceededY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; bool PTEBufferSizeNotExceededC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; - bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1]; + bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1][2]; bool PrefetchSupported[DC__VOLTAGE_STATES + 1][2]; bool VRatioInPrefetchSupported[DC__VOLTAGE_STATES + 1][2]; double RequiredDISPCLK[DC__VOLTAGE_STATES + 1][2]; @@ -515,22 +516,22 @@ struct vba_vars_st { unsigned int TotalNumberOfActiveDPP[DC__VOLTAGE_STATES + 1][2]; unsigned int TotalNumberOfDCCActiveDPP[DC__VOLTAGE_STATES + 1][2]; bool ModeSupport[DC__VOLTAGE_STATES + 1][2]; - double ReturnBWPerState[DC__VOLTAGE_STATES + 1]; + double ReturnBWPerState[DC__VOLTAGE_STATES + 1][2]; bool DIOSupport[DC__VOLTAGE_STATES + 1]; bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1]; bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1]; - bool ROBSupport[DC__VOLTAGE_STATES + 1]; + bool ROBSupport[DC__VOLTAGE_STATES + 1][2]; bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1][2]; - bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1]; - double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1]; + bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1][2]; + double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1][2]; double PrefetchBW[DC__NUM_DPP__MAX]; - double PDEAndMetaPTEBytesPerFrame[DC__NUM_DPP__MAX]; - double MetaRowBytes[DC__NUM_DPP__MAX]; - double DPTEBytesPerRow[DC__NUM_DPP__MAX]; - double PrefetchLinesY[DC__NUM_DPP__MAX]; - double PrefetchLinesC[DC__NUM_DPP__MAX]; + double PDEAndMetaPTEBytesPerFrame[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double MetaRowBytes[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DPTEBytesPerRow[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double PrefetchLinesY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double PrefetchLinesC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int MaxNumSwY[DC__NUM_DPP__MAX]; unsigned int MaxNumSwC[DC__NUM_DPP__MAX]; double PrefillY[DC__NUM_DPP__MAX]; @@ -539,7 +540,7 @@ struct vba_vars_st { double LinesForMetaPTE[DC__NUM_DPP__MAX]; double LinesForMetaAndDPTERow[DC__NUM_DPP__MAX]; double MinDPPCLKUsingSingleDPP[DC__NUM_DPP__MAX]; - unsigned int SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; + double SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; double BytePerPixelInDETY[DC__NUM_DPP__MAX]; double BytePerPixelInDETC[DC__NUM_DPP__MAX]; bool RequiresDSC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; @@ -547,7 +548,7 @@ struct vba_vars_st { double RequiresFEC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; double OutputBppPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; double DSCDelayPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1]; + bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1][2]; unsigned int Read256BlockHeightY[DC__NUM_DPP__MAX]; unsigned int Read256BlockWidthY[DC__NUM_DPP__MAX]; unsigned int Read256BlockHeightC[DC__NUM_DPP__MAX]; @@ -562,7 +563,7 @@ struct vba_vars_st { double WriteBandwidth[DC__NUM_DPP__MAX]; double PSCL_FACTOR[DC__NUM_DPP__MAX]; double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX]; - double MaximumVStartup[DC__NUM_DPP__MAX]; + double MaximumVStartup[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitch[DC__NUM_DPP__MAX]; @@ -579,7 +580,7 @@ struct vba_vars_st { bool ImmediateFlipSupportedForState[DC__VOLTAGE_STATES + 1][2]; double WritebackDelay[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; unsigned int vm_group_bytes[DC__NUM_DPP__MAX]; - long dpte_group_bytes[DC__NUM_DPP__MAX]; + unsigned int dpte_group_bytes[DC__NUM_DPP__MAX]; unsigned int dpte_row_height[DC__NUM_DPP__MAX]; unsigned int meta_req_height[DC__NUM_DPP__MAX]; unsigned int meta_req_width[DC__NUM_DPP__MAX]; @@ -605,14 +606,14 @@ struct vba_vars_st { double UrgentBurstFactorChroma[DC__NUM_DPP__MAX]; double UrgentBurstFactorChromaPre[DC__NUM_DPP__MAX]; + bool MPCCombine[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; double SwathWidthCSingleDPP[DC__NUM_DPP__MAX]; double MaximumSwathWidthInLineBufferLuma; double MaximumSwathWidthInLineBufferChroma; double MaximumSwathWidthLuma[DC__NUM_DPP__MAX]; double MaximumSwathWidthChroma[DC__NUM_DPP__MAX]; - bool odm_combine_dummy[DC__NUM_DPP__MAX]; - enum odm_combine_mode odm_combine_mode_dummy[DC__NUM_DPP__MAX]; + enum odm_combine_mode odm_combine_dummy[DC__NUM_DPP__MAX]; double dummy1[DC__NUM_DPP__MAX]; double dummy2[DC__NUM_DPP__MAX]; double dummy3[DC__NUM_DPP__MAX]; @@ -622,9 +623,9 @@ struct vba_vars_st { double dummy7[DC__NUM_DPP__MAX]; double dummy8[DC__NUM_DPP__MAX]; unsigned int dummyinteger1ms[DC__NUM_DPP__MAX]; - unsigned int dummyinteger2ms[DC__NUM_DPP__MAX]; + double dummyinteger2ms[DC__NUM_DPP__MAX]; unsigned int dummyinteger3[DC__NUM_DPP__MAX]; - unsigned int dummyinteger4; + unsigned int dummyinteger4[DC__NUM_DPP__MAX]; unsigned int dummyinteger5; unsigned int dummyinteger6; unsigned int dummyinteger7; @@ -637,7 +638,6 @@ struct vba_vars_st { unsigned int dummyintegerarr2[DC__NUM_DPP__MAX]; unsigned int dummyintegerarr3[DC__NUM_DPP__MAX]; unsigned int dummyintegerarr4[DC__NUM_DPP__MAX]; - long dummylongarr1[DC__NUM_DPP__MAX]; bool dummysinglestring; bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double PlaneRequiredDISPCLKWithODMCombine2To1; @@ -645,20 +645,19 @@ struct vba_vars_st { unsigned int TotalNumberOfSingleDPPPlanes[DC__VOLTAGE_STATES + 1][2]; bool LinkDSCEnable; bool ODMCombine4To1SupportCheckOK[DC__VOLTAGE_STATES + 1]; - bool ODMCombineEnableThisState[DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineEnableTypeThisState[DC__NUM_DPP__MAX]; - unsigned int SwathWidthCThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnableThisState[DC__NUM_DPP__MAX]; + double SwathWidthCThisState[DC__NUM_DPP__MAX]; bool ViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchY[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchC[DC__NUM_DPP__MAX]; unsigned int NotEnoughUrgentLatencyHiding; unsigned int NotEnoughUrgentLatencyHidingPre; - long PTEBufferSizeInRequestsForLuma; - long PTEBufferSizeInRequestsForChroma; + int PTEBufferSizeInRequestsForLuma; + int PTEBufferSizeInRequestsForChroma; // Missing from VBA - long dpte_group_bytes_chroma; + int dpte_group_bytes_chroma; unsigned int vm_group_bytes_chroma; double dst_x_after_scaler; double dst_y_after_scaler; @@ -683,8 +682,8 @@ struct vba_vars_st { double MinTTUVBlank[DC__NUM_DPP__MAX]; double BytePerPixelDETY[DC__NUM_DPP__MAX]; double BytePerPixelDETC[DC__NUM_DPP__MAX]; - unsigned int SwathWidthY[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; + double SwathWidthY[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; double CursorRequestDeliveryTime[DC__NUM_DPP__MAX]; double CursorRequestDeliveryTimePrefetch[DC__NUM_DPP__MAX]; double ReadBandwidthPlaneLuma[DC__NUM_DPP__MAX]; @@ -760,8 +759,8 @@ struct vba_vars_st { double LinesInDETY[DC__NUM_DPP__MAX]; double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPC[DC__NUM_DPP__MAX]; - unsigned int SwathWidthC[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPC[DC__NUM_DPP__MAX]; + double SwathWidthC[DC__NUM_DPP__MAX]; unsigned int BytePerPixelY[DC__NUM_DPP__MAX]; unsigned int BytePerPixelC[DC__NUM_DPP__MAX]; long dummyinteger1; @@ -779,6 +778,7 @@ struct vba_vars_st { unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX]; unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX]; double VStartupMargin; + bool NotEnoughTimeForDynamicMetadata; /* Missing from VBA */ unsigned int MaximumMaxVStartupLines; @@ -814,7 +814,7 @@ struct vba_vars_st { unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; double HRatioChroma[DC__NUM_DPP__MAX]; double VRatioChroma[DC__NUM_DPP__MAX]; - long WritebackSourceWidth[DC__NUM_DPP__MAX]; + int WritebackSourceWidth[DC__NUM_DPP__MAX]; bool ModeIsSupported; bool ODMCombine4To1Supported; @@ -850,6 +850,58 @@ struct vba_vars_st { unsigned int MaxNumHDMIFRLOutputs; int AudioSampleRate[DC__NUM_DPP__MAX]; int AudioSampleLayout[DC__NUM_DPP__MAX]; + + int PercentMarginOverMinimumRequiredDCFCLK; + bool DynamicMetadataSupported[DC__VOLTAGE_STATES + 1][2]; + enum immediate_flip_requirement ImmediateFlipRequirement; + double DETBufferSizeYThisState[DC__NUM_DPP__MAX]; + double DETBufferSizeCThisState[DC__NUM_DPP__MAX]; + bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX]; + bool NoUrgentLatencyHidingPre[DC__NUM_DPP__MAX]; + int swath_width_luma_ub_this_state[DC__NUM_DPP__MAX]; + int swath_width_chroma_ub_this_state[DC__NUM_DPP__MAX]; + double UrgLatency[DC__VOLTAGE_STATES + 1]; + double VActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double VActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NoTimeForPrefetch[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NoTimeForDynamicMetadata[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double dpte_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double meta_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DETBufferSizeYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DETBufferSizeCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES + 1][2]; + unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathWidthYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathWidthCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double TotalDPTERowBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalMetaRowBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalVActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalVActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2]; + bool UseMinimumRequiredDCFCLK; + double WritebackDelayTime[DC__NUM_DPP__MAX]; + unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX]; + unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX]; + unsigned int dummyinteger15; + unsigned int dummyinteger16; + unsigned int dummyinteger17; + unsigned int dummyinteger18; + unsigned int dummyinteger19; + unsigned int dummyinteger20; + unsigned int dummyinteger21; + unsigned int dummyinteger22; + unsigned int dummyinteger23; + unsigned int dummyinteger24; + unsigned int dummyinteger25; + unsigned int dummyinteger26; + unsigned int dummyinteger27; + unsigned int dummyinteger28; + unsigned int dummyinteger29; + bool dummystring[DC__NUM_DPP__MAX]; + double BPP; + enum odm_combine_policy ODMCombinePolicy; }; bool CalculateMinAndMaxPrefetchMode( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c index b953b02a1512..723af0b2dda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c @@ -24,7 +24,7 @@ */ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index eca140da13d8..ded71ea82413 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -27,7 +27,7 @@ #define __DML_INLINE_DEFS_H__ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_logger.h" static inline double dml_min(double a, double b) diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 16f6ef22367b..f285b76888fb 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -212,6 +212,7 @@ struct resource_pool { struct abm *abm; struct dmcu *dmcu; + struct dmub_psr *psr; const struct resource_funcs *funcs; const struct resource_caps *res_cap; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 6198bccd6199..8b1f0ce6c2a7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -76,6 +76,8 @@ void dp_enable_mst_on_sink(struct dc_link *link, bool enable); enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); +bool dp_overwrite_extended_receiver_cap(struct dc_link *link); + void dp_set_fec_ready(struct dc_link *link, bool ready); void dp_set_fec_enable(struct dc_link *link, bool enable); bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h index 45a07eeffbb6..45a07eeffbb6 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 125e42dbd3c5..45ef390ae052 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -47,6 +47,26 @@ struct dpp_input_csc_matrix { uint16_t regval[12]; }; +static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { + {COLOR_SPACE_SRGB, + {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + {COLOR_SPACE_SRGB_LIMITED, + {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + {COLOR_SPACE_YCBCR601, + {0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef, + 0, 0x2000, 0x38b4, 0xe3a6} }, + {COLOR_SPACE_YCBCR601_LIMITED, + {0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108, + 0, 0x2568, 0x40de, 0xdd3a} }, + {COLOR_SPACE_YCBCR709, + {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, + 0x2000, 0x3b61, 0xe24f} }, + + {COLOR_SPACE_YCBCR709_LIMITED, + {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, + 0x2568, 0x43ee, 0xdbb2} } +}; + struct dpp_grph_csc_adjustment { struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE]; enum graphics_gamut_adjust_type gamut_adjust_type; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 686145933335..2cb8466e657b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -63,6 +63,26 @@ struct hubp { bool power_gated; }; +struct surface_flip_registers { + uint32_t DCSURF_SURFACE_CONTROL; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; + bool tmz_surface; + bool immediate; + uint8_t vmid; + bool grph_stereo; +}; + struct hubp_funcs { void (*hubp_setup)( struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 2d3efd71fa51..e5e7d94026fc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -208,7 +208,8 @@ struct timing_generator_funcs { bool enable, const struct dc_crtc_timing *timing); void (*set_drr)(struct timing_generator *tg, const struct drr_params *params); void (*set_static_screen_control)(struct timing_generator *tg, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void (*set_test_pattern)( struct timing_generator *tg, enum controller_dp_test_pattern test_pattern, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index df3204645c6b..209118f9f193 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -42,7 +42,7 @@ struct dc_state; struct dc_stream_status; struct dc_writeback_info; struct dchub_init_data; -struct dc_static_screen_events; +struct dc_static_screen_params; struct resource_pool; struct dc_phy_addr_space_config; struct dc_virtual_addr_space_config; @@ -102,7 +102,7 @@ struct hw_sequencer_funcs { unsigned int vmid, unsigned int vmid_frame_number); void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, - const struct dc_static_screen_events *events); + const struct dc_static_screen_params *events); /* Stream Related */ void (*enable_stream)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index 8ba06f015975..ecf566378ccd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -51,7 +51,7 @@ struct dc_state; struct dc_stream_status; struct dc_writeback_info; struct dchub_init_data; -struct dc_static_screen_events; +struct dc_static_screen_params; struct resource_pool; struct resource_context; struct stream_resource; diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index 47e307388581..2470405e996b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -458,7 +458,14 @@ uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, #define IX_REG_READ(index_reg_name, data_reg_name, index) \ generic_read_indirect_reg(CTX, REG(index_reg_name), REG(data_reg_name), IND_REG(index)) +#define IX_REG_GET_N(index_reg_name, data_reg_name, index, n, ...) \ + generic_indirect_reg_get(CTX, REG(index_reg_name), REG(data_reg_name), \ + IND_REG(index), \ + n, __VA_ARGS__) +#define IX_REG_GET(index_reg_name, data_reg_name, index, field, val) \ + IX_REG_GET_N(index_reg_name, data_reg_name, index, 1, \ + FN(data_reg_name, field), val) #define IX_REG_UPDATE_N(index_reg_name, data_reg_name, index, n, ...) \ generic_indirect_reg_update_ex(CTX, \ @@ -479,6 +486,12 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, uint32_t index); +uint32_t generic_indirect_reg_get(const struct dc_context *ctx, + uint32_t addr_index, uint32_t addr_data, + uint32_t index, int n, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + ...); + uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, uint32_t index, uint32_t reg_val, int n, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b10728f33f62..cd9532b4f14d 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -36,6 +36,7 @@ #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY) #define REG_SET_MASK 0xFFFF + /* * Command IDs should be treated as stable ABI. * Do not reuse or modify IDs. @@ -47,6 +48,7 @@ enum dmub_cmd_type { DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2, DMUB_CMD__REG_SEQ_BURST_WRITE = 3, DMUB_CMD__REG_REG_WAIT = 4, + DMUB_CMD__PLAT_54186_WA = 5, DMUB_CMD__PSR = 64, DMUB_CMD__VBIOS = 128, }; @@ -145,6 +147,32 @@ struct dmub_rb_cmd_reg_wait { struct dmub_cmd_reg_wait_data reg_wait; }; +#ifndef PHYSICAL_ADDRESS_LOC +#define PHYSICAL_ADDRESS_LOC union large_integer +#endif + +struct dmub_cmd_PLAT_54186_wa { + uint32_t DCSURF_SURFACE_CONTROL; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; + struct { + uint8_t hubp_inst : 4; + uint8_t tmz_surface : 1; + uint8_t immediate :1; + uint8_t vmid : 4; + uint8_t grph_stereo : 1; + uint32_t reserved : 21; + } flip_params; + uint32_t reserved[9]; +}; + +struct dmub_rb_cmd_PLAT_54186_wa { + struct dmub_cmd_header header; + struct dmub_cmd_PLAT_54186_wa flip; +}; + struct dmub_cmd_digx_encoder_control_data { union dig_encoder_control_parameters_v1_5 dig; }; @@ -187,9 +215,28 @@ struct dmub_rb_cmd_dpphy_init { }; struct dmub_cmd_psr_copy_settings_data { - uint32_t reg1; - uint32_t reg2; - uint32_t reg3; + uint16_t psr_level; + uint8_t hubp_inst; + uint8_t dpp_inst; + uint8_t mpcc_inst; + uint8_t opp_inst; + uint8_t otg_inst; + uint8_t digfe_inst; + uint8_t digbe_inst; + uint8_t dpphy_inst; + uint8_t aux_inst; + uint8_t hyst_frames; + uint8_t hyst_lines; + uint8_t phy_num; + uint8_t phy_type; + uint8_t aux_repeat; + uint8_t smu_optimizations_en; + uint8_t skip_wait_for_pll_lock; + uint8_t frame_delay; + uint8_t smu_phy_id; + uint8_t num_of_controllers; + uint8_t link_rate; + uint8_t frame_cap_ind; }; struct dmub_rb_cmd_psr_copy_settings { @@ -206,31 +253,17 @@ struct dmub_rb_cmd_psr_set_level { struct dmub_cmd_psr_set_level_data psr_set_level_data; }; -struct dmub_rb_cmd_psr_disable { - struct dmub_cmd_header header; -}; - struct dmub_rb_cmd_psr_enable { struct dmub_cmd_header header; }; -struct dmub_cmd_psr_notify_vblank_data { - uint32_t vblank_int; // Which vblank interrupt was triggered -}; - -struct dmub_rb_cmd_notify_vblank { - struct dmub_cmd_header header; - struct dmub_cmd_psr_notify_vblank_data psr_notify_vblank_data; -}; - -struct dmub_cmd_psr_notify_static_state_data { - uint32_t ss_int; // Which static screen interrupt was triggered - uint32_t ss_enter; // Enter (1) or exit (0) static screen +struct dmub_cmd_psr_setup_data { + enum psr_version version; // PSR version 1 or 2 }; -struct dmub_rb_cmd_psr_notify_static_state { +struct dmub_rb_cmd_psr_setup { struct dmub_cmd_header header; - struct dmub_cmd_psr_notify_static_state_data psr_notify_static_state_data; + struct dmub_cmd_psr_setup_data psr_setup_data; }; union dmub_rb_cmd { @@ -245,9 +278,10 @@ union dmub_rb_cmd { struct dmub_rb_cmd_dpphy_init dpphy_init; struct dmub_rb_cmd_dig1_transmitter_control dig1_transmitter_control; struct dmub_rb_cmd_psr_enable psr_enable; - struct dmub_rb_cmd_psr_disable psr_disable; struct dmub_rb_cmd_psr_copy_settings psr_copy_settings; struct dmub_rb_cmd_psr_set_level psr_set_level; + struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa; + struct dmub_rb_cmd_psr_setup psr_setup; }; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h index 14f13e8a6f3b..7b69eb37f762 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h @@ -32,10 +32,17 @@ */ enum dmub_cmd_psr_type { - DMUB_CMD__PSR_ENABLE = 0, - DMUB_CMD__PSR_DISABLE = 1, - DMUB_CMD__PSR_COPY_SETTINGS = 2, - DMUB_CMD__PSR_SET_LEVEL = 3, + DMUB_CMD__PSR_SETUP = 0, + DMUB_CMD__PSR_COPY_SETTINGS = 1, + DMUB_CMD__PSR_ENABLE = 2, + DMUB_CMD__PSR_DISABLE = 3, + DMUB_CMD__PSR_SET_LEVEL = 4, +}; + +enum psr_version { + PSR_VERSION_1 = 0x10, // PSR Version 1 + PSR_VERSION_2 = 0x20, // PSR Version 2, includes selective update + PSR_VERSION_2_Y_COORD = 0x21, // PSR Version 2, includes Y-coordinate support for SU }; #endif /* _DMUB_CMD_DAL_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h index ac22744eaa94..df875fdd2ab0 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h @@ -73,12 +73,17 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const struct dmub_cmd_header *cmd) { - uint8_t *wt_ptr = (uint8_t *)(rb->base_address) + rb->wrpt; + uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); + const uint64_t *src = (const uint64_t *)cmd; + int i; if (dmub_rb_full(rb)) return false; - dmub_memcpy(wt_ptr, cmd, DMUB_RB_CMD_SIZE); + // copying data + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + *dst++ = *src++; + rb->wrpt += DMUB_RB_CMD_SIZE; if (rb->wrpt >= rb->capacity) @@ -113,6 +118,26 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) return true; } +static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) +{ + uint32_t rptr = rb->rptr; + uint32_t wptr = rb->wrpt; + + while (rptr != wptr) { + uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); + //uint64_t volatile *p = (uint64_t volatile *)data; + uint64_t temp; + int i; + + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + temp = *data++; + + rptr += DMUB_RB_CMD_SIZE; + if (rptr >= rb->capacity) + rptr %= rb->capacity; + } +} + static inline void dmub_rb_init(struct dmub_rb *rb, struct dmub_rb_init_params *init_params) { diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h index f34a50dd36ea..8e23a7017588 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h @@ -445,25 +445,6 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); /** - * dmub_srv_cmd_submit() - submits a command to the DMUB immediately - * @dmub: the dmub service - * @cmd: the command to submit - * @timeout_us: the maximum number of microseconds to wait - * - * Submits a command to the DMUB with an optional timeout. - * If timeout_us is given then the service will attempt to - * resubmit for the given number of microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - wait for submit timed out - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd, - uint32_t timeout_us); - -/** * dmub_srv_wait_for_auto_load() - Waits for firmware auto load to complete * @dmub: the dmub service * @timeout_us: the maximum number of microseconds to wait diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index f45e14ada685..cd51c6138894 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -66,10 +66,12 @@ void dmub_dcn20_reset(struct dmub_srv *dmub) { REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); } void dmub_dcn20_reset_release(struct dmub_srv *dmub) { + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF); REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h index 68af9b190288..53bfd4da69ad 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h @@ -91,7 +91,8 @@ struct dmub_srv; DMUB_SR(DMCUB_SCRATCH13) \ DMUB_SR(DMCUB_SCRATCH14) \ DMUB_SR(DMCUB_SCRATCH15) \ - DMUB_SR(CC_DC_PIPE_DIS) + DMUB_SR(CC_DC_PIPE_DIS) \ + DMUB_SR(MMHUBBUB_SOFT_RESET) #define DMUB_COMMON_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -119,7 +120,8 @@ struct dmub_srv; DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ - DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) + DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ + DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) struct dmub_srv_common_reg_offset { #define DMUB_SR(reg) uint32_t reg; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 9a959f871f11..dee676335d73 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -67,6 +67,26 @@ static inline uint32_t dmub_align(uint32_t val, uint32_t factor) return (val + factor - 1) / factor * factor; } +static void dmub_flush_buffer_mem(const struct dmub_fb *fb) +{ + const uint8_t *base = (const uint8_t *)fb->cpu_addr; + uint8_t buf[64]; + uint32_t pos, end; + + /** + * Read 64-byte chunks since we don't want to store a + * large temporary buffer for this purpose. + */ + end = fb->size / sizeof(buf) * sizeof(buf); + + for (pos = 0; pos < end; pos += sizeof(buf)) + dmub_memcpy(buf, base + pos, sizeof(buf)); + + /* Read anything leftover into the buffer. */ + if (end < fb->size) + dmub_memcpy(buf, base + pos, fb->size - end); +} + static const struct dmub_fw_meta_info * dmub_get_fw_meta_info(const uint8_t *fw_bss_data, uint32_t fw_bss_data_size) { @@ -329,6 +349,13 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, cw1.region.base = DMUB_CW1_BASE; cw1.region.top = cw1.region.base + stack_fb->size - 1; + /** + * Read back all the instruction memory so we don't hang the + * DMCUB when backdoor loading if the write from x86 hasn't been + * flushed yet. This only occurs in backdoor loading. + */ + dmub_flush_buffer_mem(inst_fb); + if (params->load_inst_const && dmub->hw_funcs.backdoor_load) dmub->hw_funcs.backdoor_load(dmub, &cw0, &cw1); } @@ -405,33 +432,17 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) if (!dmub->hw_init) return DMUB_STATUS_INVALID; + /** + * Read back all the queued commands to ensure that they've + * been flushed to framebuffer memory. Otherwise DMCUB might + * read back stale, fully invalid or partially invalid data. + */ + dmub_rb_flush_pending(&dmub->inbox1_rb); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd, - uint32_t timeout_us) -{ - uint32_t i = 0; - - if (!dmub->hw_init) - return DMUB_STATUS_INVALID; - - for (i = 0; i <= timeout_us; ++i) { - dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) { - dmub->hw_funcs.set_inbox1_wptr(dmub, - dmub->inbox1_rb.wrpt); - return DMUB_STATUS_OK; - } - - udelay(1); - } - - return DMUB_STATUS_TIMEOUT; -} - enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, uint32_t timeout_us) { diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 11d7daf6f076..a2903985b9e8 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -134,8 +134,13 @@ #define PICASSO_A0 0x41 /* DCN1_01 */ #define RAVEN2_A0 0x81 +#define RAVEN2_15D8_REV_94 0x94 +#define RAVEN2_15D8_REV_95 0x95 #define RAVEN2_15D8_REV_E3 0xE3 #define RAVEN2_15D8_REV_E4 0xE4 +#define RAVEN2_15D8_REV_E9 0xE9 +#define RAVEN2_15D8_REV_EA 0xEA +#define RAVEN2_15D8_REV_EB 0xEB #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF #ifndef ASICREV_IS_RAVEN @@ -149,6 +154,11 @@ #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) #define ASICREV_IS_DALI(eChipRev) ((eChipRev == RAVEN2_15D8_REV_E3) \ || (eChipRev == RAVEN2_15D8_REV_E4)) +#define ASICREV_IS_POLLOCK(eChipRev) (eChipRev == RAVEN2_15D8_REV_94 \ + || eChipRev == RAVEN2_15D8_REV_95 \ + || eChipRev == RAVEN2_15D8_REV_E9 \ + || eChipRev == RAVEN2_15D8_REV_EA \ + || eChipRev == RAVEN2_15D8_REV_EB) #define FAMILY_RV 142 /* DCN 1*/ diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index fa57885503d4..6e5ecefe7d9d 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -816,6 +816,8 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; in_out_vrr->btr.frame_counter = 0; + in_out_vrr->fixed.fixed_active = false; + in_out_vrr->fixed.target_refresh_in_uhz = 0; in_out_vrr->btr.mid_point_in_us = (in_out_vrr->min_duration_in_us + @@ -832,6 +834,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->adjust.v_total_max = stream->timing.v_total; } else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && refresh_range >= MIN_REFRESH_RANGE_IN_US) { + in_out_vrr->adjust.v_total_min = calc_v_total_from_refresh(stream, in_out_vrr->max_refresh_in_uhz); diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 4e2f615c3566..e75a4bb94488 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -662,7 +662,11 @@ bool dmcu_load_iram(struct dmcu *dmcu, memset(&ram_table, 0, sizeof(ram_table)); - if (dmcu->dmcu_version.abm_version == 0x23) { + if (dmcu->dmcu_version.abm_version == 0x24) { + fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params); + result = dmcu->funcs->load_iram( + dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2); + } else if (dmcu->dmcu_version.abm_version == 0x23) { fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params); result = dmcu->funcs->load_iram( @@ -687,3 +691,4 @@ bool dmcu_load_iram(struct dmcu *dmcu, return result; } + diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h index cff8f91555d3..e9b2bd84cfed 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h @@ -8134,6 +8134,10 @@ #define mmMPC_OUT5_CSC_C33_C34_B 0x1604 #define mmMPC_OUT5_CSC_C33_C34_B_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_INDEX 0x163b +#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA 0x163c // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec // base address: 0x5964 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h index 10c83fecd147..dc8ce7aaa0cf 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h @@ -28263,7 +28263,14 @@ #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B__SHIFT 0x10 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C33_B_MASK 0x0000FFFFL #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B_MASK 0xFFFF0000L - +//MPC_OCSC_TEST_DEBUG_INDEX +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//MPC_OCSC_TEST_DEBUG_DATA +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec //DC_PERFMON17_PERFCOUNTER_CNTL diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h index eddf83ec1c39..7cd0ee61c030 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h @@ -7103,7 +7103,10 @@ #define mmMPC_OUT3_CSC_C31_C32_B_BASE_IDX 2 #define mmMPC_OUT3_CSC_C33_C34_B 0x15ea #define mmMPC_OUT3_CSC_C33_C34_B_BASE_IDX 2 - +#define mmMPC_OCSC_TEST_DEBUG_INDEX 0x163b +#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA 0x163c // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec // base address: 0x5964 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h index faa0e76e32b4..2f780aefc722 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h @@ -56634,5 +56634,13 @@ #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_BYTE_5_MASK 0x00FF0000L #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_VALID_MASK 0x80000000L +//MPC_OCSC_TEST_DEBUG_INDEX +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//MPC_OCSC_TEST_DEBUG_DATA +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index f301e5fe2109..87c84691b5be 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -27,6 +27,9 @@ #define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc #define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 +#define mmDF_CS_UMC_AON0_DfGlobalCtrl 0x00fe +#define mmDF_CS_UMC_AON0_DfGlobalCtrl_BASE_IDX 0 + #define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h index 06fac509e987..65e9f756e86e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h @@ -33,6 +33,14 @@ #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL +/* DF_CS_UMC_AON0_DfGlobalCtrl */ +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K__SHIFT 0x14 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M__SHIFT 0x15 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G__SHIFT 0x16 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K_MASK 0x00100000L +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M_MASK 0x00200000L +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G_MASK 0x00400000L + /* DF_CS_AON0_DramBaseAddress0 */ #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index d4c613a85352..c9e3f6d849a8 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -8739,10 +8739,16 @@ #define TCP_ADDR_CONFIG__NUM_BANKS__SHIFT 0x4 #define TCP_ADDR_CONFIG__COLHI_WIDTH__SHIFT 0x6 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI__SHIFT 0x9 +#define TCP_ADDR_CONFIG__ENABLE64KHASH__SHIFT 0xb +#define TCP_ADDR_CONFIG__ENABLE2MHASH__SHIFT 0xc +#define TCP_ADDR_CONFIG__ENABLE1GHASH__SHIFT 0xd #define TCP_ADDR_CONFIG__NUM_TCC_BANKS_MASK 0x0000000FL #define TCP_ADDR_CONFIG__NUM_BANKS_MASK 0x00000030L #define TCP_ADDR_CONFIG__COLHI_WIDTH_MASK 0x000001C0L #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI_MASK 0x00000200L +#define TCP_ADDR_CONFIG__ENABLE64KHASH_MASK 0x00000800L +#define TCP_ADDR_CONFIG__ENABLE2MHASH_MASK 0x00001000L +#define TCP_ADDR_CONFIG__ENABLE1GHASH_MASK 0x00002000L //TCP_CREDIT #define TCP_CREDIT__LFIFO_CREDIT__SHIFT 0x0 #define TCP_CREDIT__REQ_FIFO_CREDIT__SHIFT 0x10 diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h index 043aa695d63f..0d6b594be775 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h @@ -27,5 +27,7 @@ #define mmUMCCH0_0_EccErrCnt_BASE_IDX 0 #define mmMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 0 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0 0x03c4 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX 0 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h index 03be415e9555..ce005c674a18 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h @@ -27,5 +27,7 @@ #define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX 1 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT 0x03c2 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX 1 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT 0x03c4 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT_BASE_IDX 1 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h new file mode 100644 index 000000000000..a5a8c993ec3a --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_6_1_2_SH_MASK_HEADER +#define _umc_6_1_2_SH_MASK_HEADER + +//UMCCH0_0_EccErrCntSel_ARCT +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel__SHIFT 0x0 +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt__SHIFT 0xc +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn__SHIFT 0xf +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel_MASK 0x0000000FL +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt_MASK 0x00003000L +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn_MASK 0x00008000L +//UMCCH0_0_EccErrCnt_ARCT +#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt__SHIFT 0x0 +#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt_MASK 0x0000FFFFL +//MCA_UMC_UMC0_MCUMC_STATUST0_ARCT +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC__SHIFT 0x2e +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En__SHIFT 0x3c +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val__SHIFT 0x3f +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0_MASK 0x00000000FFC00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC_MASK 0x0080000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val_MASK 0x8000000000000000L +//MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB_MASK 0x3F00000000000000L +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved_MASK 0xC000000000000000L + +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 2cd217e60125..a607b1034962 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -256,6 +256,10 @@ struct kfd2kgd_calls { uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); + int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); @@ -307,8 +311,6 @@ struct kfd2kgd_calls { void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); - int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); uint64_t (*get_hive_id)(struct kgd_dev *kgd); diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 1c15c6fbe3b9..14ba6aa876e2 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1318,6 +1318,7 @@ static int arcturus_get_power_limit(struct smu_context *smu, static int arcturus_get_power_profile_mode(struct smu_context *smu, char *buf) { + struct amdgpu_device *adev = smu->adev; DpmActivityMonitorCoeffInt_t activity_monitor; static const char *profile_name[] = { "BOOTUP_DEFAULT", @@ -1351,7 +1352,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, if (result) return result; - if (smu_version >= 0x360d00) + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); @@ -1368,7 +1369,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, if (workload_type < 0) continue; - if (smu_version >= 0x360d00) { + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) { result = smu_update_table(smu, SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type, @@ -1383,7 +1384,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, size += sprintf(buf + size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); - if (smu_version >= 0x360d00) { + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) { size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h index c27c82851468..2f85a34c0591 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU12_DRIVER_IF_VERSION 10 +#define SMU12_DRIVER_IF_VERSION 11 typedef struct { int32_t value; @@ -192,6 +192,11 @@ typedef struct { uint16_t SocTemperature; //[centi-Celsius] uint16_t ThrottlerStatus; uint16_t spare; + + uint16_t StapmOriginalLimit; //[mW] + uint16_t StapmCurrentLimit; //[mW] + uint16_t ApuPower; //[mW] + uint16_t dGpuPower; //[mW] } SmuMetrics_t; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index e804f9854027..02f8c9cb89d9 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1154,11 +1154,12 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; uint32_t val; + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, - range.max / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); + high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); if (low > high) return -EINVAL; diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index 875a3a9eabfa..7d0e7b031e44 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -56,7 +56,7 @@ malidp_mw_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } -const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = { +static const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = { .get_modes = malidp_mw_connector_get_modes, .mode_valid = malidp_mw_connector_mode_valid, }; diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 895b73f23079..6d4a29e99ae2 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -115,6 +115,33 @@ drm_client_find_modeset(struct drm_client_dev *client, struct drm_crtc *crtc) } static struct drm_display_mode * +drm_connector_get_tiled_mode(struct drm_connector *connector) +{ + struct drm_display_mode *mode; + + list_for_each_entry(mode, &connector->modes, head) { + if (mode->hdisplay == connector->tile_h_size && + mode->vdisplay == connector->tile_v_size) + return mode; + } + return NULL; +} + +static struct drm_display_mode * +drm_connector_fallback_non_tiled_mode(struct drm_connector *connector) +{ + struct drm_display_mode *mode; + + list_for_each_entry(mode, &connector->modes, head) { + if (mode->hdisplay == connector->tile_h_size && + mode->vdisplay == connector->tile_v_size) + continue; + return mode; + } + return NULL; +} + +static struct drm_display_mode * drm_connector_has_preferred_mode(struct drm_connector *connector, int width, int height) { struct drm_display_mode *mode; @@ -348,8 +375,15 @@ static bool drm_client_target_preferred(struct drm_connector **connectors, struct drm_connector *connector; u64 conn_configured = 0; int tile_pass = 0; + int num_tiled_conns = 0; int i; + for (i = 0; i < connector_count; i++) { + if (connectors[i]->has_tile && + connectors[i]->status == connector_status_connected) + num_tiled_conns++; + } + retry: for (i = 0; i < connector_count; i++) { connector = connectors[i]; @@ -399,6 +433,28 @@ retry: list_for_each_entry(modes[i], &connector->modes, head) break; } + /* + * In case of tiled mode if all tiles not present fallback to + * first available non tiled mode. + * After all tiles are present, try to find the tiled mode + * for all and if tiled mode not present due to fbcon size + * limitations, use first non tiled mode only for + * tile 0,0 and set to no mode for all other tiles. + */ + if (connector->has_tile) { + if (num_tiled_conns < + connector->num_h_tile * connector->num_v_tile || + (connector->tile_h_loc == 0 && + connector->tile_v_loc == 0 && + !drm_connector_get_tiled_mode(connector))) { + DRM_DEBUG_KMS("Falling back to non tiled mode on Connector %d\n", + connector->base.id); + modes[i] = drm_connector_fallback_non_tiled_mode(connector); + } else { + modes[i] = drm_connector_get_tiled_mode(connector); + } + } + DRM_DEBUG_KMS("found mode %s\n", modes[i] ? modes[i]->name : "none"); conn_configured |= BIT_ULL(i); @@ -515,6 +571,7 @@ static bool drm_client_firmware_config(struct drm_client_dev *client, bool fallback = true, ret = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; + int num_tiled_conns = 0; struct drm_modeset_acquire_ctx ctx; if (!drm_drv_uses_atomic_modeset(dev)) @@ -532,6 +589,11 @@ static bool drm_client_firmware_config(struct drm_client_dev *client, memcpy(save_enabled, enabled, count); mask = GENMASK(count - 1, 0); conn_configured = 0; + for (i = 0; i < count; i++) { + if (connectors[i]->has_tile && + connectors[i]->status == connector_status_connected) + num_tiled_conns++; + } retry: conn_seq = conn_configured; for (i = 0; i < count; i++) { @@ -631,6 +693,16 @@ retry: connector->name); modes[i] = &connector->state->crtc->mode; } + /* + * In case of tiled modes, if all tiles are not present + * then fallback to a non tiled mode. + */ + if (connector->has_tile && + num_tiled_conns < connector->num_h_tile * connector->num_v_tile) { + DRM_DEBUG_KMS("Falling back to non tiled mode on Connector %d\n", + connector->base.id); + modes[i] = drm_connector_fallback_non_tiled_mode(connector); + } crtcs[i] = new_crtc; DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n", diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 5d3c1d379277..38bf111e5f9b 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -398,7 +398,7 @@ drm_dp_encode_sideband_req(const struct drm_dp_sideband_msg_req_body *req, memcpy(&buf[idx], req->u.i2c_read.transactions[i].bytes, req->u.i2c_read.transactions[i].num_bytes); idx += req->u.i2c_read.transactions[i].num_bytes; - buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 5; + buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 4; buf[idx] |= (req->u.i2c_read.transactions[i].i2c_transaction_delay & 0xf); idx++; } @@ -1209,6 +1209,8 @@ static int drm_dp_mst_wait_tx_reply(struct drm_dp_mst_branch *mstb, txmsg->state == DRM_DP_SIDEBAND_TX_SENT) { mstb->tx_slots[txmsg->seqno] = NULL; } + mgr->is_waiting_for_dwn_reply = false; + } out: if (unlikely(ret == -EIO) && drm_debug_enabled(DRM_UT_DP)) { @@ -1218,6 +1220,7 @@ out: } mutex_unlock(&mgr->qlock); + drm_dp_mst_kick_tx(mgr); return ret; } @@ -2342,7 +2345,7 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb, { struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; struct drm_dp_mst_port *port; - int old_ddps, ret; + int old_ddps, old_input, ret, i; u8 new_pdt; bool dowork = false, create_connector = false; @@ -2373,6 +2376,7 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb, } old_ddps = port->ddps; + old_input = port->input; port->input = conn_stat->input_port; port->mcs = conn_stat->message_capability_status; port->ldps = conn_stat->legacy_device_plug_status; @@ -2397,6 +2401,28 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb, dowork = false; } + if (!old_input && old_ddps != port->ddps && !port->ddps) { + for (i = 0; i < mgr->max_payloads; i++) { + struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i]; + struct drm_dp_mst_port *port_validated; + + if (!vcpi) + continue; + + port_validated = + container_of(vcpi, struct drm_dp_mst_port, vcpi); + port_validated = + drm_dp_mst_topology_get_port_validated(mgr, port_validated); + if (!port_validated) { + mutex_lock(&mgr->payload_lock); + vcpi->num_slots = 0; + mutex_unlock(&mgr->payload_lock); + } else { + drm_dp_mst_topology_put_port(port_validated); + } + } + } + if (port->connector) drm_modeset_unlock(&mgr->base.lock); else if (create_connector) @@ -2759,9 +2785,11 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) ret = process_single_tx_qlock(mgr, txmsg, false); if (ret == 1) { /* txmsg is sent it should be in the slots now */ + mgr->is_waiting_for_dwn_reply = true; list_del(&txmsg->next); } else if (ret) { DRM_DEBUG_KMS("failed to send msg in q %d\n", ret); + mgr->is_waiting_for_dwn_reply = false; list_del(&txmsg->next); if (txmsg->seqno != -1) txmsg->dst->tx_slots[txmsg->seqno] = NULL; @@ -2801,7 +2829,8 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, drm_dp_mst_dump_sideband_msg_tx(&p, txmsg); } - if (list_is_singular(&mgr->tx_msg_downq)) + if (list_is_singular(&mgr->tx_msg_downq) && + !mgr->is_waiting_for_dwn_reply) process_single_down_tx_qlock(mgr); mutex_unlock(&mgr->qlock); } @@ -3756,6 +3785,7 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) mutex_lock(&mgr->qlock); txmsg->state = DRM_DP_SIDEBAND_TX_RX; mstb->tx_slots[slot] = NULL; + mgr->is_waiting_for_dwn_reply = false; mutex_unlock(&mgr->qlock); wake_up_all(&mgr->tx_waitq); @@ -3765,6 +3795,9 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) no_msg: drm_dp_mst_topology_put_mstb(mstb); clear_down_rep_recv: + mutex_lock(&mgr->qlock); + mgr->is_waiting_for_dwn_reply = false; + mutex_unlock(&mgr->qlock); memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); return 0; @@ -4597,7 +4630,7 @@ static void drm_dp_tx_work(struct work_struct *work) struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work); mutex_lock(&mgr->qlock); - if (!list_empty(&mgr->tx_msg_downq)) + if (!list_empty(&mgr->tx_msg_downq) && !mgr->is_waiting_for_dwn_reply) process_single_down_tx_qlock(mgr); mutex_unlock(&mgr->qlock); } @@ -5001,6 +5034,9 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state) int i, ret = 0; for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { + if (!mgr->mst_state) + continue; + ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state); if (ret) break; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index f8e905192608..4c7cbce7bae7 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1267,7 +1267,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, * Changes struct fb_var_screeninfo are currently not pushed back * to KMS, hence fail if different settings are requested. */ - if (var->bits_per_pixel != fb->format->cpp[0] * 8 || + if (var->bits_per_pixel > fb->format->cpp[0] * 8 || var->xres > fb->width || var->yres > fb->height || var->xres_virtual > fb->width || var->yres_virtual > fb->height) { drm_dbg_kms(dev, "fb requested width/height/bpp can't fit in current fb " @@ -1293,6 +1293,11 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, } /* + * Likewise, bits_per_pixel should be rounded up to a supported value. + */ + var->bits_per_pixel = fb->format->cpp[0] * 8; + + /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. */ @@ -1561,7 +1566,9 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, for (j = 0; j < mode_set->num_connectors; j++) { struct drm_connector *connector = mode_set->connectors[j]; - if (connector->has_tile) { + if (connector->has_tile && + desired_mode->hdisplay == connector->tile_h_size && + desired_mode->vdisplay == connector->tile_v_size) { lasth = (connector->tile_h_loc == (connector->num_h_tile - 1)); lastv = (connector->tile_v_loc == (connector->num_v_tile - 1)); /* cloning to multiple tiles is just crazy-talk, so: */ diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 6f7d3b3b3628..6417f374b923 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -1,13 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_EXYNOS - tristate "DRM Support for Samsung SoC EXYNOS Series" + tristate "DRM Support for Samsung SoC Exynos Series" depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST) depends on MMU select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC help - Choose this option if you have a Samsung SoC EXYNOS chipset. + Choose this option if you have a Samsung SoC Exynos chipset. If M is selected the module will be called exynosdrm. if DRM_EXYNOS @@ -62,7 +62,7 @@ config DRM_EXYNOS_DSI This enables support for Exynos MIPI-DSI device. config DRM_EXYNOS_DP - bool "EXYNOS specific extensions for Analogix DP driver" + bool "Exynos specific extensions for Analogix DP driver" depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON select DRM_ANALOGIX_DP default DRM_EXYNOS diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 2d5cbfda3ca7..8428ae12dfa5 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -510,7 +510,7 @@ static void decon_swreset(struct decon_context *ctx) ctx->addr + DECON_CRCCTRL); } -static void decon_enable(struct exynos_drm_crtc *crtc) +static void decon_atomic_enable(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; @@ -523,7 +523,7 @@ static void decon_enable(struct exynos_drm_crtc *crtc) decon_commit(ctx->crtc); } -static void decon_disable(struct exynos_drm_crtc *crtc) +static void decon_atomic_disable(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; int i; @@ -599,8 +599,8 @@ static enum drm_mode_status decon_mode_valid(struct exynos_drm_crtc *crtc, } static const struct exynos_drm_crtc_ops decon_crtc_ops = { - .enable = decon_enable, - .disable = decon_disable, + .atomic_enable = decon_atomic_enable, + .atomic_disable = decon_atomic_disable, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, .atomic_begin = decon_atomic_begin, @@ -651,7 +651,7 @@ static void decon_unbind(struct device *dev, struct device *master, void *data) { struct decon_context *ctx = dev_get_drvdata(dev); - decon_disable(ctx->crtc); + decon_atomic_disable(ctx->crtc); /* detach this sub driver from iommu mapping if supported. */ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev); diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f0640950bd46..ff59c641fa80 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -526,7 +526,7 @@ static void decon_init(struct decon_context *ctx) writel(VIDCON1_VCLK_HOLD, ctx->regs + VIDCON1(0)); } -static void decon_enable(struct exynos_drm_crtc *crtc) +static void decon_atomic_enable(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; @@ -546,7 +546,7 @@ static void decon_enable(struct exynos_drm_crtc *crtc) ctx->suspended = false; } -static void decon_disable(struct exynos_drm_crtc *crtc) +static void decon_atomic_disable(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; int i; @@ -568,8 +568,8 @@ static void decon_disable(struct exynos_drm_crtc *crtc) } static const struct exynos_drm_crtc_ops decon_crtc_ops = { - .enable = decon_enable, - .disable = decon_disable, + .atomic_enable = decon_atomic_enable, + .atomic_disable = decon_atomic_disable, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, .atomic_begin = decon_atomic_begin, @@ -653,7 +653,7 @@ static void decon_unbind(struct device *dev, struct device *master, { struct decon_context *ctx = dev_get_drvdata(dev); - decon_disable(ctx->crtc); + decon_atomic_disable(ctx->crtc); if (ctx->encoder) exynos_dpi_remove(ctx->encoder); diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 77ce78986408..1c03485676ef 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -23,8 +23,8 @@ static void exynos_drm_crtc_atomic_enable(struct drm_crtc *crtc, { struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - if (exynos_crtc->ops->enable) - exynos_crtc->ops->enable(exynos_crtc); + if (exynos_crtc->ops->atomic_enable) + exynos_crtc->ops->atomic_enable(exynos_crtc); drm_crtc_vblank_on(crtc); } @@ -36,8 +36,8 @@ static void exynos_drm_crtc_atomic_disable(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); - if (exynos_crtc->ops->disable) - exynos_crtc->ops->disable(exynos_crtc); + if (exynos_crtc->ops->atomic_disable) + exynos_crtc->ops->atomic_disable(exynos_crtc); if (crtc->state->event && !crtc->state->active) { spin_lock_irq(&crtc->dev->event_lock); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index d4014ba592fd..d4d21d8cfb90 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -118,8 +118,8 @@ struct exynos_drm_plane_config { /* * Exynos drm crtc ops * - * @enable: enable the device - * @disable: disable the device + * @atomic_enable: enable the device + * @atomic_disable: disable the device * @enable_vblank: specific driver callback for enabling vblank interrupt. * @disable_vblank: specific driver callback for disabling vblank interrupt. * @mode_valid: specific driver callback for mode validation @@ -133,8 +133,8 @@ struct exynos_drm_plane_config { */ struct exynos_drm_crtc; struct exynos_drm_crtc_ops { - void (*enable)(struct exynos_drm_crtc *crtc); - void (*disable)(struct exynos_drm_crtc *crtc); + void (*atomic_enable)(struct exynos_drm_crtc *crtc); + void (*atomic_disable)(struct exynos_drm_crtc *crtc); int (*enable_vblank)(struct exynos_drm_crtc *crtc); void (*disable_vblank)(struct exynos_drm_crtc *crtc); enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 8d0a929104e5..21aec38702fc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -894,7 +894,7 @@ static void fimd_disable_plane(struct exynos_drm_crtc *crtc, fimd_enable_shadow_channel_path(ctx, win, false); } -static void fimd_enable(struct exynos_drm_crtc *crtc) +static void fimd_atomic_enable(struct exynos_drm_crtc *crtc) { struct fimd_context *ctx = crtc->ctx; @@ -912,7 +912,7 @@ static void fimd_enable(struct exynos_drm_crtc *crtc) fimd_commit(ctx->crtc); } -static void fimd_disable(struct exynos_drm_crtc *crtc) +static void fimd_atomic_disable(struct exynos_drm_crtc *crtc) { struct fimd_context *ctx = crtc->ctx; int i; @@ -1006,8 +1006,8 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { - .enable = fimd_enable, - .disable = fimd_disable, + .atomic_enable = fimd_atomic_enable, + .atomic_disable = fimd_atomic_disable, .enable_vblank = fimd_enable_vblank, .disable_vblank = fimd_disable_vblank, .atomic_begin = fimd_atomic_begin, @@ -1098,7 +1098,7 @@ static void fimd_unbind(struct device *dev, struct device *master, { struct fimd_context *ctx = dev_get_drvdata(dev); - fimd_disable(ctx->crtc); + fimd_atomic_disable(ctx->crtc); exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 7ae087b0504d..88b6fcaa20be 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1313,6 +1313,7 @@ static int gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; + component_del(dev, &gsc_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 65b891cb9c50..b320b3a21ad4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -120,7 +120,7 @@ static void vidi_update_plane(struct exynos_drm_crtc *crtc, DRM_DEV_DEBUG_KMS(ctx->dev, "dma_addr = %pad\n", &addr); } -static void vidi_enable(struct exynos_drm_crtc *crtc) +static void vidi_atomic_enable(struct exynos_drm_crtc *crtc) { struct vidi_context *ctx = crtc->ctx; @@ -133,7 +133,7 @@ static void vidi_enable(struct exynos_drm_crtc *crtc) drm_crtc_vblank_on(&crtc->base); } -static void vidi_disable(struct exynos_drm_crtc *crtc) +static void vidi_atomic_disable(struct exynos_drm_crtc *crtc) { struct vidi_context *ctx = crtc->ctx; @@ -147,8 +147,8 @@ static void vidi_disable(struct exynos_drm_crtc *crtc) } static const struct exynos_drm_crtc_ops vidi_crtc_ops = { - .enable = vidi_enable, - .disable = vidi_disable, + .atomic_enable = vidi_atomic_enable, + .atomic_disable = vidi_atomic_disable, .enable_vblank = vidi_enable_vblank, .disable_vblank = vidi_disable_vblank, .update_plane = vidi_update_plane, diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 6cfdb95fef2f..38ae9c32feef 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -986,7 +986,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) exynos_crtc_handle_event(crtc); } -static void mixer_enable(struct exynos_drm_crtc *crtc) +static void mixer_atomic_enable(struct exynos_drm_crtc *crtc) { struct mixer_context *ctx = crtc->ctx; @@ -1015,7 +1015,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) set_bit(MXR_BIT_POWERED, &ctx->flags); } -static void mixer_disable(struct exynos_drm_crtc *crtc) +static void mixer_atomic_disable(struct exynos_drm_crtc *crtc) { struct mixer_context *ctx = crtc->ctx; int i; @@ -1109,8 +1109,8 @@ static bool mixer_mode_fixup(struct exynos_drm_crtc *crtc, } static const struct exynos_drm_crtc_ops mixer_crtc_ops = { - .enable = mixer_enable, - .disable = mixer_disable, + .atomic_enable = mixer_atomic_enable, + .atomic_disable = mixer_atomic_disable, .enable_vblank = mixer_enable_vblank, .disable_vblank = mixer_disable_vblank, .atomic_begin = mixer_atomic_begin, diff --git a/drivers/gpu/drm/i915/.gitignore b/drivers/gpu/drm/i915/.gitignore new file mode 100644 index 000000000000..d9a77f3b59b2 --- /dev/null +++ b/drivers/gpu/drm/i915/.gitignore @@ -0,0 +1 @@ +*.hdrtest diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b0c53661f62b..b8c5f8934dbd 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -31,9 +31,6 @@ CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init) subdir-ccflags-y += \ $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h - subdir-ccflags-y += -I$(srctree)/$(src) # Please keep these build lists sorted! @@ -73,11 +70,12 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o display/intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # "Graphics Technology" (aka we talk to the gpu) -obj-y += gt/ gt-y += \ gt/debugfs_engines.o \ gt/debugfs_gt.o \ gt/debugfs_gt_pm.o \ + gt/gen6_ppgtt.o \ + gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ gt/intel_engine_cs.o \ @@ -85,14 +83,17 @@ gt-y += \ gt/intel_engine_pm.o \ gt/intel_engine_pool.o \ gt/intel_engine_user.o \ + gt/intel_ggtt.o \ gt/intel_gt.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ gt/intel_gt_requests.o \ + gt/intel_gtt.o \ gt/intel_llc.o \ gt/intel_lrc.o \ gt/intel_mocs.o \ + gt/intel_ppgtt.o \ gt/intel_rc6.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ @@ -111,7 +112,6 @@ gt-y += \ i915-y += $(gt-y) # GEM (Graphics Execution Management) code -obj-y += gem/ gem-y += \ gem/i915_gem_busy.o \ gem/i915_gem_clflush.o \ @@ -157,7 +157,6 @@ i915-y += \ intel_wopcm.o # general-purpose microcontroller (GuC) support -obj-y += gt/uc/ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_uc_fw.o \ gt/uc/intel_guc.o \ @@ -170,7 +169,6 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_huc_fw.o # modesetting core code -obj-y += display/ i915-y += \ display/intel_atomic.o \ display/intel_atomic_plane.o \ @@ -235,7 +233,6 @@ i915-y += \ display/vlv_dsi_pll.o # perf code -obj-y += oa/ i915-y += \ oa/i915_oa_hsw.o \ oa/i915_oa_bdw.o \ @@ -260,6 +257,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ gem/selftests/igt_gem_utils.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ + selftests/igt_atomic.o \ selftests/igt_flush_test.o \ selftests/igt_live_test.o \ selftests/igt_mmap.o \ @@ -276,3 +274,27 @@ endif obj-$(CONFIG_DRM_I915) += i915.o obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o + +# header test + +# exclude some broken headers from the test coverage +no-header-test := \ + display/intel_vbt_defs.h \ + gvt/execlist.h \ + gvt/fb_decoder.h \ + gvt/gtt.h \ + gvt/gvt.h \ + gvt/interrupt.h \ + gvt/mmio_context.h \ + gvt/mpt.h \ + gvt/scheduler.h + +extra-$(CONFIG_DRM_I915_WERROR) += \ + $(patsubst %.h,%.hdrtest, $(filter-out $(no-header-test), \ + $(shell cd $(srctree)/$(src) && find * -name '*.h'))) + +quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) + cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@ + +$(obj)/%.hdrtest: $(src)/%.h FORCE + $(call if_changed_dep,hdrtest) diff --git a/drivers/gpu/drm/i915/display/Makefile b/drivers/gpu/drm/i915/display/Makefile deleted file mode 100644 index 173c305d7866..000000000000 --- a/drivers/gpu/drm/i915/display/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h -header-test- := intel_vbt_defs.h diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 006b1a297e6f..f8e882101396 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -77,7 +77,7 @@ static enum transcoder dsi_port_to_transcoder(enum port port) static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; enum port port; enum transcoder dsi_trans; @@ -202,7 +202,7 @@ static int dsi_send_pkt_payld(struct intel_dsi_host *host, static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum phy phy; u32 tmp; int lane; @@ -267,7 +267,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 dss_ctl1; dss_ctl1 = I915_READ(DSS_CTL1); @@ -306,7 +306,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, static int afe_clk(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); int bpp; if (crtc_state->dsc.compression_enable) @@ -321,7 +321,7 @@ static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; int afe_clk_khz; u32 esc_clk_div_m; @@ -360,7 +360,7 @@ static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv, static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; @@ -376,7 +376,7 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum phy phy; for_each_dsi_phy(phy, intel_dsi->phys) @@ -387,7 +387,7 @@ static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum phy phy; u32 tmp; int lane; @@ -436,7 +436,7 @@ static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum phy phy; @@ -488,7 +488,7 @@ static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder) static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum port port; @@ -509,7 +509,7 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum port port; enum phy phy; @@ -575,7 +575,7 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder, static void gen11_dsi_gate_clocks(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum phy phy; @@ -591,7 +591,7 @@ static void gen11_dsi_gate_clocks(struct intel_encoder *encoder) static void gen11_dsi_ungate_clocks(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum phy phy; @@ -608,7 +608,7 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct intel_shared_dpll *pll = crtc_state->shared_dpll; enum phy phy; u32 val; @@ -640,7 +640,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); enum pipe pipe = intel_crtc->pipe; u32 tmp; @@ -789,7 +789,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; enum port port; @@ -923,7 +923,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp; @@ -945,7 +945,7 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp, hs_tx_timeout, lp_rx_timeout, ta_timeout, divisor, mul; @@ -1026,7 +1026,7 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder, static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; enum port port; enum transcoder dsi_trans; @@ -1077,7 +1077,7 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); /* step3b */ gen11_dsi_map_pll(encoder, pipe_config); @@ -1104,7 +1104,7 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder, static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp; @@ -1126,7 +1126,7 @@ static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); @@ -1139,7 +1139,7 @@ static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder) static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; enum transcoder dsi_trans; u32 tmp; @@ -1180,7 +1180,7 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) static void gen11_dsi_disable_port(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u32 tmp; enum port port; @@ -1202,7 +1202,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; @@ -1229,7 +1229,7 @@ static void gen11_dsi_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); /* step1: turn off backlight */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); @@ -1259,7 +1259,7 @@ static void gen11_dsi_post_disable(struct intel_encoder *encoder, intel_dsc_disable(old_crtc_state); - skylake_scaler_disable(old_crtc_state); + skl_scaler_disable(old_crtc_state); } static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector, @@ -1272,7 +1272,7 @@ static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector static void gen11_dsi_get_timings(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -1313,7 +1313,7 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); intel_dsc_get_config(encoder, pipe_config); @@ -1417,7 +1417,8 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, { struct drm_i915_private *i915 = to_i915(encoder->base.dev); - get_dsi_io_power_domains(i915, enc_to_intel_dsi(&encoder->base)); + get_dsi_io_power_domains(i915, + enc_to_intel_dsi(encoder)); if (crtc_state->dsc.compression_enable) intel_display_power_get(i915, @@ -1428,7 +1429,7 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum transcoder dsi_trans; intel_wakeref_t wakeref; enum port port; diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index fd0026fc3618..c362eecdd414 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -37,6 +37,7 @@ #include "intel_atomic.h" #include "intel_display_types.h" #include "intel_hdcp.h" +#include "intel_psr.h" #include "intel_sprite.h" /** @@ -129,6 +130,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_crtc_state *crtc_state; intel_hdcp_atomic_check(conn, old_state, new_state); + intel_psr_atomic_check(conn, old_state, new_state); if (!new_state->crtc) return 0; @@ -175,6 +177,38 @@ intel_digital_connector_duplicate_state(struct drm_connector *connector) } /** + * intel_connector_needs_modeset - check if connector needs a modeset + */ +bool +intel_connector_needs_modeset(struct intel_atomic_state *state, + struct drm_connector *connector) +{ + const struct drm_connector_state *old_conn_state, *new_conn_state; + + old_conn_state = drm_atomic_get_old_connector_state(&state->base, connector); + new_conn_state = drm_atomic_get_new_connector_state(&state->base, connector); + + return old_conn_state->crtc != new_conn_state->crtc || + (new_conn_state->crtc && + drm_atomic_crtc_needs_modeset(drm_atomic_get_new_crtc_state(&state->base, + new_conn_state->crtc))); +} + +struct intel_digital_connector_state * +intel_atomic_get_digital_connector_state(struct intel_atomic_state *state, + struct intel_connector *connector) +{ + struct drm_connector_state *conn_state; + + conn_state = drm_atomic_get_connector_state(&state->base, + &connector->base); + if (IS_ERR(conn_state)) + return ERR_CAST(conn_state); + + return to_intel_digital_connector_state(conn_state); +} + +/** * intel_crtc_duplicate_state - duplicate crtc state * @crtc: drm crtc * diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h index 7b49623419ba..74c749dbfb4f 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.h +++ b/drivers/gpu/drm/i915/display/intel_atomic.h @@ -17,6 +17,7 @@ struct drm_device; struct drm_i915_private; struct drm_property; struct intel_atomic_state; +struct intel_connector; struct intel_crtc; struct intel_crtc_state; @@ -32,6 +33,11 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_atomic_state *state); struct drm_connector_state * intel_digital_connector_duplicate_state(struct drm_connector *connector); +bool intel_connector_needs_modeset(struct intel_atomic_state *state, + struct drm_connector *connector); +struct intel_digital_connector_state * +intel_atomic_get_digital_connector_state(struct intel_atomic_state *state, + struct intel_connector *connector); struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc); void intel_crtc_destroy_state(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 27710098d056..b18040793d9e 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -707,8 +707,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, - connector->encoder->base.id, - connector->encoder->name); + encoder->base.base.id, + encoder->base.name); connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; @@ -856,7 +856,7 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) } /* Force CDCLK to 2*BCLK as long as we need audio powered. */ - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, true); if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) @@ -875,7 +875,7 @@ static void i915_audio_component_put_power(struct device *kdev, /* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */ if (--dev_priv->audio_power_refcount == 0) - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, false); intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie); diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index dcb66a33be9b..b228671d5a5d 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -486,3 +486,8 @@ int intel_bw_init(struct drm_i915_private *dev_priv) return 0; } + +void intel_bw_cleanup(struct drm_i915_private *dev_priv) +{ + drm_atomic_private_obj_fini(&dev_priv->bw_obj); +} diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index 9db10af012f4..20b9ad241802 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -25,6 +25,7 @@ struct intel_bw_state { void intel_bw_init_hw(struct drm_i915_private *dev_priv); int intel_bw_init(struct drm_i915_private *dev_priv); +void intel_bw_cleanup(struct drm_i915_private *dev_priv); int intel_bw_atomic_check(struct intel_atomic_state *state); void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 7d1ab1e5b7c3..0ce5926006ca 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2004,6 +2004,18 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) /* Account for additional needs from the planes */ min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); + /* + * HACK. Currently for TGL platforms we calculate + * min_cdclk initially based on pixel_rate divided + * by 2, accounting for also plane requirements, + * however in some cases the lowest possible CDCLK + * doesn't work and causing the underruns. + * Explicitly stating here that this seems to be currently + * rather a Hack, than final solution. + */ + if (IS_TIGERLAKE(dev_priv)) + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); + if (min_cdclk > dev_priv->max_cdclk_freq) { DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", min_cdclk, dev_priv->max_cdclk_freq); diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index b2b1336ecdb6..f976b800b245 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -65,7 +65,7 @@ static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder) return container_of(encoder, struct intel_crt, base); } -static struct intel_crt *intel_attached_crt(struct drm_connector *connector) +static struct intel_crt *intel_attached_crt(struct intel_connector *connector) { return intel_encoder_to_crt(intel_attached_encoder(connector)); } @@ -247,7 +247,7 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, intel_ddi_disable_transcoder_func(old_crtc_state); - ironlake_pfit_disable(old_crtc_state); + ilk_pfit_disable(old_crtc_state); intel_ddi_disable_pipe_clock(old_crtc_state); @@ -351,7 +351,7 @@ intel_crt_mode_valid(struct drm_connector *connector, /* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */ if (HAS_PCH_LPT(dev_priv) && - (ironlake_get_lanes_required(mode->clock, 270000, 24) > 2)) + ilk_get_lanes_required(mode->clock, 270000, 24) > 2) return MODE_CLOCK_HIGH; /* HSW/BDW FDI limited to 4k */ @@ -427,10 +427,10 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, return 0; } -static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) +static bool ilk_crt_detect_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(dev); u32 adpa; bool ret; @@ -440,7 +440,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) bool turn_off_dac = HAS_PCH_SPLIT(dev_priv); u32 save_adpa; - crt->force_hotplug_required = 0; + crt->force_hotplug_required = false; save_adpa = adpa = I915_READ(crt->adpa_reg); DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa); @@ -477,7 +477,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(dev); bool reenable_hpd; u32 adpa; @@ -535,7 +535,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) int i, tries = 0; if (HAS_PCH_SPLIT(dev_priv)) - return intel_ironlake_crt_detect_hotplug(connector); + return ilk_crt_detect_hotplug(connector); if (IS_VALLEYVIEW(dev_priv)) return valleyview_crt_detect_hotplug(connector); @@ -609,7 +609,7 @@ static int intel_crt_ddc_get_modes(struct drm_connector *connector, static bool intel_crt_detect_ddc(struct drm_connector *connector) { - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; @@ -795,7 +795,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct intel_encoder *intel_encoder = &crt->base; intel_wakeref_t wakeref; int status, ret; @@ -886,7 +886,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct intel_encoder *intel_encoder = &crt->base; intel_wakeref_t wakeref; struct i2c_adapter *i2c; @@ -925,7 +925,7 @@ void intel_crt_reset(struct drm_encoder *encoder) POSTING_READ(crt->adpa_reg); DRM_DEBUG_KMS("crt adpa set to 0x%x\n", adpa); - crt->force_hotplug_required = 1; + crt->force_hotplug_required = true; } } @@ -1063,7 +1063,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv) /* * Configure the automatic hotplug detection stuff */ - crt->force_hotplug_required = 0; + crt->force_hotplug_required = false; /* * TODO: find a proper way to discover whether we need to set the the diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index c9ba7d7f3787..33f1dc3d7c1a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -34,6 +34,7 @@ #include "intel_ddi.h" #include "intel_display_types.h" #include "intel_dp.h" +#include "intel_dp_mst.h" #include "intel_dp_link_training.h" #include "intel_dpio_phy.h" #include "intel_dsi.h" @@ -1237,9 +1238,9 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *intel_dig_port = - enc_to_dig_port(&encoder->base); + enc_to_dig_port(encoder); intel_dp->DP = intel_dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE | DDI_BUF_TRANS_SELECT(0); @@ -1899,8 +1900,13 @@ intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); - if (INTEL_GEN(dev_priv) >= 12) - temp |= TRANS_DDI_MST_TRANSPORT_SELECT(crtc_state->cpu_transcoder); + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder master; + + master = crtc_state->mst_master_transcoder; + WARN_ON(master == INVALID_TRANSCODER); + temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master); + } } else { temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); @@ -1944,17 +1950,18 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder); - u32 val = I915_READ(reg); + u32 val; + + val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); + val &= ~TRANS_DDI_FUNC_ENABLE; if (INTEL_GEN(dev_priv) >= 12) { - val &= ~(TRANS_DDI_FUNC_ENABLE | TGL_TRANS_DDI_PORT_MASK | - TRANS_DDI_DP_VC_PAYLOAD_ALLOC); + if (!intel_dp_mst_is_master_trans(crtc_state)) + val &= ~TGL_TRANS_DDI_PORT_MASK; } else { - val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | - TRANS_DDI_DP_VC_PAYLOAD_ALLOC); + val &= ~TRANS_DDI_PORT_MASK; } - I915_WRITE(reg, val); + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val); if (dev_priv->quirks & QUIRK_INCREASE_DDI_DISABLED_TIME && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { @@ -2217,7 +2224,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))) return; - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); /* @@ -2287,7 +2294,7 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, static void skl_ddi_set_iboost(struct intel_encoder *encoder, int level, enum intel_output_type type) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; u8 iboost; @@ -2358,7 +2365,7 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; @@ -2497,7 +2504,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, width = 4; rate = 0; /* Rate is always < than 6GHz for HDMI */ } else { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); width = intel_dp->lane_count; rate = intel_dp->link_rate; @@ -2623,7 +2630,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, width = 4; /* Rate is always < than 6GHz for HDMI */ } else { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); width = intel_dp->lane_count; rate = intel_dp->link_rate; @@ -3161,57 +3168,6 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) } static void -icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) -{ - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); - u32 val, bits; - int ln; - - if (tc_port == PORT_TC_NONE) - return; - - bits = MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING; - - for (ln = 0; ln < 2; ln++) { - if (INTEL_GEN(dev_priv) >= 12) { - I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); - val = I915_READ(DKL_DP_MODE(tc_port)); - } else { - val = I915_READ(MG_DP_MODE(ln, tc_port)); - } - - if (enable) - val |= bits; - else - val &= ~bits; - - if (INTEL_GEN(dev_priv) >= 12) - I915_WRITE(DKL_DP_MODE(tc_port), val); - else - I915_WRITE(MG_DP_MODE(ln, tc_port), val); - } - - if (INTEL_GEN(dev_priv) == 11) { - bits = MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING; - - val = I915_READ(MG_MISC_SUS0(tc_port)); - if (enable) - val |= (bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3)); - else - val &= ~(bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK); - I915_WRITE(MG_MISC_SUS0(tc_port), val); - } -} - -static void icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, const struct intel_crtc_state *crtc_state) { @@ -3317,7 +3273,7 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder, if (!crtc_state->fec_enable) return; - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); val = I915_READ(intel_dp->regs.dp_tp_ctl); val |= DP_TP_CTL_FEC_ENABLE; I915_WRITE(intel_dp->regs.dp_tp_ctl, val); @@ -3337,7 +3293,7 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, if (!crtc_state->fec_enable) return; - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_FEC_ENABLE; I915_WRITE(intel_dp->regs.dp_tp_ctl, val); @@ -3428,10 +3384,10 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int level = intel_ddi_dp_level(intel_dp); enum transcoder transcoder = crtc_state->cpu_transcoder; @@ -3458,14 +3414,14 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, * (DFLEXDPSP.DPX4TXLATC) * * This was done before tgl_ddi_pre_enable_dp by - * haswell_crtc_enable()->intel_encoders_pre_pll_enable(). + * hsw_crtc_enable()->intel_encoders_pre_pll_enable(). */ /* * 4. Enable the port PLL. * * The PLL enabling itself was already done before this function by - * haswell_crtc_enable()->intel_enable_shared_dpll(). We need only + * hsw_crtc_enable()->intel_enable_shared_dpll(). We need only * configure the PLL to port mapping here. */ intel_ddi_clk_select(encoder, crtc_state); @@ -3509,12 +3465,6 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, * down this function. */ - /* - * 7.d Type C with DP alternate or fixed/legacy/static connection - - * Disable PHY clock gating per Type-C DDI Buffer page - */ - icl_phy_set_clock_gating(dig_port, false); - /* 7.e Configure voltage swing and related IO settings */ tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, encoder->type); @@ -3566,15 +3516,6 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, if (!is_trans_port_sync_mode(crtc_state)) intel_dp_stop_link_train(intel_dp); - /* - * TODO: enable clock gating - * - * It is not written in DP enabling sequence but "PHY Clockgating - * programming" states that clock gating should be enabled after the - * link training but doing so causes all the following trainings to fail - * so not enabling it for now. - */ - /* 7.l Configure and enable FEC if needed */ intel_ddi_enable_fec(encoder, crtc_state); intel_dsc_enable(encoder, crtc_state); @@ -3584,15 +3525,18 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; enum phy phy = intel_port_to_phy(dev_priv, port); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int level = intel_ddi_dp_level(intel_dp); - WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); + if (INTEL_GEN(dev_priv) < 11) + WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); + else + WARN_ON(is_mst && port == PORT_A); intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); @@ -3610,7 +3554,6 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, dig_port->ddi_io_power_domain); icl_program_mg_dp_mode(dig_port, crtc_state); - icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, @@ -3644,8 +3587,6 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_ddi_enable_fec(encoder, crtc_state); - icl_phy_set_clock_gating(dig_port, true); - if (!is_mst) intel_ddi_enable_pipe_clock(crtc_state); @@ -3674,12 +3615,12 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; int level = intel_ddi_hdmi_level(dev_priv, port); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, crtc_state); @@ -3687,7 +3628,6 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); icl_program_mg_dp_mode(dig_port, crtc_state); - icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 12) tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, @@ -3702,8 +3642,6 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, else intel_prepare_hdmi_ddi_buffers(encoder, level); - icl_phy_set_clock_gating(dig_port, true); - if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); @@ -3746,12 +3684,12 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state); } else { struct intel_lspcon *lspcon = - enc_to_intel_lspcon(&encoder->base); + enc_to_intel_lspcon(encoder); intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state); if (lspcon->active) { struct intel_digital_port *dig_port = - enc_to_dig_port(&encoder->base); + enc_to_dig_port(encoder); dig_port->set_infoframes(encoder, crtc_state->has_infoframe, @@ -3776,7 +3714,7 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder, } if (intel_crtc_has_dp_encoder(crtc_state)) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); @@ -3796,7 +3734,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_dp *intel_dp = &dig_port->dp; bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); @@ -3808,8 +3746,19 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, */ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - if (INTEL_GEN(dev_priv) < 12 && !is_mst) - intel_ddi_disable_pipe_clock(old_crtc_state); + if (INTEL_GEN(dev_priv) >= 12) { + if (is_mst) { + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; + u32 val; + + val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); + val &= ~TGL_TRANS_DDI_PORT_MASK; + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val); + } + } else { + if (!is_mst) + intel_ddi_disable_pipe_clock(old_crtc_state); + } intel_disable_ddi_buf(encoder, old_crtc_state); @@ -3838,7 +3787,7 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &dig_port->hdmi; dig_port->set_infoframes(encoder, false, @@ -3860,8 +3809,6 @@ static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_ { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - i915_reg_t reg; - u32 trans_ddi_func_ctl2_val; if (old_crtc_state->master_transcoder == INVALID_TRANSCODER) return; @@ -3869,10 +3816,7 @@ static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_ DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n", transcoder_name(old_crtc_state->cpu_transcoder)); - reg = TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder); - trans_ddi_func_ctl2_val = ~(PORT_SYNC_MODE_ENABLE | - PORT_SYNC_MODE_MASTER_SELECT_MASK); - I915_WRITE(reg, trans_ddi_func_ctl2_val); + I915_WRITE(TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder), 0); } static void intel_ddi_post_disable(struct intel_encoder *encoder, @@ -3880,25 +3824,27 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); bool is_tc_port = intel_phy_is_tc(dev_priv, phy); - intel_crtc_vblank_off(old_crtc_state); + if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) { + intel_crtc_vblank_off(old_crtc_state); - intel_disable_pipe(old_crtc_state); + intel_disable_pipe(old_crtc_state); - if (INTEL_GEN(dev_priv) >= 11) - icl_disable_transcoder_port_sync(old_crtc_state); + if (INTEL_GEN(dev_priv) >= 11) + icl_disable_transcoder_port_sync(old_crtc_state); - intel_ddi_disable_transcoder_func(old_crtc_state); + intel_ddi_disable_transcoder_func(old_crtc_state); - intel_dsc_disable(old_crtc_state); + intel_dsc_disable(old_crtc_state); - if (INTEL_GEN(dev_priv) >= 9) - skylake_scaler_disable(old_crtc_state); - else - ironlake_pfit_disable(old_crtc_state); + if (INTEL_GEN(dev_priv) >= 9) + skl_scaler_disable(old_crtc_state); + else + ilk_pfit_disable(old_crtc_state); + } /* * When called from DP MST code: @@ -3970,7 +3916,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; if (port == PORT_A && INTEL_GEN(dev_priv) < 9) @@ -4011,7 +3957,7 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct drm_connector *connector = conn_state->connector; enum port port = encoder->port; @@ -4088,7 +4034,7 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_dp->link_trained = false; @@ -4136,7 +4082,7 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_ddi_set_dp_msa(crtc_state, conn_state); @@ -4200,7 +4146,8 @@ intel_ddi_update_prepare(struct intel_atomic_state *state, WARN_ON(crtc && crtc->active); - intel_tc_port_get_link(enc_to_dig_port(&encoder->base), required_lanes); + intel_tc_port_get_link(enc_to_dig_port(encoder), + required_lanes); if (crtc_state && crtc_state->hw.active) intel_update_active_dpll(state, crtc, encoder); } @@ -4210,7 +4157,7 @@ intel_ddi_update_complete(struct intel_atomic_state *state, struct intel_encoder *encoder, struct intel_crtc *crtc) { - intel_tc_port_put_link(enc_to_dig_port(&encoder->base)); + intel_tc_port_put_link(enc_to_dig_port(encoder)); } static void @@ -4219,7 +4166,7 @@ intel_ddi_pre_pll_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); bool is_tc_port = intel_phy_is_tc(dev_priv, phy); @@ -4405,6 +4352,11 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + + if (INTEL_GEN(dev_priv) >= 12) + pipe_config->mst_master_transcoder = + REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); + intel_dp_get_m_n(intel_crtc, pipe_config); break; default: @@ -4518,7 +4470,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, static void intel_ddi_encoder_destroy(struct drm_encoder *encoder) { - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder)); intel_dp_encoder_flush_work(encoder); @@ -4585,7 +4537,7 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder); struct intel_connector *connector = hdmi->attached_connector; struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); @@ -4657,7 +4609,7 @@ intel_ddi_hotplug(struct intel_encoder *encoder, struct intel_connector *connector, bool irq_received) { - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct drm_modeset_acquire_ctx ctx; enum intel_hotplug_state state; int ret; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1860da0a493e..19ea842cfd84 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -46,6 +46,7 @@ #include "display/intel_crt.h" #include "display/intel_ddi.h" #include "display/intel_dp.h" +#include "display/intel_dp_mst.h" #include "display/intel_dsi.h" #include "display/intel_dvo.h" #include "display/intel_gmbus.h" @@ -145,8 +146,8 @@ static const u64 cursor_format_modifiers[] = { static void i9xx_crtc_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static void ironlake_pch_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config); +static void ilk_pch_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config); static int intel_framebuffer_init(struct intel_framebuffer *ifb, struct drm_i915_gem_object *obj, @@ -157,15 +158,15 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta const struct intel_link_m_n *m_n, const struct intel_link_m_n *m2_n2); static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state); -static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state); -static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state); +static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state); +static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state); static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state); static void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); -static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state); -static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state); +static void skl_pfit_enable(const struct intel_crtc_state *crtc_state); +static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state); static void intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc); @@ -369,7 +370,7 @@ static const struct intel_limit intel_limits_g4x_dual_channel_lvds = { }, }; -static const struct intel_limit intel_limits_pineview_sdvo = { +static const struct intel_limit pnv_limits_sdvo = { .dot = { .min = 20000, .max = 400000}, .vco = { .min = 1700000, .max = 3500000 }, /* Pineview's Ncounter is a ring counter */ @@ -384,7 +385,7 @@ static const struct intel_limit intel_limits_pineview_sdvo = { .p2_slow = 10, .p2_fast = 5 }, }; -static const struct intel_limit intel_limits_pineview_lvds = { +static const struct intel_limit pnv_limits_lvds = { .dot = { .min = 20000, .max = 400000 }, .vco = { .min = 1700000, .max = 3500000 }, .n = { .min = 3, .max = 6 }, @@ -402,7 +403,7 @@ static const struct intel_limit intel_limits_pineview_lvds = { * We calculate clock using (register_value + 2) for N/M1/M2, so here * the range value for them is (actual_value - 2). */ -static const struct intel_limit intel_limits_ironlake_dac = { +static const struct intel_limit ilk_limits_dac = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 5 }, @@ -415,7 +416,7 @@ static const struct intel_limit intel_limits_ironlake_dac = { .p2_slow = 10, .p2_fast = 5 }, }; -static const struct intel_limit intel_limits_ironlake_single_lvds = { +static const struct intel_limit ilk_limits_single_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, @@ -428,7 +429,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds = { .p2_slow = 14, .p2_fast = 14 }, }; -static const struct intel_limit intel_limits_ironlake_dual_lvds = { +static const struct intel_limit ilk_limits_dual_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, @@ -442,7 +443,7 @@ static const struct intel_limit intel_limits_ironlake_dual_lvds = { }; /* LVDS 100mhz refclk limits. */ -static const struct intel_limit intel_limits_ironlake_single_lvds_100m = { +static const struct intel_limit ilk_limits_single_lvds_100m = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 2 }, @@ -455,7 +456,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds_100m = { .p2_slow = 14, .p2_fast = 14 }, }; -static const struct intel_limit intel_limits_ironlake_dual_lvds_100m = { +static const struct intel_limit ilk_limits_dual_lvds_100m = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, @@ -553,13 +554,6 @@ is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state) } static bool -is_trans_port_sync_master(const struct intel_crtc_state *crtc_state) -{ - return (crtc_state->master_transcoder == INVALID_TRANSCODER && - crtc_state->sync_mode_slaves_mask); -} - -static bool is_trans_port_sync_slave(const struct intel_crtc_state *crtc_state) { return crtc_state->master_transcoder != INVALID_TRANSCODER; @@ -1637,7 +1631,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, I915_READ(dpll_reg) & port_mask, expected_mask); } -static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) +static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1735,8 +1729,8 @@ static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv, DRM_ERROR("Failed to enable PCH transcoder\n"); } -static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void ilk_disable_pch_transcoder(struct drm_i915_private *dev_priv, + enum pipe pipe) { i915_reg_t reg; u32 val; @@ -1944,7 +1938,9 @@ static bool is_ccs_plane(const struct drm_framebuffer *fb, int plane) static bool is_gen12_ccs_modifier(u64 modifier) { - return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS; + return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS; + } static bool is_gen12_ccs_plane(const struct drm_framebuffer *fb, int plane) @@ -1977,8 +1973,7 @@ static int ccs_to_main_plane(const struct drm_framebuffer *fb, int ccs_plane) } /* Return either the main plane's CCS or - if not a CCS FB - UV plane */ -static int -intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane) +int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane) { if (is_ccs_modifier(fb->modifier)) return main_to_ccs_plane(fb, main_plane); @@ -1994,6 +1989,13 @@ intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, info->num_planes == (is_ccs_modifier(modifier) ? 4 : 2); } +static bool is_semiplanar_uv_plane(const struct drm_framebuffer *fb, + int color_plane) +{ + return intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && + color_plane == 1; +} + static unsigned int intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) { @@ -2013,6 +2015,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) return 128; /* fall through */ case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: if (is_ccs_plane(fb, color_plane)) return 64; /* fall through */ @@ -2068,6 +2071,16 @@ static void intel_tile_dims(const struct drm_framebuffer *fb, int color_plane, *tile_height = intel_tile_height(fb, color_plane); } +static unsigned int intel_tile_row_size(const struct drm_framebuffer *fb, + int color_plane) +{ + unsigned int tile_width, tile_height; + + intel_tile_dims(fb, color_plane, &tile_width, &tile_height); + + return fb->pitches[color_plane] * tile_height; +} + unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int color_plane, unsigned int height) @@ -2142,7 +2155,8 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, struct drm_i915_private *dev_priv = to_i915(fb->dev); /* AUX_DIST needs only 4K alignment */ - if (is_aux_plane(fb, color_plane)) + if ((INTEL_GEN(dev_priv) < 12 && is_aux_plane(fb, color_plane)) || + is_ccs_plane(fb, color_plane)) return 4096; switch (fb->modifier) { @@ -2152,11 +2166,19 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, if (INTEL_GEN(dev_priv) >= 9) return 256 * 1024; return 0; + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + if (is_semiplanar_uv_plane(fb, color_plane)) + return intel_tile_row_size(fb, color_plane); + /* Fall-through */ case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: return 16 * 1024; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: case I915_FORMAT_MOD_Y_TILED: + if (INTEL_GEN(dev_priv) >= 12 && + is_semiplanar_uv_plane(fb, color_plane)) + return intel_tile_row_size(fb, color_plane); + /* Fall-through */ case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; default: @@ -2193,6 +2215,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, return ERR_PTR(-EINVAL); alignment = intel_surf_alignment(fb, 0); + if (WARN_ON(alignment && !is_power_of_2(alignment))) + return ERR_PTR(-EINVAL); /* Note that the w/a also requires 64 PTE of padding following the * bo. We currently fill all unused PTE with the shadow page and so @@ -2431,9 +2455,6 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv, unsigned int cpp = fb->format->cpp[color_plane]; u32 offset, offset_aligned; - if (alignment) - alignment--; - if (!is_surface_linear(fb, color_plane)) { unsigned int tile_size, tile_width, tile_height; unsigned int tile_rows, tiles, pitch_tiles; @@ -2455,17 +2476,24 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv, *x %= tile_width; offset = (tile_rows * pitch_tiles + tiles) * tile_size; - offset_aligned = offset & ~alignment; + + offset_aligned = offset; + if (alignment) + offset_aligned = rounddown(offset_aligned, alignment); intel_adjust_tile_offset(x, y, tile_width, tile_height, tile_size, pitch_tiles, offset, offset_aligned); } else { offset = *y * pitch + *x * cpp; - offset_aligned = offset & ~alignment; - - *y = (offset & alignment) / pitch; - *x = ((offset & alignment) - *y * pitch) / cpp; + offset_aligned = offset; + if (alignment) { + offset_aligned = rounddown(offset_aligned, alignment); + *y = (offset % alignment) / pitch; + *x = ((offset % alignment) - *y * pitch) / cpp; + } else { + *y = *x = 0; + } } return offset_aligned; @@ -2498,9 +2526,17 @@ static int intel_fb_offset_to_xy(int *x, int *y, { struct drm_i915_private *dev_priv = to_i915(fb->dev); unsigned int height; + u32 alignment; - if (fb->modifier != DRM_FORMAT_MOD_LINEAR && - fb->offsets[color_plane] % intel_tile_size(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 12 && + is_semiplanar_uv_plane(fb, color_plane)) + alignment = intel_tile_row_size(fb, color_plane); + else if (fb->modifier != DRM_FORMAT_MOD_LINEAR) + alignment = intel_tile_size(dev_priv); + else + alignment = 0; + + if (alignment != 0 && fb->offsets[color_plane] % alignment) { DRM_DEBUG_KMS("Misaligned offset 0x%08x for color plane %d\n", fb->offsets[color_plane], color_plane); return -EINVAL; @@ -2537,6 +2573,7 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: return I915_TILING_Y; default: return I915_TILING_NONE; @@ -2588,6 +2625,30 @@ static const struct drm_format_info gen12_ccs_formats[] = { { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, .hsub = 1, .vsub = 1, .has_alpha = true }, + { .format = DRM_FORMAT_YUYV, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_YVYU, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_UYVY, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_VYUY, .num_planes = 2, + .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_NV12, .num_planes = 4, + .char_per_block = { 1, 2, 1, 1 }, .block_w = { 1, 1, 4, 4 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P010, .num_planes = 4, + .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P012, .num_planes = 4, + .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P016, .num_planes = 4, + .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true }, }; static const struct drm_format_info * @@ -2614,6 +2675,7 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd) ARRAY_SIZE(skl_ccs_formats), cmd->pixel_format); case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: return lookup_format_info(gen12_ccs_formats, ARRAY_SIZE(gen12_ccs_formats), cmd->pixel_format); @@ -2625,6 +2687,7 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd) bool is_ccs_modifier(u64 modifier) { return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS || modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS; } @@ -2698,7 +2761,7 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) } tile_width = intel_tile_width_bytes(fb, color_plane); - if (is_ccs_modifier(fb->modifier) && color_plane == 0) { + if (is_ccs_modifier(fb->modifier)) { /* * Display WA #0531: skl,bxt,kbl,glk * @@ -2708,7 +2771,7 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) * require the entire fb to accommodate that to avoid * potential runtime errors at plane configuration time. */ - if (IS_GEN(dev_priv, 9) && fb->width > 3840) + if (IS_GEN(dev_priv, 9) && color_plane == 0 && fb->width > 3840) tile_width *= 4; /* * The main surface pitch must be padded to a multiple of four @@ -2876,11 +2939,15 @@ intel_fb_check_ccs_xy(struct drm_framebuffer *fb, int ccs_plane, int x, int y) static void intel_fb_plane_dims(int *w, int *h, struct drm_framebuffer *fb, int color_plane) { + int main_plane = is_ccs_plane(fb, color_plane) ? + ccs_to_main_plane(fb, color_plane) : 0; + int main_hsub, main_vsub; int hsub, vsub; + intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, main_plane); intel_fb_plane_get_subsampling(&hsub, &vsub, fb, color_plane); - *w = fb->width / hsub; - *h = fb->height / vsub; + *w = fb->width / main_hsub / hsub; + *h = fb->height / main_vsub / vsub; } /* @@ -3598,6 +3665,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, return 5120; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: /* FIXME AUX plane? */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: @@ -3656,11 +3724,12 @@ static int icl_max_plane_height(void) return 4320; } -static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, - int main_x, int main_y, u32 main_offset) +static bool +skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, + int main_x, int main_y, u32 main_offset, + int ccs_plane) { const struct drm_framebuffer *fb = plane_state->hw.fb; - int ccs_plane = main_to_ccs_plane(fb, 0); int aux_x = plane_state->color_plane[ccs_plane].x; int aux_y = plane_state->color_plane[ccs_plane].y; u32 aux_offset = plane_state->color_plane[ccs_plane].offset; @@ -3737,6 +3806,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) intel_add_fb_offsets(&x, &y, plane_state, 0); offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 0); alignment = intel_surf_alignment(fb, 0); + if (WARN_ON(alignment && !is_power_of_2(alignment))) + return -EINVAL; /* * AUX surface offset is specified as the distance from the @@ -3772,7 +3843,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) * they match with the main surface x/y offsets. */ if (is_ccs_modifier(fb->modifier)) { - while (!skl_check_main_ccs_coordinates(plane_state, x, y, offset)) { + while (!skl_check_main_ccs_coordinates(plane_state, x, y, + offset, aux_plane)) { if (offset == 0) break; @@ -3805,7 +3877,8 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->hw.fb; unsigned int rotation = plane_state->hw.rotation; - int max_width = skl_max_plane_width(fb, 1, rotation); + int uv_plane = 1; + int max_width = skl_max_plane_width(fb, uv_plane, rotation); int max_height = 4096; int x = plane_state->uapi.src.x1 >> 17; int y = plane_state->uapi.src.y1 >> 17; @@ -3813,8 +3886,9 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) int h = drm_rect_height(&plane_state->uapi.src) >> 17; u32 offset; - intel_add_fb_offsets(&x, &y, plane_state, 1); - offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1); + intel_add_fb_offsets(&x, &y, plane_state, uv_plane); + offset = intel_plane_compute_aligned_offset(&x, &y, + plane_state, uv_plane); /* FIXME not quite sure how/if these apply to the chroma plane */ if (w > max_width || h > max_height) { @@ -3823,9 +3897,39 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) return -EINVAL; } - plane_state->color_plane[1].offset = offset; - plane_state->color_plane[1].x = x; - plane_state->color_plane[1].y = y; + if (is_ccs_modifier(fb->modifier)) { + int ccs_plane = main_to_ccs_plane(fb, uv_plane); + int aux_offset = plane_state->color_plane[ccs_plane].offset; + int alignment = intel_surf_alignment(fb, uv_plane); + + if (offset > aux_offset) + offset = intel_plane_adjust_aligned_offset(&x, &y, + plane_state, + uv_plane, + offset, + aux_offset & ~(alignment - 1)); + + while (!skl_check_main_ccs_coordinates(plane_state, x, y, + offset, ccs_plane)) { + if (offset == 0) + break; + + offset = intel_plane_adjust_aligned_offset(&x, &y, + plane_state, + uv_plane, + offset, offset - alignment); + } + + if (x != plane_state->color_plane[ccs_plane].x || + y != plane_state->color_plane[ccs_plane].y) { + DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n"); + return -EINVAL; + } + } + + plane_state->color_plane[uv_plane].offset = offset; + plane_state->color_plane[uv_plane].x = x; + plane_state->color_plane[uv_plane].y = y; return 0; } @@ -3835,21 +3939,40 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) const struct drm_framebuffer *fb = plane_state->hw.fb; int src_x = plane_state->uapi.src.x1 >> 16; int src_y = plane_state->uapi.src.y1 >> 16; - int hsub; - int vsub; - int x; - int y; u32 offset; + int ccs_plane; + + for (ccs_plane = 0; ccs_plane < fb->format->num_planes; ccs_plane++) { + int main_hsub, main_vsub; + int hsub, vsub; + int x, y; - intel_fb_plane_get_subsampling(&hsub, &vsub, fb, 1); - x = src_x / hsub; - y = src_y / vsub; - intel_add_fb_offsets(&x, &y, plane_state, 1); - offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1); + if (!is_ccs_plane(fb, ccs_plane)) + continue; + + intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, + ccs_to_main_plane(fb, ccs_plane)); + intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane); - plane_state->color_plane[1].offset = offset; - plane_state->color_plane[1].x = x * hsub + src_x % hsub; - plane_state->color_plane[1].y = y * vsub + src_y % vsub; + hsub *= main_hsub; + vsub *= main_vsub; + x = src_x / hsub; + y = src_y / vsub; + + intel_add_fb_offsets(&x, &y, plane_state, ccs_plane); + + offset = intel_plane_compute_aligned_offset(&x, &y, + plane_state, + ccs_plane); + + plane_state->color_plane[ccs_plane].offset = offset; + plane_state->color_plane[ccs_plane].x = (x * hsub + + src_x % hsub) / + main_hsub; + plane_state->color_plane[ccs_plane].y = (y * vsub + + src_y % vsub) / + main_vsub; + } return 0; } @@ -3858,6 +3981,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->hw.fb; int ret; + bool needs_aux = false; ret = intel_plane_compute_gtt(plane_state); if (ret) @@ -3867,22 +3991,32 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) return 0; /* - * Handle the AUX surface first since - * the main surface setup depends on it. + * Handle the AUX surface first since the main surface setup depends on + * it. */ + if (is_ccs_modifier(fb->modifier)) { + needs_aux = true; + ret = skl_check_ccs_aux_surface(plane_state); + if (ret) + return ret; + } + if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) { + needs_aux = true; ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; - } else if (is_ccs_modifier(fb->modifier)) { - ret = skl_check_ccs_aux_surface(plane_state); - if (ret) - return ret; - } else { - plane_state->color_plane[1].offset = ~0xfff; - plane_state->color_plane[1].x = 0; - plane_state->color_plane[1].y = 0; + } + + if (!needs_aux) { + int i; + + for (i = 1; i < fb->format->num_planes; i++) { + plane_state->color_plane[i].offset = ~0xfff; + plane_state->color_plane[i].x = 0; + plane_state->color_plane[i].y = 0; + } } ret = skl_check_main_surface(plane_state); @@ -4472,6 +4606,8 @@ static u32 skl_plane_ctl_tiling(u64 fb_modifier) return PLANE_CTL_TILED_Y | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE | PLANE_CTL_CLEAR_COLOR_DISABLE; + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + return PLANE_CTL_TILED_Y | PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_Yf_TILED: return PLANE_CTL_TILED_YF; case I915_FORMAT_MOD_Yf_TILED_CCS: @@ -4869,8 +5005,8 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. */ -static void ironlake_fdi_link_train(struct intel_crtc *crtc, - const struct intel_crtc_state *crtc_state) +static void ilk_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5222,7 +5358,7 @@ train_done: DRM_DEBUG_KMS("FDI train done.\n"); } -static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state) +static void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); @@ -5259,7 +5395,7 @@ static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state) } } -static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) +static void ilk_fdi_pll_disable(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5289,7 +5425,7 @@ static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) udelay(100); } -static void ironlake_fdi_disable(struct intel_crtc *crtc) +static void ilk_fdi_disable(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -5496,8 +5632,8 @@ int lpt_get_iclkip(struct drm_i915_private *dev_priv) desired_divisor << auxdiv); } -static void ironlake_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state, - enum pipe pch_transcoder) +static void ilk_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state, + enum pipe pch_transcoder) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -5540,7 +5676,7 @@ static void cpt_set_fdi_bc_bifurcation(struct drm_i915_private *dev_priv, bool e POSTING_READ(SOUTH_CHICKEN1); } -static void ivybridge_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state) +static void ivb_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -5601,8 +5737,8 @@ intel_get_crtc_new_encoder(const struct intel_atomic_state *state, * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(const struct intel_atomic_state *state, - const struct intel_crtc_state *crtc_state) +static void ilk_pch_enable(const struct intel_atomic_state *state, + const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_device *dev = crtc->base.dev; @@ -5613,7 +5749,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, assert_pch_transcoder_disabled(dev_priv, pipe); if (IS_IVYBRIDGE(dev_priv)) - ivybridge_update_fdi_bc_bifurcation(crtc_state); + ivb_update_fdi_bc_bifurcation(crtc_state); /* Write the TU size bits before fdi link training, so that error * detection works. */ @@ -5650,7 +5786,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); - ironlake_pch_transcoder_set_timings(crtc_state, pipe); + ilk_pch_transcoder_set_timings(crtc_state, pipe); intel_fdi_normal_train(crtc); @@ -5682,7 +5818,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, I915_WRITE(reg, temp); } - ironlake_enable_pch_transcoder(crtc_state); + ilk_enable_pch_transcoder(crtc_state); } static void lpt_pch_enable(const struct intel_atomic_state *state, @@ -5697,7 +5833,7 @@ static void lpt_pch_enable(const struct intel_atomic_state *state, lpt_program_iclkip(crtc_state); /* Set transcoder timing. */ - ironlake_pch_transcoder_set_timings(crtc_state, PIPE_A); + ilk_pch_transcoder_set_timings(crtc_state, PIPE_A); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } @@ -6001,7 +6137,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, return 0; } -void skylake_scaler_disable(const struct intel_crtc_state *old_crtc_state) +void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); int i; @@ -6010,7 +6146,7 @@ void skylake_scaler_disable(const struct intel_crtc_state *old_crtc_state) skl_detach_scaler(crtc, i); } -static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state) +static void skl_pfit_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -6047,7 +6183,7 @@ static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state) } } -static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state) +static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -6411,45 +6547,29 @@ intel_connector_primary_encoder(struct intel_connector *connector) if (connector->mst_port) return &dp_to_dig_port(connector->mst_port)->base; - encoder = intel_attached_encoder(&connector->base); + encoder = intel_attached_encoder(connector); WARN_ON(!encoder); return encoder; } -static bool -intel_connector_needs_modeset(struct intel_atomic_state *state, - const struct drm_connector_state *old_conn_state, - const struct drm_connector_state *new_conn_state) -{ - struct intel_crtc *old_crtc = old_conn_state->crtc ? - to_intel_crtc(old_conn_state->crtc) : NULL; - struct intel_crtc *new_crtc = new_conn_state->crtc ? - to_intel_crtc(new_conn_state->crtc) : NULL; - - return new_crtc != old_crtc || - (new_crtc && - needs_modeset(intel_atomic_get_new_crtc_state(state, new_crtc))); -} - static void intel_encoders_update_prepare(struct intel_atomic_state *state) { - struct drm_connector_state *old_conn_state; struct drm_connector_state *new_conn_state; - struct drm_connector *conn; + struct drm_connector *connector; int i; - for_each_oldnew_connector_in_state(&state->base, conn, - old_conn_state, new_conn_state, i) { + for_each_new_connector_in_state(&state->base, connector, new_conn_state, + i) { + struct intel_connector *intel_connector; struct intel_encoder *encoder; struct intel_crtc *crtc; - if (!intel_connector_needs_modeset(state, - old_conn_state, - new_conn_state)) + if (!intel_connector_needs_modeset(state, connector)) continue; - encoder = intel_connector_primary_encoder(to_intel_connector(conn)); + intel_connector = to_intel_connector(connector); + encoder = intel_connector_primary_encoder(intel_connector); if (!encoder->update_prepare) continue; @@ -6461,22 +6581,21 @@ static void intel_encoders_update_prepare(struct intel_atomic_state *state) static void intel_encoders_update_complete(struct intel_atomic_state *state) { - struct drm_connector_state *old_conn_state; struct drm_connector_state *new_conn_state; - struct drm_connector *conn; + struct drm_connector *connector; int i; - for_each_oldnew_connector_in_state(&state->base, conn, - old_conn_state, new_conn_state, i) { + for_each_new_connector_in_state(&state->base, connector, new_conn_state, + i) { + struct intel_connector *intel_connector; struct intel_encoder *encoder; struct intel_crtc *crtc; - if (!intel_connector_needs_modeset(state, - old_conn_state, - new_conn_state)) + if (!intel_connector_needs_modeset(state, connector)) continue; - encoder = intel_connector_primary_encoder(to_intel_connector(conn)); + intel_connector = to_intel_connector(connector); + encoder = intel_connector_primary_encoder(intel_connector); if (!encoder->update_complete) continue; @@ -6643,8 +6762,8 @@ static void intel_disable_primary_plane(const struct intel_crtc_state *crtc_stat plane->disable_plane(plane, crtc_state); } -static void ironlake_crtc_enable(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void ilk_crtc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -6680,7 +6799,7 @@ static void ironlake_crtc_enable(struct intel_atomic_state *state, intel_cpu_transcoder_set_m_n(new_crtc_state, &new_crtc_state->fdi_m_n, NULL); - ironlake_set_pipeconf(new_crtc_state); + ilk_set_pipeconf(new_crtc_state); crtc->active = true; @@ -6690,13 +6809,13 @@ static void ironlake_crtc_enable(struct intel_atomic_state *state, /* Note: FDI PLL enabling _must_ be done before we enable the * cpu pipes, hence this is separate from all the other fdi/pch * enabling. */ - ironlake_fdi_pll_enable(new_crtc_state); + ilk_fdi_pll_enable(new_crtc_state); } else { assert_fdi_tx_disabled(dev_priv, pipe); assert_fdi_rx_disabled(dev_priv, pipe); } - ironlake_pfit_enable(new_crtc_state); + ilk_pfit_enable(new_crtc_state); /* * On ILK+ LUT must be loaded before the pipe is running but with @@ -6712,7 +6831,7 @@ static void ironlake_crtc_enable(struct intel_atomic_state *state, intel_enable_pipe(new_crtc_state); if (new_crtc_state->has_pch_encoder) - ironlake_pch_enable(state, new_crtc_state); + ilk_pch_enable(state, new_crtc_state); intel_crtc_vblank_on(new_crtc_state); @@ -6787,8 +6906,8 @@ static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state) I915_WRITE(reg, val); } -static void haswell_crtc_enable(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void hsw_crtc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -6829,7 +6948,7 @@ static void haswell_crtc_enable(struct intel_atomic_state *state, if (!transcoder_is_dsi(cpu_transcoder)) { hsw_set_frame_start_delay(new_crtc_state); - haswell_set_pipeconf(new_crtc_state); + hsw_set_pipeconf(new_crtc_state); } if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) @@ -6844,9 +6963,9 @@ static void haswell_crtc_enable(struct intel_atomic_state *state, glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); if (INTEL_GEN(dev_priv) >= 9) - skylake_pfit_enable(new_crtc_state); + skl_pfit_enable(new_crtc_state); else - ironlake_pfit_enable(new_crtc_state); + ilk_pfit_enable(new_crtc_state); /* * On ILK+ LUT must be loaded before the pipe is running but with @@ -6895,7 +7014,7 @@ static void haswell_crtc_enable(struct intel_atomic_state *state, } } -void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state) +void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -6910,8 +7029,8 @@ void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state) } } -static void ironlake_crtc_disable(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void ilk_crtc_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); @@ -6932,15 +7051,15 @@ static void ironlake_crtc_disable(struct intel_atomic_state *state, intel_disable_pipe(old_crtc_state); - ironlake_pfit_disable(old_crtc_state); + ilk_pfit_disable(old_crtc_state); if (old_crtc_state->has_pch_encoder) - ironlake_fdi_disable(crtc); + ilk_fdi_disable(crtc); intel_encoders_post_disable(state, crtc); if (old_crtc_state->has_pch_encoder) { - ironlake_disable_pch_transcoder(dev_priv, pipe); + ilk_disable_pch_transcoder(dev_priv, pipe); if (HAS_PCH_CPT(dev_priv)) { i915_reg_t reg; @@ -6960,15 +7079,15 @@ static void ironlake_crtc_disable(struct intel_atomic_state *state, I915_WRITE(PCH_DPLL_SEL, temp); } - ironlake_fdi_pll_disable(crtc); + ilk_fdi_pll_disable(crtc); } intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); } -static void haswell_crtc_disable(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void hsw_crtc_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) { /* * FIXME collapse everything to one hook. @@ -7505,8 +7624,8 @@ static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state) return 0; } -static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, - struct intel_crtc_state *pipe_config) +static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *state = pipe_config->uapi.state; @@ -7578,8 +7697,8 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } #define RETRY 1 -static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc, - struct intel_crtc_state *pipe_config) +static int ilk_fdi_compute_config(struct intel_crtc *intel_crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = intel_crtc->base.dev; const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -7598,15 +7717,15 @@ retry: fdi_dotclock = adjusted_mode->crtc_clock; - lane = ironlake_get_lanes_required(fdi_dotclock, link_bw, - pipe_config->pipe_bpp); + lane = ilk_get_lanes_required(fdi_dotclock, link_bw, + pipe_config->pipe_bpp); pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, link_bw, &pipe_config->fdi_m_n, false, false); - ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); + ret = ilk_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EDEADLK) return ret; @@ -7812,7 +7931,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, intel_crtc_compute_pixel_rate(pipe_config); if (pipe_config->has_pch_encoder) - return ironlake_fdi_compute_config(crtc, pipe_config); + return ilk_fdi_compute_config(crtc, pipe_config); return 0; } @@ -8795,9 +8914,9 @@ static int pnv_crtc_compute_clock(struct intel_crtc *crtc, DRM_DEBUG_KMS("using SSC reference clock of %d kHz\n", refclk); } - limit = &intel_limits_pineview_lvds; + limit = &pnv_limits_lvds; } else { - limit = &intel_limits_pineview_sdvo; + limit = &pnv_limits_sdvo; } if (!crtc_state->clock_set && @@ -9224,7 +9343,7 @@ out: return ret; } -static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv) +static void ilk_init_pch_refclk(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; int i; @@ -9722,12 +9841,12 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) void intel_init_pch_refclk(struct drm_i915_private *dev_priv) { if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) - ironlake_init_pch_refclk(dev_priv); + ilk_init_pch_refclk(dev_priv); else if (HAS_PCH_LPT(dev_priv)) lpt_init_pch_refclk(dev_priv); } -static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) +static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -9783,7 +9902,7 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) POSTING_READ(PIPECONF(pipe)); } -static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state) +static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -9871,7 +9990,7 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) } } -int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) +int ilk_get_lanes_required(int target_clock, int link_bw, int bpp) { /* * Account for spread spectrum to avoid @@ -9882,14 +10001,14 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) return DIV_ROUND_UP(bps, link_bw * 8); } -static bool ironlake_needs_fb_cb_tune(struct dpll *dpll, int factor) +static bool ilk_needs_fb_cb_tune(struct dpll *dpll, int factor) { return i9xx_dpll_compute_m(dpll) < factor * dpll->n; } -static void ironlake_compute_dpll(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state, - struct dpll *reduced_clock) +static void ilk_compute_dpll(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct dpll *reduced_clock) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 dpll, fp, fp2; @@ -9909,7 +10028,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, fp = i9xx_dpll_compute_fp(&crtc_state->dpll); - if (ironlake_needs_fb_cb_tune(&crtc_state->dpll, factor)) + if (ilk_needs_fb_cb_tune(&crtc_state->dpll, factor)) fp |= FP_CB_TUNE; if (reduced_clock) { @@ -9989,8 +10108,8 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, crtc_state->dpll_hw_state.fp1 = fp2; } -static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) +static int ilk_crtc_compute_clock(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = @@ -10014,17 +10133,17 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, if (intel_is_dual_link_lvds(dev_priv)) { if (refclk == 100000) - limit = &intel_limits_ironlake_dual_lvds_100m; + limit = &ilk_limits_dual_lvds_100m; else - limit = &intel_limits_ironlake_dual_lvds; + limit = &ilk_limits_dual_lvds; } else { if (refclk == 100000) - limit = &intel_limits_ironlake_single_lvds_100m; + limit = &ilk_limits_single_lvds_100m; else - limit = &intel_limits_ironlake_single_lvds; + limit = &ilk_limits_single_lvds; } } else { - limit = &intel_limits_ironlake_dac; + limit = &ilk_limits_dac; } if (!crtc_state->clock_set && @@ -10034,7 +10153,7 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, return -EINVAL; } - ironlake_compute_dpll(crtc, crtc_state, NULL); + ilk_compute_dpll(crtc, crtc_state, NULL); if (!intel_reserve_shared_dplls(state, crtc, NULL)) { DRM_DEBUG_KMS("failed to find PLL for pipe %c\n", @@ -10109,15 +10228,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc, &pipe_config->dp_m2_n2); } -static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_get_fdi_m_n_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder, &pipe_config->fdi_m_n, NULL); } -static void skylake_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void skl_get_pfit_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10148,8 +10267,8 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc, } static void -skylake_get_initial_plane_config(struct intel_crtc *crtc, - struct intel_initial_plane_config *plane_config) +skl_get_initial_plane_config(struct intel_crtc *crtc, + struct intel_initial_plane_config *plane_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10210,6 +10329,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->modifier = INTEL_GEN(dev_priv) >= 12 ? I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS : I915_FORMAT_MOD_Y_TILED_CCS; + else if (val & PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE) + fb->modifier = I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS; else fb->modifier = I915_FORMAT_MOD_Y_TILED; break; @@ -10276,8 +10397,8 @@ error: kfree(intel_fb); } -static void ironlake_get_pfit_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_get_pfit_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10300,8 +10421,8 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc, } } -static bool ironlake_get_pipe_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static bool ilk_get_pipe_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10372,7 +10493,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >> FDI_DP_PORT_WIDTH_SHIFT) + 1; - ironlake_get_fdi_m_n_config(crtc, pipe_config); + ilk_get_fdi_m_n_config(crtc, pipe_config); if (HAS_PCH_IBX(dev_priv)) { /* @@ -10400,7 +10521,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, ((tmp & PLL_REF_SDVO_HDMI_MULTIPLIER_MASK) >> PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT) + 1; - ironlake_pch_clock_get(crtc, pipe_config); + ilk_pch_clock_get(crtc, pipe_config); } else { pipe_config->pixel_multiplier = 1; } @@ -10408,7 +10529,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, intel_get_pipe_timings(crtc, pipe_config); intel_get_pipe_src_size(crtc, pipe_config); - ironlake_get_pfit_config(crtc, pipe_config); + ilk_get_pfit_config(crtc, pipe_config); ret = true; @@ -10417,8 +10538,9 @@ out: return ret; } -static int haswell_crtc_compute_clock(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) + +static int hsw_crtc_compute_clock(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = @@ -10439,9 +10561,8 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, return 0; } -static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void cnl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; u32 temp; @@ -10455,9 +10576,8 @@ static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } -static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum phy phy = intel_port_to_phy(dev_priv, port); enum icl_port_dpll_id port_dpll_id; @@ -10516,9 +10636,8 @@ static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } -static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void skl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; u32 temp; @@ -10532,9 +10651,8 @@ static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } -static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) +static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, + struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; u32 ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); @@ -10722,8 +10840,8 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, return transcoder_is_dsi(pipe_config->cpu_transcoder); } -static void haswell_get_ddi_port_state(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void hsw_get_ddi_port_state(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; @@ -10743,15 +10861,15 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, } if (INTEL_GEN(dev_priv) >= 11) - icelake_get_ddi_pll(dev_priv, port, pipe_config); + icl_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_CANNONLAKE(dev_priv)) - cannonlake_get_ddi_pll(dev_priv, port, pipe_config); + cnl_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_BC(dev_priv)) - skylake_get_ddi_pll(dev_priv, port, pipe_config); + skl_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); else - haswell_get_ddi_pll(dev_priv, port, pipe_config); + hsw_get_ddi_pll(dev_priv, port, pipe_config); pll = pipe_config->shared_dpll; if (pll) { @@ -10772,7 +10890,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >> FDI_DP_PORT_WIDTH_SHIFT) + 1; - ironlake_get_fdi_m_n_config(crtc, pipe_config); + ilk_get_fdi_m_n_config(crtc, pipe_config); } } @@ -10794,7 +10912,7 @@ static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_pr return master_select - 1; } -static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_state) +static void icl_get_trans_port_sync_config(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); u32 transcoders; @@ -10829,8 +10947,8 @@ static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_sta crtc_state->sync_mode_slaves_mask); } -static bool haswell_get_pipe_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static bool hsw_get_pipe_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); intel_wakeref_t wakerefs[POWER_DOMAIN_NUM], wf; @@ -10865,7 +10983,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, if (!transcoder_is_dsi(pipe_config->cpu_transcoder) || INTEL_GEN(dev_priv) >= 11) { - haswell_get_ddi_port_state(crtc, pipe_config); + hsw_get_ddi_port_state(crtc, pipe_config); intel_get_pipe_timings(crtc, pipe_config); } @@ -10922,9 +11040,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) - skylake_get_pfit_config(crtc, pipe_config); + skl_get_pfit_config(crtc, pipe_config); else - ironlake_get_pfit_config(crtc, pipe_config); + ilk_get_pfit_config(crtc, pipe_config); } if (hsw_crtc_supports_ips(crtc)) { @@ -10950,7 +11068,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, if (INTEL_GEN(dev_priv) >= 11 && !transcoder_is_dsi(pipe_config->cpu_transcoder)) - icelake_get_trans_port_sync_config(pipe_config); + icl_get_trans_port_sync_config(pipe_config); out: for_each_power_domain(power_domain, power_domain_mask) @@ -11570,7 +11688,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, { struct intel_crtc *intel_crtc; struct intel_encoder *intel_encoder = - intel_attached_encoder(connector); + intel_attached_encoder(to_intel_connector(connector)); struct drm_crtc *possible_crtc; struct drm_encoder *encoder = &intel_encoder->base; struct drm_crtc *crtc = NULL; @@ -11724,7 +11842,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx) { struct intel_encoder *intel_encoder = - intel_attached_encoder(connector); + intel_attached_encoder(to_intel_connector(connector)); struct drm_encoder *encoder = &intel_encoder->base; struct drm_atomic_state *state = old->restore_state; int ret; @@ -11867,8 +11985,8 @@ int intel_dotclock_calculate(int link_freq, return div_u64(mul_u32_u32(m_n->link_m, link_freq), m_n->link_n); } -static void ironlake_pch_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static void ilk_pch_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -11897,6 +12015,7 @@ static void intel_crtc_state_reset(struct intel_crtc_state *crtc_state, crtc_state->hsw_workaround_pipe = INVALID_PIPE; crtc_state->output_format = INTEL_OUTPUT_FORMAT_INVALID; crtc_state->scaler_state.scaler_id = -1; + crtc_state->mst_master_transcoder = INVALID_TRANSCODER; } static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc) @@ -12278,88 +12397,121 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state) return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes; } -static int icl_add_sync_mode_crtcs(struct intel_crtc_state *crtc_state) +static bool +intel_atomic_is_master_connector(struct intel_crtc_state *crtc_state) +{ + struct drm_crtc *crtc = crtc_state->uapi.crtc; + struct drm_atomic_state *state = crtc_state->uapi.state; + struct drm_connector *connector; + struct drm_connector_state *connector_state; + int i; + + for_each_new_connector_in_state(state, connector, connector_state, i) { + if (connector_state->crtc != crtc) + continue; + if (connector->has_tile && + connector->tile_h_loc == connector->num_h_tile - 1 && + connector->tile_v_loc == connector->num_v_tile - 1) + return true; + } + + return false; +} + +static void reset_port_sync_mode_state(struct intel_crtc_state *crtc_state) +{ + crtc_state->master_transcoder = INVALID_TRANSCODER; + crtc_state->sync_mode_slaves_mask = 0; +} + +static int icl_compute_port_sync_crtc_state(struct drm_connector *connector, + struct intel_crtc_state *crtc_state, + int num_tiled_conns) { struct drm_crtc *crtc = crtc_state->uapi.crtc; struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - struct drm_connector *master_connector, *connector; - struct drm_connector_state *connector_state; + struct drm_connector *master_connector; struct drm_connector_list_iter conn_iter; struct drm_crtc *master_crtc = NULL; struct drm_crtc_state *master_crtc_state; struct intel_crtc_state *master_pipe_config; - int i, tile_group_id; if (INTEL_GEN(dev_priv) < 11) return 0; + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP)) + return 0; + /* * In case of tiled displays there could be one or more slaves but there is * only one master. Lets make the CRTC used by the connector corresponding * to the last horizonal and last vertical tile a master/genlock CRTC. * All the other CRTCs corresponding to other tiles of the same Tile group * are the slave CRTCs and hold a pointer to their genlock CRTC. + * If all tiles not present do not make master slave assignments. */ - for_each_new_connector_in_state(&state->base, connector, connector_state, i) { - if (connector_state->crtc != crtc) - continue; - if (!connector->has_tile) + if (!connector->has_tile || + crtc_state->hw.mode.hdisplay != connector->tile_h_size || + crtc_state->hw.mode.vdisplay != connector->tile_v_size || + num_tiled_conns < connector->num_h_tile * connector->num_v_tile) { + reset_port_sync_mode_state(crtc_state); + return 0; + } + /* Last Horizontal and last vertical tile connector is a master + * Master's crtc state is already populated in slave for port sync + */ + if (connector->tile_h_loc == connector->num_h_tile - 1 && + connector->tile_v_loc == connector->num_v_tile - 1) + return 0; + + /* Loop through all connectors and configure the Slave crtc_state + * to point to the correct master. + */ + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_for_each_connector_iter(master_connector, &conn_iter) { + struct drm_connector_state *master_conn_state = NULL; + + if (!(master_connector->has_tile && + master_connector->tile_group->id == connector->tile_group->id)) continue; - if (crtc_state->hw.mode.hdisplay != connector->tile_h_size || - crtc_state->hw.mode.vdisplay != connector->tile_v_size) - return 0; - if (connector->tile_h_loc == connector->num_h_tile - 1 && - connector->tile_v_loc == connector->num_v_tile - 1) + if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 || + master_connector->tile_v_loc != master_connector->num_v_tile - 1) continue; - crtc_state->sync_mode_slaves_mask = 0; - tile_group_id = connector->tile_group->id; - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); - drm_for_each_connector_iter(master_connector, &conn_iter) { - struct drm_connector_state *master_conn_state = NULL; - - if (!master_connector->has_tile) - continue; - if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 || - master_connector->tile_v_loc != master_connector->num_v_tile - 1) - continue; - if (master_connector->tile_group->id != tile_group_id) - continue; - master_conn_state = drm_atomic_get_connector_state(&state->base, - master_connector); - if (IS_ERR(master_conn_state)) { - drm_connector_list_iter_end(&conn_iter); - return PTR_ERR(master_conn_state); - } - if (master_conn_state->crtc) { - master_crtc = master_conn_state->crtc; - break; - } + master_conn_state = drm_atomic_get_connector_state(&state->base, + master_connector); + if (IS_ERR(master_conn_state)) { + drm_connector_list_iter_end(&conn_iter); + return PTR_ERR(master_conn_state); } - drm_connector_list_iter_end(&conn_iter); - - if (!master_crtc) { - DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n", - connector_state->crtc->base.id); - return -EINVAL; + if (master_conn_state->crtc) { + master_crtc = master_conn_state->crtc; + break; } + } + drm_connector_list_iter_end(&conn_iter); - master_crtc_state = drm_atomic_get_crtc_state(&state->base, - master_crtc); - if (IS_ERR(master_crtc_state)) - return PTR_ERR(master_crtc_state); - - master_pipe_config = to_intel_crtc_state(master_crtc_state); - crtc_state->master_transcoder = master_pipe_config->cpu_transcoder; - master_pipe_config->sync_mode_slaves_mask |= - BIT(crtc_state->cpu_transcoder); - DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n", - transcoder_name(crtc_state->master_transcoder), - crtc_state->uapi.crtc->base.id, - master_pipe_config->sync_mode_slaves_mask); + if (!master_crtc) { + DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n", + crtc->base.id); + return -EINVAL; } + master_crtc_state = drm_atomic_get_crtc_state(&state->base, + master_crtc); + if (IS_ERR(master_crtc_state)) + return PTR_ERR(master_crtc_state); + + master_pipe_config = to_intel_crtc_state(master_crtc_state); + crtc_state->master_transcoder = master_pipe_config->cpu_transcoder; + master_pipe_config->sync_mode_slaves_mask |= + BIT(crtc_state->cpu_transcoder); + DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n", + transcoder_name(crtc_state->master_transcoder), + crtc->base.id, + master_pipe_config->sync_mode_slaves_mask); + return 0; } @@ -12755,6 +12907,9 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, pipe_config->csc_mode, pipe_config->gamma_mode, pipe_config->gamma_enable, pipe_config->csc_enable); + DRM_DEBUG_KMS("MST master transcoder: %s\n", + transcoder_name(pipe_config->mst_master_transcoder)); + dump_planes: if (!state) return; @@ -12901,9 +13056,11 @@ intel_crtc_prepare_cleared_state(struct intel_crtc_state *crtc_state) saved_state->wm = crtc_state->wm; /* * Save the slave bitmask which gets filled for master crtc state during - * slave atomic check call. + * slave atomic check call. For all other CRTCs reset the port sync variables + * crtc_state->master_transcoder needs to be set to INVALID */ - if (is_trans_port_sync_master(crtc_state)) + reset_port_sync_mode_state(saved_state); + if (intel_atomic_is_master_connector(crtc_state)) saved_state->sync_mode_slaves_mask = crtc_state->sync_mode_slaves_mask; @@ -12924,7 +13081,7 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config) struct drm_connector *connector; struct drm_connector_state *connector_state; int base_bpp, ret; - int i; + int i, tile_group_id = -1, num_tiled_conns = 0; bool retry = true; pipe_config->cpu_transcoder = @@ -12994,13 +13151,22 @@ encoder_retry: drm_mode_set_crtcinfo(&pipe_config->hw.adjusted_mode, CRTC_STEREO_DOUBLE); - /* Set the crtc_state defaults for trans_port_sync */ - pipe_config->master_transcoder = INVALID_TRANSCODER; - ret = icl_add_sync_mode_crtcs(pipe_config); - if (ret) { - DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n", - ret); - return ret; + /* Get tile_group_id of tiled connector */ + for_each_new_connector_in_state(state, connector, connector_state, i) { + if (connector_state->crtc == crtc && + connector->has_tile) { + tile_group_id = connector->tile_group->id; + break; + } + } + + /* Get total number of tiled connectors in state that belong to + * this tile group. + */ + for_each_new_connector_in_state(state, connector, connector_state, i) { + if (connector->has_tile && + connector->tile_group->id == tile_group_id) + num_tiled_conns++; } /* Pass our mode to the connectors and the CRTC to give them a chance to @@ -13011,6 +13177,14 @@ encoder_retry: if (connector_state->crtc != crtc) continue; + ret = icl_compute_port_sync_crtc_state(connector, pipe_config, + num_tiled_conns); + if (ret) { + DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n", + ret); + return ret; + } + encoder = to_intel_encoder(connector_state->best_encoder); ret = encoder->compute_config(encoder, pipe_config, connector_state); @@ -13535,6 +13709,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(dsc.dsc_split); PIPE_CONF_CHECK_I(dsc.compressed_bpp); + PIPE_CONF_CHECK_I(mst_master_transcoder); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_BOOL @@ -14048,7 +14224,7 @@ static void intel_modeset_clear_plls(struct intel_atomic_state *state) * multiple pipes, and planes are enabled after the pipe, we need to wait at * least 2 vblanks on the first pipe before enabling planes on the second pipe. */ -static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) +static int hsw_mode_set_planes_workaround(struct intel_atomic_state *state) { struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; @@ -14143,7 +14319,7 @@ static int intel_modeset_checks(struct intel_atomic_state *state) intel_modeset_clear_plls(state); if (IS_HASWELL(dev_priv)) - return haswell_mode_set_planes_workaround(state); + return hsw_mode_set_planes_workaround(state); return 0; } @@ -14173,7 +14349,11 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta new_crtc_state->uapi.mode_changed = false; new_crtc_state->update_pipe = true; +} +static void intel_crtc_copy_fastset(const struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ /* * If we're not doing the full modeset we want to * keep the current M/N values as they may be @@ -14296,6 +14476,107 @@ static int intel_atomic_check_crtcs(struct intel_atomic_state *state) return 0; } +static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state, + enum transcoder transcoder) +{ + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) + if (new_crtc_state->cpu_transcoder == transcoder) + return needs_modeset(new_crtc_state); + + return false; +} + +static void +intel_modeset_synced_crtcs(struct intel_atomic_state *state, + u8 transcoders) +{ + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + if (transcoders & BIT(new_crtc_state->cpu_transcoder)) { + new_crtc_state->uapi.mode_changed = true; + new_crtc_state->update_pipe = false; + } + } +} + +static int +intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + int ret = 0; + + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + + if (!connector->has_tile || + connector->tile_group->id != tile_grp_id) + continue; + conn_state = drm_atomic_get_connector_state(&state->base, + connector); + if (IS_ERR(conn_state)) { + ret = PTR_ERR(conn_state); + break; + } + + if (!conn_state->crtc) + continue; + + crtc_state = drm_atomic_get_crtc_state(&state->base, + conn_state->crtc); + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); + break; + } + crtc_state->mode_changed = true; + ret = drm_atomic_add_affected_connectors(&state->base, + conn_state->crtc); + if (ret) + break; + } + drm_connector_list_iter_end(&conn_iter); + + return ret; +} + +static int +intel_atomic_check_tiled_conns(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_connector *connector; + struct drm_connector_state *old_conn_state, *new_conn_state; + int i, ret; + + if (INTEL_GEN(dev_priv) < 11) + return 0; + + /* Is tiled, mark all other tiled CRTCs as needing a modeset */ + for_each_oldnew_connector_in_state(&state->base, connector, + old_conn_state, new_conn_state, i) { + if (!connector->has_tile) + continue; + if (!intel_connector_needs_modeset(state, connector)) + continue; + + ret = intel_modeset_all_tiles(state, connector->tile_group->id); + if (ret) + return ret; + } + + return 0; +} + /** * intel_atomic_check - validate state object * @dev: drm device @@ -14323,6 +14604,21 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + /** + * This check adds all the connectors in current state that belong to + * the same tile group to a full modeset. + * This function directly sets the mode_changed to true and we also call + * drm_atomic_add_affected_connectors(). Hence we are not explicitly + * calling drm_atomic_helper_check_modeset() after this. + * + * Fixme: Handle some corner cases where one of the + * tiled connectors gets disconnected and tile info is lost but since it + * was previously synced to other conn, we need to add that to the modeset. + */ + ret = intel_atomic_check_tiled_conns(state); + if (ret) + goto fail; + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (!needs_modeset(new_crtc_state)) { @@ -14334,8 +14630,6 @@ static int intel_atomic_check(struct drm_device *dev, if (!new_crtc_state->uapi.enable) { intel_crtc_copy_uapi_to_hw_state(new_crtc_state); - - any_ms = true; continue; } @@ -14348,9 +14642,49 @@ static int intel_atomic_check(struct drm_device *dev, goto fail; intel_crtc_check_fastset(old_crtc_state, new_crtc_state); + } + + /** + * Check if fastset is allowed by external dependencies like other + * pipes and transcoders. + * + * Right now it only forces a fullmodeset when the MST master + * transcoder did not changed but the pipe of the master transcoder + * needs a fullmodeset so all slaves also needs to do a fullmodeset or + * in case of port synced crtcs, if one of the synced crtcs + * needs a full modeset, all other synced crtcs should be + * forced a full modeset. + */ + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (!new_crtc_state->hw.enable || needs_modeset(new_crtc_state)) + continue; + + if (intel_dp_mst_is_slave_trans(new_crtc_state)) { + enum transcoder master = new_crtc_state->mst_master_transcoder; + + if (intel_cpu_transcoder_needs_modeset(state, master)) { + new_crtc_state->uapi.mode_changed = true; + new_crtc_state->update_pipe = false; + } + } else if (is_trans_port_sync_mode(new_crtc_state)) { + u8 trans = new_crtc_state->sync_mode_slaves_mask | + BIT(new_crtc_state->master_transcoder); - if (needs_modeset(new_crtc_state)) + intel_modeset_synced_crtcs(state, trans); + } + } + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (needs_modeset(new_crtc_state)) { any_ms = true; + continue; + } + + if (!new_crtc_state->update_pipe) + continue; + + intel_crtc_copy_fastset(old_crtc_state, new_crtc_state); } if (any_ms && !check_digital_port_conflicts(state)) { @@ -14472,12 +14806,12 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, skl_detach_scalers(new_crtc_state); if (new_crtc_state->pch_pfit.enabled) - skylake_pfit_enable(new_crtc_state); + skl_pfit_enable(new_crtc_state); } else if (HAS_PCH_SPLIT(dev_priv)) { if (new_crtc_state->pch_pfit.enabled) - ironlake_pfit_enable(new_crtc_state); + ilk_pfit_enable(new_crtc_state); else if (old_crtc_state->pch_pfit.enabled) - ironlake_pfit_disable(old_crtc_state); + ilk_pfit_disable(old_crtc_state); } if (INTEL_GEN(dev_priv) >= 11) @@ -14619,7 +14953,7 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state) u32 handled = 0; int i; - /* Only disable port sync slaves */ + /* Only disable port sync and MST slaves */ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (!needs_modeset(new_crtc_state)) @@ -14633,7 +14967,8 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state) * slave CRTCs are disabled first and then master CRTC since * Slave vblanks are masked till Master Vblanks. */ - if (!is_trans_port_sync_slave(old_crtc_state)) + if (!is_trans_port_sync_slave(old_crtc_state) && + !intel_dp_mst_is_slave_trans(old_crtc_state)) continue; intel_pre_plane_update(state, crtc); @@ -14694,10 +15029,14 @@ static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc, if (conn_state->crtc == &crtc->base) break; } - intel_dp = enc_to_intel_dp(&intel_attached_encoder(conn)->base); + intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(conn))); intel_dp_stop_link_train(intel_dp); } +/* + * TODO: This is only called from port sync and it is identical to what will be + * executed again in intel_update_crtc() over port sync pipes + */ static void intel_post_crtc_enable_updates(struct intel_crtc *crtc, struct intel_atomic_state *state) { @@ -14786,15 +15125,21 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; u8 required_slices = state->wm_results.ddb.enabled_slices; struct skl_ddb_entry entries[I915_MAX_PIPES] = {}; - u8 dirty_pipes = 0; + const u8 num_pipes = INTEL_NUM_PIPES(dev_priv); + u8 update_pipes = 0, modeset_pipes = 0; int i; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (!new_crtc_state->hw.active) + continue; + /* ignore allocations for crtc's that have been turned off. */ - if (!needs_modeset(new_crtc_state) && new_crtc_state->hw.active) + if (!needs_modeset(new_crtc_state)) { entries[i] = old_crtc_state->wm.skl.ddb; - if (new_crtc_state->hw.active) - dirty_pipes |= BIT(crtc->pipe); + update_pipes |= BIT(crtc->pipe); + } else { + modeset_pipes |= BIT(crtc->pipe); + } } /* If 2nd DBuf slice required, enable it here */ @@ -14804,38 +15149,29 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) /* * Whenever the number of active pipes changes, we need to make sure we * update the pipes in the right order so that their ddb allocations - * never overlap with eachother inbetween CRTC updates. Otherwise we'll + * never overlap with each other between CRTC updates. Otherwise we'll * cause pipe underruns and other bad stuff. + * + * So first lets enable all pipes that do not need a fullmodeset as + * those don't have any external dependency. */ - while (dirty_pipes) { + while (update_pipes) { for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { enum pipe pipe = crtc->pipe; - bool modeset = needs_modeset(new_crtc_state); - if ((dirty_pipes & BIT(pipe)) == 0) + if ((update_pipes & BIT(pipe)) == 0) continue; if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, - entries, - INTEL_NUM_PIPES(dev_priv), i)) + entries, num_pipes, i)) continue; entries[i] = new_crtc_state->wm.skl.ddb; - dirty_pipes &= ~BIT(pipe); - - if (modeset && is_trans_port_sync_mode(new_crtc_state)) { - if (is_trans_port_sync_master(new_crtc_state)) - intel_update_trans_port_sync_crtcs(crtc, - state, - old_crtc_state, - new_crtc_state); - else - continue; - } else { - intel_update_crtc(crtc, state, old_crtc_state, - new_crtc_state); - } + update_pipes &= ~BIT(pipe); + + intel_update_crtc(crtc, state, old_crtc_state, + new_crtc_state); /* * If this is an already active pipe, it's DDB changed, @@ -14845,11 +15181,72 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) */ if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb, &old_crtc_state->wm.skl.ddb) && - !modeset && dirty_pipes) + (update_pipes | modeset_pipes)) intel_wait_for_vblank(dev_priv, pipe); } } + /* + * Enable all pipes that needs a modeset and do not depends on other + * pipes + */ + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + enum pipe pipe = crtc->pipe; + + if ((modeset_pipes & BIT(pipe)) == 0) + continue; + + if (intel_dp_mst_is_slave_trans(new_crtc_state) || + is_trans_port_sync_slave(new_crtc_state)) + continue; + + WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, + entries, num_pipes, i)); + + entries[i] = new_crtc_state->wm.skl.ddb; + modeset_pipes &= ~BIT(pipe); + + if (is_trans_port_sync_mode(new_crtc_state)) { + struct intel_crtc *slave_crtc; + + intel_update_trans_port_sync_crtcs(crtc, state, + old_crtc_state, + new_crtc_state); + + slave_crtc = intel_get_slave_crtc(new_crtc_state); + /* TODO: update entries[] of slave */ + modeset_pipes &= ~BIT(slave_crtc->pipe); + + } else { + intel_update_crtc(crtc, state, old_crtc_state, + new_crtc_state); + } + } + + /* + * Finally enable all pipes that needs a modeset and depends on + * other pipes, right now it is only MST slaves as both port sync slave + * and master are enabled together + */ + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + enum pipe pipe = crtc->pipe; + + if ((modeset_pipes & BIT(pipe)) == 0) + continue; + + WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, + entries, num_pipes, i)); + + entries[i] = new_crtc_state->wm.skl.ddb; + modeset_pipes &= ~BIT(pipe); + + intel_update_crtc(crtc, state, old_crtc_state, new_crtc_state); + } + + WARN_ON(modeset_pipes); + /* If 2nd DBuf slice is no more required disable it */ if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) icl_dbuf_slices_update(dev_priv, required_slices); @@ -16586,8 +16983,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ - if (mode_cmd->offsets[0] != 0) + if (mode_cmd->offsets[0] != 0) { + DRM_DEBUG_KMS("plane 0 offset (0x%08x) must be 0\n", + mode_cmd->offsets[0]); goto err; + } drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); @@ -16814,29 +17214,28 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) intel_init_cdclk_hooks(dev_priv); if (INTEL_GEN(dev_priv) >= 9) { - dev_priv->display.get_pipe_config = haswell_get_pipe_config; + dev_priv->display.get_pipe_config = hsw_get_pipe_config; dev_priv->display.get_initial_plane_config = - skylake_get_initial_plane_config; - dev_priv->display.crtc_compute_clock = - haswell_crtc_compute_clock; - dev_priv->display.crtc_enable = haswell_crtc_enable; - dev_priv->display.crtc_disable = haswell_crtc_disable; + skl_get_initial_plane_config; + dev_priv->display.crtc_compute_clock = hsw_crtc_compute_clock; + dev_priv->display.crtc_enable = hsw_crtc_enable; + dev_priv->display.crtc_disable = hsw_crtc_disable; } else if (HAS_DDI(dev_priv)) { - dev_priv->display.get_pipe_config = haswell_get_pipe_config; + dev_priv->display.get_pipe_config = hsw_get_pipe_config; dev_priv->display.get_initial_plane_config = i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = - haswell_crtc_compute_clock; - dev_priv->display.crtc_enable = haswell_crtc_enable; - dev_priv->display.crtc_disable = haswell_crtc_disable; + hsw_crtc_compute_clock; + dev_priv->display.crtc_enable = hsw_crtc_enable; + dev_priv->display.crtc_disable = hsw_crtc_disable; } else if (HAS_PCH_SPLIT(dev_priv)) { - dev_priv->display.get_pipe_config = ironlake_get_pipe_config; + dev_priv->display.get_pipe_config = ilk_get_pipe_config; dev_priv->display.get_initial_plane_config = i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = - ironlake_crtc_compute_clock; - dev_priv->display.crtc_enable = ironlake_crtc_enable; - dev_priv->display.crtc_disable = ironlake_crtc_disable; + ilk_crtc_compute_clock; + dev_priv->display.crtc_enable = ilk_crtc_enable; + dev_priv->display.crtc_disable = ilk_crtc_disable; } else if (IS_CHERRYVIEW(dev_priv)) { dev_priv->display.get_pipe_config = i9xx_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -16882,7 +17281,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } if (IS_GEN(dev_priv, 5)) { - dev_priv->display.fdi_link_train = ironlake_fdi_link_train; + dev_priv->display.fdi_link_train = ilk_fdi_link_train; } else if (IS_GEN(dev_priv, 6)) { dev_priv->display.fdi_link_train = gen6_fdi_link_train; } else if (IS_IVYBRIDGE(dev_priv)) { @@ -17827,8 +18226,11 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) static void intel_early_display_was(struct drm_i915_private *dev_priv) { - /* Display WA #1185 WaDisableDARBFClkGating:cnl,glk */ - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + /* + * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl + * Also known as Wa_14010480278. + */ + if (IS_GEN_RANGE(dev_priv, 10, 12) || IS_GEMINILAKE(dev_priv)) I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | DARBF_GATING_DIS); @@ -17928,7 +18330,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* We need to sanitize only the MST primary port. */ if (encoder->type != INTEL_OUTPUT_DP_MST && intel_phy_is_tc(dev_priv, phy)) - intel_tc_port_sanitize(enc_to_dig_port(&encoder->base)); + intel_tc_port_sanitize(enc_to_dig_port(encoder)); } get_encoder_power_domains(dev_priv); @@ -18101,6 +18503,8 @@ void intel_modeset_driver_remove(struct drm_i915_private *i915) intel_gmbus_teardown(i915); + intel_bw_cleanup(i915); + destroy_workqueue(i915->flip_wq); destroy_workqueue(i915->modeset_wq); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 0fef9263cddc..028aab728514 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -474,6 +474,7 @@ void intel_link_compute_m_n(u16 bpp, int nlanes, struct intel_link_m_n *m_n, bool constant_n, bool fec_enable); bool is_ccs_modifier(u64 modifier); +int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane); void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, u32 pixel_format, u64 modifier); @@ -521,7 +522,7 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc); void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state); -int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp); +int ilk_get_lanes_required(int target_clock, int link_bw, int bpp); void vlv_wait_port_ready(struct drm_i915_private *dev_priv, struct intel_digital_port *dport, unsigned int expected_mask); @@ -578,8 +579,8 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); -void skylake_scaler_disable(const struct intel_crtc_state *old_crtc_state); -void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state); +void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state); +void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state); u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 679457156797..21561acfa3ac 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -514,7 +514,7 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, if (encoder->type == INTEL_OUTPUT_DP_MST) continue; - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); if (WARN_ON(!dig_port)) continue; @@ -1664,8 +1664,8 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_power_domains *power_domains = &dev_priv->power_domains; - enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base)); - enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); + enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(encoder)); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder)); mutex_lock(&power_domains->lock); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 83ea04149b77..888ea8a170d1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -90,8 +90,8 @@ struct intel_framebuffer { /* for each plane in the normal GTT view */ struct { unsigned int x, y; - } normal[2]; - /* for each plane in the rotated GTT view */ + } normal[4]; + /* for each plane in the rotated GTT view for no-CCS formats */ struct { unsigned int x, y; unsigned int pitch; /* pixels */ @@ -555,7 +555,7 @@ struct intel_plane_state { */ u32 stride; int x, y; - } color_plane[2]; + } color_plane[4]; /* plane control register */ u32 ctl; @@ -1054,6 +1054,9 @@ struct intel_crtc_state { /* Bitmask to indicate slaves attached */ u8 sync_mode_slaves_mask; + + /* Only valid on TGL+ */ + enum transcoder mst_master_transcoder; }; struct intel_crtc { @@ -1435,9 +1438,9 @@ struct intel_load_detect_pipe { }; static inline struct intel_encoder * -intel_attached_encoder(struct drm_connector *connector) +intel_attached_encoder(struct intel_connector *connector) { - return to_intel_connector(connector)->encoder; + return connector->encoder; } static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder) @@ -1454,12 +1457,12 @@ static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder) } static inline struct intel_digital_port * -enc_to_dig_port(struct drm_encoder *encoder) +enc_to_dig_port(struct intel_encoder *encoder) { - struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + struct intel_encoder *intel_encoder = encoder; if (intel_encoder_is_dig_port(intel_encoder)) - return container_of(encoder, struct intel_digital_port, + return container_of(&encoder->base, struct intel_digital_port, base.base); else return NULL; @@ -1468,16 +1471,17 @@ enc_to_dig_port(struct drm_encoder *encoder) static inline struct intel_digital_port * conn_to_dig_port(struct intel_connector *connector) { - return enc_to_dig_port(&intel_attached_encoder(&connector->base)->base); + return enc_to_dig_port(intel_attached_encoder(connector)); } static inline struct intel_dp_mst_encoder * -enc_to_mst(struct drm_encoder *encoder) +enc_to_mst(struct intel_encoder *encoder) { - return container_of(encoder, struct intel_dp_mst_encoder, base.base); + return container_of(&encoder->base, struct intel_dp_mst_encoder, + base.base); } -static inline struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder) +static inline struct intel_dp *enc_to_intel_dp(struct intel_encoder *encoder) { return &enc_to_dig_port(encoder)->dp; } @@ -1490,14 +1494,14 @@ static inline bool intel_encoder_is_dp(struct intel_encoder *encoder) return true; case INTEL_OUTPUT_DDI: /* Skip pure HDMI/DVI DDI encoders */ - return i915_mmio_reg_valid(enc_to_intel_dp(&encoder->base)->output_reg); + return i915_mmio_reg_valid(enc_to_intel_dp(encoder)->output_reg); default: return false; } } static inline struct intel_lspcon * -enc_to_intel_lspcon(struct drm_encoder *encoder) +enc_to_intel_lspcon(struct intel_encoder *encoder) { return &enc_to_dig_port(encoder)->lspcon; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2f31d226c6eb..c7424e2a04a3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -146,9 +146,9 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp) return intel_dig_port->base.type == INTEL_OUTPUT_EDP; } -static struct intel_dp *intel_attached_dp(struct drm_connector *connector) +static struct intel_dp *intel_attached_dp(struct intel_connector *connector) { - return enc_to_intel_dp(&intel_attached_encoder(connector)->base); + return enc_to_intel_dp(intel_attached_encoder(connector)); } static void intel_dp_link_down(struct intel_encoder *encoder, @@ -614,7 +614,7 @@ static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; struct drm_i915_private *dev_priv = to_i915(connector->dev); @@ -834,7 +834,7 @@ static enum pipe vlv_find_free_pps(struct drm_i915_private *dev_priv) * Pick one that's not used by other ports. */ for_each_intel_dp(&dev_priv->drm, encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (encoder->type == INTEL_OUTPUT_EDP) { WARN_ON(intel_dp->active_pipe != INVALID_PIPE && @@ -1031,7 +1031,7 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) */ for_each_intel_dp(&dev_priv->drm, encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); WARN_ON(intel_dp->active_pipe != INVALID_PIPE); @@ -2034,7 +2034,7 @@ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) static int intel_dp_dsc_compute_params(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; u8 line_buf_depth; int ret; @@ -2205,7 +2205,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct link_config_limits limits; int common_len; int ret; @@ -2366,8 +2366,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); enum port port = encoder->port; struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_connector *intel_connector = intel_dp->attached_connector; @@ -2482,7 +2482,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -2509,7 +2509,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, * * CPT PCH is quite different, having many bits moved * to the TRANS_DP_CTL register instead. That - * configuration happens (oddly) in ironlake_pch_enable + * configuration happens (oddly) in ilk_pch_enable */ /* Preserve the BIOS-computed detected bit. This is @@ -2653,7 +2653,7 @@ static void edp_wait_backlight_off(struct intel_dp *intel_dp) * is locked */ -static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) +static u32 ilk_get_pp_control(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 control; @@ -2703,7 +2703,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) if (!edp_have_panel_power(intel_dp)) wait_panel_power_cycle(intel_dp); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp |= EDP_FORCE_VDD; pp_stat_reg = _pp_stat_reg(intel_dp); @@ -2768,7 +2768,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) intel_dig_port->base.base.base.id, intel_dig_port->base.base.name); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp &= ~EDP_FORCE_VDD; pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -2864,7 +2864,7 @@ static void edp_panel_on(struct intel_dp *intel_dp) wait_panel_power_cycle(intel_dp); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); if (IS_GEN(dev_priv, 5)) { /* ILK workaround: disable reset around power sequence */ pp &= ~PANEL_POWER_RESET; @@ -2919,7 +2919,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) WARN(!intel_dp->want_panel_vdd, "Need [ENCODER:%d:%s] VDD to turn off panel\n", dig_port->base.base.base.id, dig_port->base.base.name); - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some * panels get very unhappy and cease to work. */ pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | @@ -2968,7 +2968,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); u32 pp; - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp |= EDP_BLC_ENABLE; I915_WRITE(pp_ctrl_reg, pp); @@ -2980,7 +2980,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(conn_state->best_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(conn_state->best_encoder)); if (!intel_dp_is_edp(intel_dp)) return; @@ -3004,7 +3004,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); u32 pp; - pp = ironlake_get_pp_control(intel_dp); + pp = ilk_get_pp_control(intel_dp); pp &= ~EDP_BLC_ENABLE; I915_WRITE(pp_ctrl_reg, pp); @@ -3018,7 +3018,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) /* Disable backlight PP control and backlight PWM. */ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(old_conn_state->best_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)); if (!intel_dp_is_edp(intel_dp)) return; @@ -3036,13 +3036,13 @@ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) static void intel_edp_backlight_power(struct intel_connector *connector, bool enable) { - struct intel_dp *intel_dp = intel_attached_dp(&connector->base); + struct intel_dp *intel_dp = intel_attached_dp(connector); intel_wakeref_t wakeref; bool is_enabled; is_enabled = false; with_pps_lock(intel_dp, wakeref) - is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE; + is_enabled = ilk_get_pp_control(intel_dp) & EDP_BLC_ENABLE; if (is_enabled == enable) return; @@ -3079,13 +3079,13 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state) #define assert_edp_pll_enabled(d) assert_edp_pll((d), true) #define assert_edp_pll_disabled(d) assert_edp_pll((d), false) -static void ironlake_edp_pll_on(struct intel_dp *intel_dp, - const struct intel_crtc_state *pipe_config) +static void ilk_edp_pll_on(struct intel_dp *intel_dp, + const struct intel_crtc_state *pipe_config) { struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder); assert_dp_port_disabled(intel_dp); assert_edp_pll_disabled(dev_priv); @@ -3119,13 +3119,13 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp, udelay(200); } -static void ironlake_edp_pll_off(struct intel_dp *intel_dp, - const struct intel_crtc_state *old_crtc_state) +static void ilk_edp_pll_off(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - assert_pipe_disabled(dev_priv, crtc->pipe); + assert_pipe_disabled(dev_priv, old_crtc_state->cpu_transcoder); assert_dp_port_disabled(intel_dp); assert_edp_pll_enabled(dev_priv); @@ -3258,7 +3258,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_wakeref_t wakeref; bool ret; @@ -3279,7 +3279,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); u32 tmp, flags = 0; enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); @@ -3363,7 +3363,7 @@ static void intel_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_dp->link_trained = false; @@ -3397,7 +3397,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; /* @@ -3410,7 +3410,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder, /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_off(intel_dp, old_crtc_state); + ilk_edp_pll_off(intel_dp, old_crtc_state); } static void vlv_post_disable_dp(struct intel_encoder *encoder, @@ -3548,7 +3548,7 @@ static void intel_enable_dp(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); u32 dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; @@ -3608,14 +3608,14 @@ static void g4x_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; intel_dp_prepare(encoder, pipe_config); /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_on(intel_dp, pipe_config); + ilk_edp_pll_on(intel_dp, pipe_config); } static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) @@ -3658,7 +3658,7 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, lockdep_assert_held(&dev_priv->pps_mutex); for_each_intel_dp(&dev_priv->drm, encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); WARN(intel_dp->active_pipe == pipe, "stealing pipe %c power sequencer from active [ENCODER:%d:%s]\n", @@ -3681,7 +3681,7 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); lockdep_assert_held(&dev_priv->pps_mutex); @@ -4203,7 +4203,7 @@ intel_dp_link_down(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum port port = encoder->port; u32 DP = intel_dp->DP; @@ -4903,7 +4903,7 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); /* Set test active flag here so userspace doesn't interrupt things */ - intel_dp->compliance.test_active = 1; + intel_dp->compliance.test_active = true; return DP_TEST_ACK; } @@ -4947,7 +4947,7 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp) } /* Set test active flag here so userspace doesn't interrupt things */ - intel_dp->compliance.test_active = 1; + intel_dp->compliance.test_active = true; return test_result; } @@ -5096,7 +5096,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_connector *connector = intel_dp->attached_connector; struct drm_connector_state *conn_state; struct intel_crtc_state *crtc_state; @@ -5536,7 +5536,7 @@ static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv, static bool icp_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); if (intel_phy_is_combo(dev_priv, phy)) @@ -5651,7 +5651,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum drm_connector_status status; @@ -5755,7 +5755,7 @@ out: static void intel_dp_force(struct drm_connector *connector) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *intel_encoder = &dig_port->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); @@ -5790,7 +5790,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) } /* if eDP has no EDID, fall back to fixed mode */ - if (intel_dp_is_edp(intel_attached_dp(connector)) && + if (intel_dp_is_edp(intel_attached_dp(to_intel_connector(connector))) && intel_connector->panel.fixed_mode) { struct drm_display_mode *mode; @@ -5808,7 +5808,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) static int intel_dp_connector_register(struct drm_connector *connector) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); int ret; ret = intel_connector_register(connector); @@ -5830,7 +5830,7 @@ intel_dp_connector_register(struct drm_connector *connector) static void intel_dp_connector_unregister(struct drm_connector *connector) { - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); drm_dp_cec_unregister_connector(&intel_dp->aux); drm_dp_aux_unregister(&intel_dp->aux); @@ -5839,7 +5839,7 @@ intel_dp_connector_unregister(struct drm_connector *connector) void intel_dp_encoder_flush_work(struct drm_encoder *encoder) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(to_intel_encoder(encoder)); struct intel_dp *intel_dp = &intel_dig_port->dp; intel_dp_mst_encoder_cleanup(intel_dig_port); @@ -5868,12 +5868,12 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder) intel_dp_encoder_flush_work(encoder); drm_encoder_cleanup(encoder); - kfree(enc_to_dig_port(encoder)); + kfree(enc_to_dig_port(to_intel_encoder(encoder))); } void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp)) @@ -5904,7 +5904,7 @@ static int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, u8 *an) { - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&intel_dig_port->base.base)); static const struct drm_dp_aux_msg msg = { .request = DP_AUX_NATIVE_WRITE, .address = DP_AUX_HDCP_AKSV, @@ -6514,7 +6514,7 @@ static enum pipe vlv_active_pipe(struct intel_dp *intel_dp) void intel_dp_encoder_reset(struct drm_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(encoder)); struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); intel_wakeref_t wakeref; @@ -6693,7 +6693,7 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) intel_pps_get_registers(intel_dp, ®s); - pp_ctl = ironlake_get_pp_control(intel_dp); + pp_ctl = ilk_get_pp_control(intel_dp); /* Ensure PPS is unlocked */ if (!HAS_DDI(dev_priv)) @@ -6863,7 +6863,7 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, * soon as the new power sequencer gets initialized. */ if (force_disable_vdd) { - u32 pp = ironlake_get_pp_control(intel_dp); + u32 pp = ilk_get_pp_control(intel_dp); WARN(pp & PANEL_POWER_ON, "Panel power already on\n"); @@ -7660,7 +7660,7 @@ void intel_dp_mst_suspend(struct drm_i915_private *dev_priv) if (encoder->type != INTEL_OUTPUT_DDI) continue; - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (!intel_dp->can_mst) continue; @@ -7681,7 +7681,7 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv) if (encoder->type != INTEL_OUTPUT_DDI) continue; - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (!intel_dp->can_mst) continue; diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 020422da2ae2..7c653f8c307f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -57,7 +57,7 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) */ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) { - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 read_val[2] = { 0x0 }; u16 level = 0; @@ -82,7 +82,7 @@ static void intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 vals[2] = { 0x0 }; vals[0] = level; @@ -110,7 +110,7 @@ intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 lev static bool intel_dp_aux_set_pwm_freq(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); int freq, fxp, fxp_min, fxp_max, fxp_actual, f = 1; u8 pn, pn_min, pn_max; @@ -178,7 +178,7 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st const struct drm_connector_state *conn_state) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode; if (drm_dp_dpcd_readb(&intel_dp->aux, @@ -222,13 +222,14 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st static void intel_dp_aux_disable_backlight(const struct drm_connector_state *old_conn_state) { - set_aux_backlight_enable(enc_to_intel_dp(old_conn_state->best_encoder), false); + set_aux_backlight_enable(enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)), + false); } static int intel_dp_aux_setup_backlight(struct intel_connector *connector, enum pipe pipe) { - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); struct intel_panel *panel = &connector->panel; if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) @@ -247,7 +248,7 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, static bool intel_dp_aux_display_control_capable(struct intel_connector *connector) { - struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); /* Check the eDP Display control capabilities registers to determine if * the panel can support backlight control over the aux channel diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 53bc14d0e953..cba68c5a80fa 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -43,7 +43,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, struct link_config_limits *limits) { struct drm_atomic_state *state = crtc_state->uapi.state; - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = &intel_mst->primary->dp; struct intel_connector *connector = to_intel_connector(conn_state->connector); @@ -88,12 +88,58 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, return 0; } +/* + * Iterate over all connectors and return the smallest transcoder in the MST + * stream + */ +static enum transcoder +intel_dp_mst_master_trans_compute(struct intel_atomic_state *state, + struct intel_dp *mst_port) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_digital_connector_state *conn_state; + struct intel_connector *connector; + enum pipe ret = I915_MAX_PIPES; + int i; + + if (INTEL_GEN(dev_priv) < 12) + return INVALID_TRANSCODER; + + for_each_new_intel_connector_in_state(state, connector, conn_state, i) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + + if (connector->mst_port != mst_port || !conn_state->base.crtc) + continue; + + crtc = to_intel_crtc(conn_state->base.crtc); + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + if (!crtc_state->uapi.active) + continue; + + /* + * Using crtc->pipe because crtc_state->cpu_transcoder is + * computed, so others CRTCs could have non-computed + * cpu_transcoder + */ + if (crtc->pipe < ret) + ret = crtc->pipe; + } + + if (ret == I915_MAX_PIPES) + return INVALID_TRANSCODER; + + /* Simple cast works because TGL don't have a eDP transcoder */ + return (enum transcoder)ret; +} + static int intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = &intel_mst->primary->dp; struct intel_connector *connector = to_intel_connector(conn_state->connector); @@ -155,24 +201,91 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + pipe_config->mst_master_transcoder = intel_dp_mst_master_trans_compute(state, intel_dp); + + return 0; +} + +/* + * If one of the connectors in a MST stream needs a modeset, mark all CRTCs + * that shares the same MST stream as mode changed, + * intel_modeset_pipe_config()+intel_crtc_check_fastset() will take care to do + * a fastset when possible. + */ +static int +intel_dp_mst_atomic_master_trans_check(struct intel_connector *connector, + struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct drm_connector_list_iter connector_list_iter; + struct intel_connector *connector_iter; + + if (INTEL_GEN(dev_priv) < 12) + return 0; + + if (!intel_connector_needs_modeset(state, &connector->base)) + return 0; + + drm_connector_list_iter_begin(&dev_priv->drm, &connector_list_iter); + for_each_intel_connector_iter(connector_iter, &connector_list_iter) { + struct intel_digital_connector_state *conn_iter_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int ret; + + if (connector_iter->mst_port != connector->mst_port || + connector_iter == connector) + continue; + + conn_iter_state = intel_atomic_get_digital_connector_state(state, + connector_iter); + if (IS_ERR(conn_iter_state)) { + drm_connector_list_iter_end(&connector_list_iter); + return PTR_ERR(conn_iter_state); + } + + if (!conn_iter_state->base.crtc) + continue; + + crtc = to_intel_crtc(conn_iter_state->base.crtc); + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) { + drm_connector_list_iter_end(&connector_list_iter); + return PTR_ERR(crtc_state); + } + + ret = drm_atomic_add_affected_planes(&state->base, &crtc->base); + if (ret) { + drm_connector_list_iter_end(&connector_list_iter); + return ret; + } + crtc_state->uapi.mode_changed = true; + } + drm_connector_list_iter_end(&connector_list_iter); + return 0; } static int intel_dp_mst_atomic_check(struct drm_connector *connector, - struct drm_atomic_state *state) + struct drm_atomic_state *_state) { + struct intel_atomic_state *state = to_intel_atomic_state(_state); struct drm_connector_state *new_conn_state = - drm_atomic_get_new_connector_state(state, connector); + drm_atomic_get_new_connector_state(&state->base, connector); struct drm_connector_state *old_conn_state = - drm_atomic_get_old_connector_state(state, connector); + drm_atomic_get_old_connector_state(&state->base, connector); struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_crtc *new_crtc = new_conn_state->crtc; struct drm_dp_mst_topology_mgr *mgr; int ret; - ret = intel_digital_connector_atomic_check(connector, state); + ret = intel_digital_connector_atomic_check(connector, &state->base); + if (ret) + return ret; + + ret = intel_dp_mst_atomic_master_trans_check(intel_connector, state); if (ret) return ret; @@ -183,12 +296,9 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, * connector */ if (new_crtc) { - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); struct intel_crtc *intel_crtc = to_intel_crtc(new_crtc); struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(intel_state, - intel_crtc); + intel_atomic_get_new_crtc_state(state, intel_crtc); if (!crtc_state || !drm_atomic_crtc_needs_modeset(&crtc_state->uapi) || @@ -196,8 +306,8 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, return 0; } - mgr = &enc_to_mst(old_conn_state->best_encoder)->primary->dp.mst_mgr; - ret = drm_dp_atomic_release_vcpi_slots(state, mgr, + mgr = &enc_to_mst(to_intel_encoder(old_conn_state->best_encoder))->primary->dp.mst_mgr; + ret = drm_dp_atomic_release_vcpi_slots(&state->base, mgr, intel_connector->port); return ret; @@ -207,7 +317,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = @@ -231,29 +341,51 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); bool last_mst_stream; + u32 val; intel_dp->active_mst_links--; last_mst_stream = intel_dp->active_mst_links == 0; + WARN_ON(INTEL_GEN(dev_priv) >= 12 && last_mst_stream && + !intel_dp_mst_is_master_trans(old_crtc_state)); intel_crtc_vblank_off(old_crtc_state); intel_disable_pipe(old_crtc_state); + drm_dp_update_payload_part2(&intel_dp->mst_mgr); + + val = I915_READ(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder)); + val &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC; + I915_WRITE(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder), val); + + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, + DP_TP_STATUS_ACT_SENT, 1)) + DRM_ERROR("Timed out waiting for ACT sent when disabling\n"); + drm_dp_check_act_status(&intel_dp->mst_mgr); + + drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); + intel_ddi_disable_transcoder_func(old_crtc_state); if (INTEL_GEN(dev_priv) >= 9) - skylake_scaler_disable(old_crtc_state); + skl_scaler_disable(old_crtc_state); else - ironlake_pfit_disable(old_crtc_state); + ilk_pfit_disable(old_crtc_state); /* + * Power down mst path before disabling the port, otherwise we end + * up getting interrupts from the sink upon detecting link loss. + */ + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, + false); + /* * From TGL spec: "If multi-stream slave transcoder: Configure * Transcoder Clock Select to direct no clock to the transcoder" * @@ -263,19 +395,6 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) < 12 || !last_mst_stream) intel_ddi_disable_pipe_clock(old_crtc_state); - /* this can fail */ - drm_dp_check_act_status(&intel_dp->mst_mgr); - /* and this can also fail */ - drm_dp_update_payload_part2(&intel_dp->mst_mgr); - - drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); - - /* - * Power down mst path before disabling the port, otherwise we end - * up getting interrupts from the sink upon detecting link loss. - */ - drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, - false); intel_mst->connector = NULL; if (last_mst_stream) @@ -289,7 +408,7 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; @@ -302,7 +421,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -318,6 +437,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, connector->encoder = encoder; intel_mst->connector = connector; first_mst_stream = intel_dp->active_mst_links == 0; + WARN_ON(INTEL_GEN(dev_priv) >= 12 && first_mst_stream && + !intel_dp_mst_is_master_trans(pipe_config)); DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); @@ -360,7 +481,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -381,7 +502,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); *pipe = intel_mst->pipe; if (intel_mst->connector) return true; @@ -391,7 +512,7 @@ static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; intel_ddi_get_config(&intel_dig_port->base, pipe_config); @@ -499,7 +620,7 @@ static const struct drm_connector_helper_funcs intel_dp_mst_connector_helper_fun static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(to_intel_encoder(encoder)); drm_encoder_cleanup(encoder); kfree(intel_mst); @@ -723,3 +844,14 @@ intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port) drm_dp_mst_topology_mgr_destroy(&intel_dp->mst_mgr); /* encoders will get killed by normal cleanup */ } + +bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state) +{ + return crtc_state->mst_master_transcoder == crtc_state->cpu_transcoder; +} + +bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state) +{ + return crtc_state->mst_master_transcoder != INVALID_TRANSCODER && + crtc_state->mst_master_transcoder != crtc_state->cpu_transcoder; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index f660ad80db04..854724f68f09 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -6,10 +6,15 @@ #ifndef __INTEL_DP_MST_H__ #define __INTEL_DP_MST_H__ +#include <linux/types.h> + struct intel_digital_port; +struct intel_crtc_state; int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id); void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port); int intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port); +bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state); +bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_DP_MST_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 704f38681c4b..6fb1f7a7364e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -642,7 +642,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, bool uniq_trans_scale) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; @@ -738,7 +738,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder)); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; u32 val; @@ -781,7 +781,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, void chv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); @@ -861,7 +861,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -940,7 +940,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, void chv_phy_release_cl2_override(struct intel_encoder *encoder) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (dport->release_cl2_override) { @@ -989,7 +989,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; @@ -1014,7 +1014,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); @@ -1043,7 +1043,7 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -1073,7 +1073,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, void vlv_phy_reset_lanes(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 728a4b045de7..c75e34d87111 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2972,8 +2972,8 @@ static void icl_update_active_dpll(struct intel_atomic_state *state, enum icl_port_dpll_id port_dpll_id = ICL_PORT_DPLL_DEFAULT; primary_port = encoder->type == INTEL_OUTPUT_DP_MST ? - enc_to_mst(&encoder->base)->primary : - enc_to_dig_port(&encoder->base); + enc_to_mst(encoder)->primary : + enc_to_dig_port(encoder); if (primary_port && (primary_port->tc_mode == TC_PORT_DP_ALT || diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index b15be5814599..19f78a4022d3 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -45,8 +45,9 @@ struct intel_dsi { struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS]; intel_wakeref_t io_wakeref[I915_MAX_PORTS]; - /* GPIO Desc for CRC based Panel control */ + /* GPIO Desc for panel and backlight control */ struct gpio_desc *gpio_panel; + struct gpio_desc *gpio_backlight; struct intel_connector *attached_connector; @@ -68,6 +69,9 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; + /* i2c bus associated with the slave device */ + int i2c_bus_num; + /* * video mode pixel format * @@ -141,9 +145,9 @@ static inline struct intel_dsi_host *to_intel_dsi_host(struct mipi_dsi_host *h) #define for_each_dsi_phy(__phy, __phys_mask) \ for_each_phy_masked(__phy, __phys_mask) -static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) +static inline struct intel_dsi *enc_to_intel_dsi(struct intel_encoder *encoder) { - return container_of(encoder, struct intel_dsi, base.base); + return container_of(&encoder->base, struct intel_dsi, base.base); } static inline bool is_vid_mode(struct intel_dsi *intel_dsi) @@ -158,7 +162,7 @@ static inline bool is_cmd_mode(struct intel_dsi *intel_dsi) static inline u16 intel_dsi_encoder_ports(struct intel_encoder *encoder) { - return enc_to_intel_dsi(&encoder->base)->ports; + return enc_to_intel_dsi(encoder)->ports; } /* icl_dsi.c */ @@ -203,6 +207,8 @@ void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); /* intel_dsi_vbt.c */ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); +void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on); +void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi); void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, enum mipi_seq seq_id); void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec); diff --git a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c index bb3fd8b786a2..c87838843d0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c @@ -46,7 +46,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector) { struct intel_encoder *encoder = connector->encoder; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi_device; u8 data = 0; enum port port; @@ -64,7 +64,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector) static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 level) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder)); struct mipi_dsi_device *dsi_device; u8 data = level; enum port port; @@ -79,7 +79,7 @@ static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 static void dcs_disable_backlight(const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder)); struct mipi_dsi_device *dsi_device; enum port port; @@ -113,7 +113,7 @@ static void dcs_disable_backlight(const struct drm_connector_state *conn_state) static void dcs_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder)); struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; struct mipi_dsi_device *dsi_device; enum port port; diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index f90946c912ee..89fb0d90b694 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -25,7 +25,10 @@ */ #include <linux/gpio/consumer.h> +#include <linux/gpio/machine.h> #include <linux/mfd/intel_soc_pmic.h> +#include <linux/pinctrl/consumer.h> +#include <linux/pinctrl/machine.h> #include <linux/slab.h> #include <asm/intel-mid.h> @@ -83,6 +86,12 @@ static struct gpio_map vlv_gpio_table[] = { { VLV_GPIO_NC_11_PANEL1_BKLTCTL }, }; +struct i2c_adapter_lookup { + u16 slave_addr; + struct intel_dsi *intel_dsi; + acpi_handle dev_handle; +}; + #define CHV_GPIO_IDX_START_N 0 #define CHV_GPIO_IDX_START_E 73 #define CHV_GPIO_IDX_START_SW 100 @@ -375,11 +384,98 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) +{ + struct i2c_adapter_lookup *lookup = data; + struct intel_dsi *intel_dsi = lookup->intel_dsi; + struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter; + acpi_handle adapter_handle; + acpi_status status; + + if (intel_dsi->i2c_bus_num >= 0 || + !i2c_acpi_get_i2c_resource(ares, &sb)) + return 1; + + if (lookup->slave_addr != sb->slave_address) + return 1; + + status = acpi_get_handle(lookup->dev_handle, + sb->resource_source.string_ptr, + &adapter_handle); + if (ACPI_FAILURE(status)) + return 1; + + adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); + if (adapter) + intel_dsi->i2c_bus_num = adapter->nr; + + return 1; +} + static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) { - DRM_DEBUG_KMS("Skipping I2C element execution\n"); + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct i2c_adapter *adapter; + struct acpi_device *acpi_dev; + struct list_head resource_list; + struct i2c_adapter_lookup lookup; + struct i2c_msg msg; + int ret; + u8 vbt_i2c_bus_num = *(data + 2); + u16 slave_addr = *(u16 *)(data + 3); + u8 reg_offset = *(data + 5); + u8 payload_size = *(data + 6); + u8 *payload_data; + + if (intel_dsi->i2c_bus_num < 0) { + intel_dsi->i2c_bus_num = vbt_i2c_bus_num; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(&lookup, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(acpi_dev, &resource_list, + i2c_adapter_lookup, + &lookup); + acpi_dev_free_resource_list(&resource_list); + } + } - return data + *(data + 6) + 7; + adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); + if (!adapter) { + DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n"); + goto err_bus; + } + + payload_data = kzalloc(payload_size + 1, GFP_KERNEL); + if (!payload_data) + goto err_alloc; + + payload_data[0] = reg_offset; + memcpy(&payload_data[1], (data + 7), payload_size); + + msg.addr = slave_addr; + msg.flags = 0; + msg.len = payload_size + 1; + msg.buf = payload_data; + + ret = i2c_transfer(adapter, &msg, 1); + if (ret < 0) + DRM_DEV_ERROR(dev, + "Failed to xfer payload of size (%u) to reg (%u)\n", + payload_size, reg_offset); + + kfree(payload_data); +err_alloc: + i2c_put_adapter(adapter); +err_bus: + return data + payload_size + 7; } static const u8 *mipi_exec_spi(struct intel_dsi *intel_dsi, const u8 *data) @@ -453,8 +549,8 @@ static const char *sequence_name(enum mipi_seq seq_id) return "(unknown)"; } -void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, - enum mipi_seq seq_id) +static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) { struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); const u8 *data; @@ -519,6 +615,22 @@ void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, } } +void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) +{ + if (seq_id == MIPI_SEQ_POWER_ON && intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + if (seq_id == MIPI_SEQ_BACKLIGHT_ON && intel_dsi->gpio_backlight) + gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 1); + + intel_dsi_vbt_exec(intel_dsi, seq_id); + + if (seq_id == MIPI_SEQ_POWER_OFF && intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); + if (seq_id == MIPI_SEQ_BACKLIGHT_OFF && intel_dsi->gpio_backlight) + gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 0); +} + void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) { struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); @@ -664,6 +776,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) intel_dsi->panel_off_delay = pps->panel_off_delay / 10; intel_dsi->panel_pwr_cycle_delay = pps->panel_power_cycle_delay / 10; + intel_dsi->i2c_bus_num = -1; + /* a regular driver would get the device in probe */ for_each_dsi_port(port, intel_dsi->ports) { mipi_dsi_attach(intel_dsi->dsi_hosts[port]->device); @@ -671,3 +785,110 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) return true; } + +/* + * On some BYT/CHT devs some sequences are incomplete and we need to manually + * control some GPIOs. We need to add a GPIO lookup table before we get these. + * If the GOP did not initialize the panel (HDMI inserted) we may need to also + * change the pinmux for the SoC's PWM0 pin from GPIO to PWM. + */ +static struct gpiod_lookup_table pmic_panel_gpio_table = { + /* Intel GFX is consumer */ + .dev_id = "0000:00:02.0", + .table = { + /* Panel EN/DISABLE */ + GPIO_LOOKUP("gpio_crystalcove", 94, "panel", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static struct gpiod_lookup_table soc_panel_gpio_table = { + .dev_id = "0000:00:02.0", + .table = { + GPIO_LOOKUP("INT33FC:01", 10, "backlight", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:01", 11, "panel", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static const struct pinctrl_map soc_pwm_pinctrl_map[] = { + PIN_MAP_MUX_GROUP("0000:00:02.0", "soc_pwm0", "INT33FC:00", + "pwm0_grp", "pwm"), +}; + +void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) +{ + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; + enum gpiod_flags flags = panel_is_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + bool want_backlight_gpio = false; + bool want_panel_gpio = false; + struct pinctrl *pinctrl; + int ret; + + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + mipi_config->pwm_blc == PPS_BLC_PMIC) { + gpiod_add_lookup_table(&pmic_panel_gpio_table); + want_panel_gpio = true; + } + + if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) { + gpiod_add_lookup_table(&soc_panel_gpio_table); + want_panel_gpio = true; + want_backlight_gpio = true; + + /* Ensure PWM0 pin is muxed as PWM instead of GPIO */ + ret = pinctrl_register_mappings(soc_pwm_pinctrl_map, + ARRAY_SIZE(soc_pwm_pinctrl_map)); + if (ret) + DRM_ERROR("Failed to register pwm0 pinmux mapping\n"); + + pinctrl = devm_pinctrl_get_select(dev->dev, "soc_pwm0"); + if (IS_ERR(pinctrl)) + DRM_ERROR("Failed to set pinmux to PWM\n"); + } + + if (want_panel_gpio) { + intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags); + if (IS_ERR(intel_dsi->gpio_panel)) { + DRM_ERROR("Failed to own gpio for panel control\n"); + intel_dsi->gpio_panel = NULL; + } + } + + if (want_backlight_gpio) { + intel_dsi->gpio_backlight = + gpiod_get(dev->dev, "backlight", flags); + if (IS_ERR(intel_dsi->gpio_backlight)) { + DRM_ERROR("Failed to own gpio for backlight control\n"); + intel_dsi->gpio_backlight = NULL; + } + } +} + +void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi) +{ + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; + + if (intel_dsi->gpio_panel) { + gpiod_put(intel_dsi->gpio_panel); + intel_dsi->gpio_panel = NULL; + } + + if (intel_dsi->gpio_backlight) { + gpiod_put(intel_dsi->gpio_backlight); + intel_dsi->gpio_backlight = NULL; + } + + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + mipi_config->pwm_blc == PPS_BLC_PMIC) + gpiod_remove_lookup_table(&pmic_panel_gpio_table); + + if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) { + pinctrl_unregister_mappings(soc_pwm_pinctrl_map); + gpiod_remove_lookup_table(&soc_panel_gpio_table); + } +} diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index a74dc5b915d1..86a337c9d85d 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -125,7 +125,7 @@ static struct intel_dvo *enc_to_dvo(struct intel_encoder *encoder) return container_of(encoder, struct intel_dvo, base); } -static struct intel_dvo *intel_attached_dvo(struct drm_connector *connector) +static struct intel_dvo *intel_attached_dvo(struct intel_connector *connector) { return enc_to_dvo(intel_attached_encoder(connector)); } @@ -134,7 +134,7 @@ static bool intel_dvo_connector_get_hw_state(struct intel_connector *connector) { struct drm_device *dev = connector->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dvo *intel_dvo = intel_attached_dvo(&connector->base); + struct intel_dvo *intel_dvo = intel_attached_dvo(connector); u32 tmp; tmp = I915_READ(intel_dvo->dev.dvo_reg); @@ -220,7 +220,7 @@ static enum drm_mode_status intel_dvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); + struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector)); const struct drm_display_mode *fixed_mode = to_intel_connector(connector)->panel.fixed_mode; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -311,7 +311,7 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder, static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector, bool force) { - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); + struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector)); DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev); diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index ab61f88d1d33..6c83b350525d 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -126,8 +126,8 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, } } -static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, bool enable) +static void ilk_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); u32 bit = (pipe == PIPE_A) ? @@ -139,7 +139,7 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, ilk_disable_display_irq(dev_priv, bit); } -static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) +static void ivb_check_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; @@ -157,9 +157,9 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); } -static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, - bool enable, bool old) +static void ivb_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable, + bool old) { struct drm_i915_private *dev_priv = to_i915(dev); if (enable) { @@ -180,8 +180,8 @@ static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev, } } -static void broadwell_set_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, bool enable) +static void bdw_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -264,11 +264,11 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, if (HAS_GMCH(dev_priv)) i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (IS_GEN_RANGE(dev_priv, 5, 6)) - ironlake_set_fifo_underrun_reporting(dev, pipe, enable); + ilk_set_fifo_underrun_reporting(dev, pipe, enable); else if (IS_GEN(dev_priv, 7)) - ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old); + ivb_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (INTEL_GEN(dev_priv) >= 8) - broadwell_set_fifo_underrun_reporting(dev, pipe, enable); + bdw_set_fifo_underrun_reporting(dev, pipe, enable); return old; } @@ -427,7 +427,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv) if (HAS_GMCH(dev_priv)) i9xx_check_fifo_underruns(crtc); else if (IS_GEN(dev_priv, 7)) - ivybridge_check_fifo_underruns(crtc); + ivb_check_fifo_underruns(crtc); } spin_unlock_irq(&dev_priv->irq_lock); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 685589064d10..93ac0f296852 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -85,16 +85,17 @@ assert_hdmi_transcoder_func_disabled(struct drm_i915_private *dev_priv, "HDMI transcoder function enabled, expecting disabled\n"); } -struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder) +struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder) { struct intel_digital_port *intel_dig_port = - container_of(encoder, struct intel_digital_port, base.base); + container_of(&encoder->base, struct intel_digital_port, + base.base); return &intel_dig_port->hdmi; } -static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector) +static struct intel_hdmi *intel_attached_hdmi(struct intel_connector *connector) { - return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base); + return enc_to_intel_hdmi(intel_attached_encoder(connector)); } static u32 g4x_infoframe_index(unsigned int type) @@ -602,7 +603,7 @@ u32 intel_hdmi_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); u32 val, ret = 0; int i; @@ -646,7 +647,7 @@ static void intel_write_infoframe(struct intel_encoder *encoder, enum hdmi_infoframe_type type, const union hdmi_infoframe *frame) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); u8 buffer[VIDEO_DIP_DATA_SIZE]; ssize_t len; @@ -675,7 +676,7 @@ void intel_read_infoframe(struct intel_encoder *encoder, enum hdmi_infoframe_type type, union hdmi_infoframe *frame) { - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); u8 buffer[VIDEO_DIP_DATA_SIZE]; int ret; @@ -855,7 +856,7 @@ static void g4x_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = VIDEO_DIP_CTL; u32 val = I915_READ(reg); @@ -1038,7 +1039,7 @@ static void ibx_set_infoframes(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); @@ -1097,7 +1098,7 @@ static void cpt_set_infoframes(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); @@ -1146,7 +1147,7 @@ static void vlv_set_infoframes(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); u32 port = VIDEO_DIP_PORT(encoder->port); @@ -1737,7 +1738,7 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; u32 hdmi_val; @@ -1774,7 +1775,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); intel_wakeref_t wakeref; bool ret; @@ -1793,7 +1794,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, static void intel_hdmi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp, flags = 0; @@ -1874,7 +1875,7 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); u32 temp; temp = I915_READ(intel_hdmi->hdmi_reg); @@ -1896,7 +1897,7 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); u32 temp; temp = I915_READ(intel_hdmi->hdmi_reg); @@ -1947,7 +1948,7 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); enum pipe pipe = crtc->pipe; u32 temp; @@ -2007,7 +2008,7 @@ static void intel_disable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct intel_digital_port *intel_dig_port = hdmi_to_dig_port(intel_hdmi); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); @@ -2160,7 +2161,7 @@ static enum drm_mode_status intel_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_hdmi *hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); struct drm_device *dev = intel_hdmi_to_dev(hdmi); struct drm_i915_private *dev_priv = to_i915(dev); enum drm_mode_status status; @@ -2316,7 +2317,7 @@ static int intel_hdmi_compute_bpc(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, int clock, bool force_dvi) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); int bpc; for (bpc = 12; bpc >= 10; bpc -= 2) { @@ -2334,7 +2335,7 @@ static int intel_hdmi_compute_clock(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, bool force_dvi) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int bpc, clock = adjusted_mode->crtc_clock; @@ -2404,7 +2405,7 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct drm_connector *connector = conn_state->connector; @@ -2496,7 +2497,7 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, static void intel_hdmi_unset_edid(struct drm_connector *connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); intel_hdmi->has_hdmi_sink = false; intel_hdmi->has_audio = false; @@ -2512,7 +2513,7 @@ static void intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); enum port port = hdmi_to_dig_port(hdmi)->base.port; struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); @@ -2559,7 +2560,7 @@ static bool intel_hdmi_set_edid(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); intel_wakeref_t wakeref; struct edid *edid; bool connected = false; @@ -2600,7 +2601,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) { enum drm_connector_status status = connector_status_disconnected; struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base; intel_wakeref_t wakeref; @@ -2663,7 +2664,7 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = - enc_to_dig_port(&encoder->base); + enc_to_dig_port(encoder); intel_hdmi_prepare(encoder, pipe_config); @@ -2676,7 +2677,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); vlv_phy_pre_encoder_enable(encoder, pipe_config); @@ -2746,7 +2747,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dport = enc_to_dig_port(encoder); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -2772,7 +2773,7 @@ static struct i2c_adapter * intel_hdmi_get_i2c_adapter(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); return intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); } @@ -2816,7 +2817,7 @@ intel_hdmi_connector_register(struct drm_connector *connector) static void intel_hdmi_destroy(struct drm_connector *connector) { - struct cec_notifier *n = intel_attached_hdmi(connector)->cec_notifier; + struct cec_notifier *n = intel_attached_hdmi(to_intel_connector(connector))->cec_notifier; cec_notifier_conn_unregister(n); @@ -2906,7 +2907,7 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, bool scrambling) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_scrambling *sink_scrambling = &connector->display_info.hdmi.scdc.scrambling; struct i2c_adapter *adapter = diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index cf1ea5427639..d3659d0b408b 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -29,7 +29,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port); void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); -struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); +struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder); int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index fc29046d48ea..99d3a3c7989e 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -302,7 +302,7 @@ intel_encoder_hotplug(struct intel_encoder *encoder, static bool intel_encoder_has_hpd_pulse(struct intel_encoder *encoder) { return intel_encoder_is_dig_port(encoder) && - enc_to_dig_port(&encoder->base)->hpd_pulse != NULL; + enc_to_dig_port(encoder)->hpd_pulse != NULL; } static void i915_digport_work_func(struct work_struct *work) @@ -335,7 +335,7 @@ static void i915_digport_work_func(struct work_struct *work) if (!long_hpd && !short_hpd) continue; - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); ret = dig_port->hpd_pulse(dig_port, long_hpd); if (ret == IRQ_NONE) { diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index 5145ff8b962b..d807c5648c87 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -434,8 +434,8 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, const void *frame, ssize_t len) { bool ret; - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); /* LSPCON only needs AVI IF */ if (type != HDMI_INFOFRAME_TYPE_AVI) @@ -472,7 +472,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, ssize_t ret; union hdmi_infoframe frame; u8 buf[VIDEO_DIP_DATA_SIZE]; - struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_lspcon *lspcon = &dig_port->lspcon; const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { /* FIXME actually read this from the hw */ - return enc_to_intel_lspcon(&encoder->base)->active; + return enc_to_intel_lspcon(encoder)->active; } void lspcon_resume(struct intel_lspcon *lspcon) diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c index 2746512f4466..520408e83681 100644 --- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c @@ -98,7 +98,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, break; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: - dig_port = enc_to_dig_port(&encoder->base); + dig_port = enc_to_dig_port(encoder); switch (dig_port->base.port) { case PORT_B: *source = INTEL_PIPE_CRC_SOURCE_DP_B; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 16e9ff47d519..89c9cf5f38d2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1523,3 +1523,27 @@ bool intel_psr_enabled(struct intel_dp *intel_dp) return ret; } + +void intel_psr_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state) +{ + struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct intel_connector *intel_connector; + struct intel_digital_port *dig_port; + struct drm_crtc_state *crtc_state; + + if (!CAN_PSR(dev_priv) || !new_state->crtc || + dev_priv->psr.initially_probed) + return; + + intel_connector = to_intel_connector(connector); + dig_port = enc_to_dig_port(intel_connector->encoder); + if (dev_priv->psr.dp != &dig_port->dp) + return; + + crtc_state = drm_atomic_get_new_crtc_state(new_state->state, + new_state->crtc); + crtc_state->mode_changed = true; + dev_priv->psr.initially_probed = true; +} diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 46e4de8b8cd5..c58a1d438808 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -8,6 +8,8 @@ #include "intel_frontbuffer.h" +struct drm_connector; +struct drm_connector_state; struct drm_i915_private; struct intel_crtc_state; struct intel_dp; @@ -35,5 +37,8 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp); int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, u32 *out_value); bool intel_psr_enabled(struct intel_dp *intel_dp); +void intel_psr_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state); #endif /* __INTEL_PSR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 8758ee2a4442..e8819fd21e03 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -180,7 +180,7 @@ static struct intel_sdvo *to_sdvo(struct intel_encoder *encoder) return container_of(encoder, struct intel_sdvo, base); } -static struct intel_sdvo *intel_attached_sdvo(struct drm_connector *connector) +static struct intel_sdvo *intel_attached_sdvo(struct intel_connector *connector) { return to_sdvo(intel_attached_encoder(connector)); } @@ -1551,7 +1551,7 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector) { struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(&connector->base); - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(&connector->base); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); u16 active_outputs = 0; intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs); @@ -1823,7 +1823,7 @@ static enum drm_mode_status intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -1941,7 +1941,7 @@ intel_sdvo_multifunc_encoder(struct intel_sdvo *intel_sdvo) static struct edid * intel_sdvo_get_edid(struct drm_connector *connector) { - struct intel_sdvo *sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector)); return drm_get_edid(connector, &sdvo->ddc); } @@ -1959,7 +1959,7 @@ intel_sdvo_get_analog_edid(struct drm_connector *connector) static enum drm_connector_status intel_sdvo_tmds_sink_detect(struct drm_connector *connector) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status status; @@ -2028,7 +2028,7 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector, bool force) { u16 response; - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status ret; @@ -2175,7 +2175,7 @@ static const struct drm_display_mode sdvo_tv_modes[] = { static void intel_sdvo_get_tv_modes(struct drm_connector *connector) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); const struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_sdtv_resolution_request tv_res; u32 reply = 0, format_map = 0; @@ -2215,7 +2215,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) static void intel_sdvo_get_lvds_modes(struct drm_connector *connector) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct drm_i915_private *dev_priv = to_i915(connector->dev); struct drm_display_mode *newmode; @@ -2379,7 +2379,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, static int intel_sdvo_connector_register(struct drm_connector *connector) { - struct intel_sdvo *sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector)); int ret; ret = intel_connector_register(connector); @@ -2394,7 +2394,7 @@ intel_sdvo_connector_register(struct drm_connector *connector) static void intel_sdvo_connector_unregister(struct drm_connector *connector) { - struct intel_sdvo *sdvo = intel_attached_sdvo(connector); + struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector)); sysfs_remove_link(&connector->kdev->kobj, sdvo->ddc.dev.kobj.name); @@ -2932,7 +2932,7 @@ static void intel_sdvo_output_cleanup(struct intel_sdvo *intel_sdvo) list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) { - if (intel_attached_encoder(connector) == &intel_sdvo->base) { + if (intel_attached_encoder(to_intel_connector(connector)) == &intel_sdvo->base) { drm_connector_unregister(connector); intel_connector_destroy(connector); } diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 3f7b8f2ff671..fca77ec1e0dd 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -583,15 +583,16 @@ skl_program_plane(struct intel_plane *plane, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = plane_state->color_plane[color_plane].offset; u32 stride = skl_plane_stride(plane_state, color_plane); - u32 aux_dist = plane_state->color_plane[1].offset - surf_addr; - u32 aux_stride = skl_plane_stride(plane_state, 1); + const struct drm_framebuffer *fb = plane_state->hw.fb; + int aux_plane = intel_main_to_aux_plane(fb, color_plane); + u32 aux_dist = plane_state->color_plane[aux_plane].offset - surf_addr; + u32 aux_stride = skl_plane_stride(plane_state, aux_plane); int crtc_x = plane_state->uapi.dst.x1; int crtc_y = plane_state->uapi.dst.y1; u32 x = plane_state->color_plane[color_plane].x; u32 y = plane_state->color_plane[color_plane].y; u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16; u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16; - const struct drm_framebuffer *fb = plane_state->hw.fb; u8 alpha = plane_state->hw.alpha >> 8; u32 plane_color_ctl = 0; unsigned long irqflags; @@ -2106,7 +2107,8 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, fb->modifier == I915_FORMAT_MOD_Yf_TILED || fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS || - fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS)) { + fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS || + fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS)) { DRM_DEBUG_KMS("Y/Yf tiling not supported in IF-ID mode\n"); return -EINVAL; } @@ -2578,7 +2580,16 @@ static const u64 skl_plane_format_modifiers_ccs[] = { DRM_FORMAT_MOD_INVALID }; -static const u64 gen12_plane_format_modifiers_ccs[] = { +static const u64 gen12_plane_format_modifiers_mc_ccs[] = { + I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS, + I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS, + I915_FORMAT_MOD_Y_TILED, + I915_FORMAT_MOD_X_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static const u64 gen12_plane_format_modifiers_rc_ccs[] = { I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS, I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED, @@ -2743,10 +2754,21 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, } } +static bool gen12_plane_supports_mc_ccs(enum plane_id plane_id) +{ + return plane_id < PLANE_SPRITE4; +} + static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, u32 format, u64 modifier) { + struct intel_plane *plane = to_intel_plane(_plane); + switch (modifier) { + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + if (!gen12_plane_supports_mc_ccs(plane->id)) + return false; + /* fall through */ case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: case I915_FORMAT_MOD_Y_TILED: @@ -2764,11 +2786,6 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, if (is_ccs_modifier(modifier)) return true; /* fall through */ - case DRM_FORMAT_RGB565: - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ARGB2101010: - case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2777,6 +2794,14 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_P010: case DRM_FORMAT_P012: case DRM_FORMAT_P016: + if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS) + return true; + /* fall through */ + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_XVYU2101010: case DRM_FORMAT_C8: case DRM_FORMAT_XBGR16161616F: @@ -2910,6 +2935,14 @@ static const u32 *icl_get_plane_formats(struct drm_i915_private *dev_priv, } } +static const u64 *gen12_get_plane_modifiers(enum plane_id plane_id) +{ + if (gen12_plane_supports_mc_ccs(plane_id)) + return gen12_plane_format_modifiers_mc_ccs; + else + return gen12_plane_format_modifiers_rc_ccs; +} + static bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { @@ -2975,7 +3008,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id); if (INTEL_GEN(dev_priv) >= 12) { - modifiers = gen12_plane_format_modifiers_ccs; + modifiers = gen12_get_plane_modifiers(plane_id); plane_funcs = &gen12_plane_funcs; } else { if (plane->has_ccs) diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 50703536436c..c75e0ceecee6 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -898,7 +898,7 @@ static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) return container_of(encoder, struct intel_tv, base); } -static struct intel_tv *intel_attached_tv(struct drm_connector *connector) +static struct intel_tv *intel_attached_tv(struct intel_connector *connector) { return enc_to_tv(intel_attached_encoder(connector)); } @@ -1527,7 +1527,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, ((video_levels->black << TV_BLACK_LEVEL_SHIFT) | (video_levels->blank << TV_BLANK_LEVEL_SHIFT))); - assert_pipe_disabled(dev_priv, intel_crtc->pipe); + assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder); /* Filter ctl must be set before TV_WIN_SIZE */ tv_filter_ctl = TV_AUTO_SCALE; @@ -1662,7 +1662,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, */ static void intel_tv_find_better_format(struct drm_connector *connector) { - struct intel_tv *intel_tv = intel_attached_tv(connector); + struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; @@ -1689,7 +1689,7 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct intel_tv *intel_tv = intel_attached_tv(connector); + struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); enum drm_connector_status status; int type; diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 6bab08db5d75..9e6aaa302e40 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -943,7 +943,7 @@ static void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct mipi_dsi_device *dsi; struct drm_dsc_picture_parameter_set pps; enum port port; @@ -961,7 +961,7 @@ static void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, static void intel_dsc_dp_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; struct drm_dsc_pps_infoframe dp_dsc_pps_sdp; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 21e820299107..daf4fc3dab6f 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -23,7 +23,6 @@ * Author: Jani Nikula <jani.nikula@intel.com> */ -#include <linux/gpio/consumer.h> #include <linux/slab.h> #include <drm/drm_atomic_helper.h> @@ -319,7 +318,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, static bool glk_dsi_enable_io(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; bool cold_boot = false; @@ -367,7 +366,7 @@ static bool glk_dsi_enable_io(struct intel_encoder *encoder) static void glk_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -438,7 +437,7 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) static void bxt_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -465,7 +464,7 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder) static void vlv_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -516,7 +515,7 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -546,7 +545,7 @@ static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 tmp; @@ -579,7 +578,7 @@ static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; DRM_DEBUG_KMS("\n"); @@ -625,7 +624,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { @@ -681,7 +680,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; for_each_dsi_port(port, intel_dsi->ports) { @@ -745,7 +744,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct drm_crtc *crtc = pipe_config->uapi.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); @@ -793,9 +792,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, if (!IS_GEMINILAKE(dev_priv)) intel_dsi_prepare(encoder, pipe_config); - /* Power on, try both CRC pmic gpio and VBT */ - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON); intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); @@ -850,7 +846,7 @@ static void intel_dsi_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; DRM_DEBUG_KMS("\n"); @@ -886,7 +882,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -895,7 +891,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv)) { intel_crtc_vblank_off(old_crtc_state); - skylake_scaler_disable(old_crtc_state); + skl_scaler_disable(old_crtc_state); } if (is_vid_mode(intel_dsi)) { @@ -945,11 +941,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, /* Assert reset */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); - /* Power off, try both CRC pmic gpio and VBT */ intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); /* * FIXME As we do with eDP, just make a note of the time here @@ -962,7 +955,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); intel_wakeref_t wakeref; enum port port; bool active = false; @@ -1041,7 +1034,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, &pipe_config->hw.adjusted_mode; struct drm_display_mode *adjusted_mode_sw; struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); unsigned int lane_count = intel_dsi->lane_count; unsigned int bpp, fmt; enum port port; @@ -1234,7 +1227,7 @@ static void set_dsi_timings(struct drm_encoder *encoder, { struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); enum port port; unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); unsigned int lane_count = intel_dsi->lane_count; @@ -1322,7 +1315,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; enum port port; unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -1512,7 +1505,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, static void intel_dsi_unprepare(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; @@ -1539,12 +1532,9 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) static void intel_dsi_encoder_destroy(struct drm_encoder *encoder) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - - /* dispose of the gpios */ - if (intel_dsi->gpio_panel) - gpiod_put(intel_dsi->gpio_panel); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); + intel_dsi_vbt_gpio_cleanup(intel_dsi); intel_encoder_destroy(encoder); } @@ -1825,6 +1815,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) struct drm_connector *connector; struct drm_display_mode *current_mode, *fixed_mode; enum port port; + enum pipe pipe; DRM_DEBUG_KMS("\n"); @@ -1923,20 +1914,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) vlv_dphy_param_init(intel_dsi); - /* - * In case of BYT with CRC PMIC, we need to use GPIO for - * Panel control. - */ - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && - (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { - intel_dsi->gpio_panel = - gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); - - if (IS_ERR(intel_dsi->gpio_panel)) { - DRM_ERROR("Failed to own gpio for panel control\n"); - intel_dsi->gpio_panel = NULL; - } - } + intel_dsi_vbt_gpio_init(intel_dsi, + intel_dsi_get_hw_state(intel_encoder, &pipe)); drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index 95f39cd0ce02..6b89e67b120f 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -117,7 +117,7 @@ int vlv_dsi_pll_compute(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); int ret; u32 dsi_clk; @@ -255,7 +255,7 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); u32 dsi_clock, pclk; u32 pll_ctl, pll_div; @@ -321,7 +321,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, u32 pclk; u32 dsi_clk; u32 dsi_ratio; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -341,7 +341,7 @@ void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) { u32 temp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); temp = I915_READ(MIPI_CTRL(port)); temp &= ~ESCAPE_CLOCK_DIVIDER_MASK; @@ -455,7 +455,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max; u32 dsi_clk; @@ -503,7 +503,7 @@ void bxt_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; u32 val; diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile deleted file mode 100644 index 7e73aa587967..000000000000 --- a/drivers/gpu/drm/i915/gem/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dc90b044a217..a2e57e62af30 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,6 +69,7 @@ #include <drm/i915_drm.h> +#include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_pm.h" @@ -705,7 +706,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(i915); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -760,12 +761,6 @@ void i915_gem_driver_release__contexts(struct drm_i915_private *i915) flush_work(&i915->gem.contexts.free_work); } -static int context_idr_cleanup(int id, void *p, void *data) -{ - context_close(p); - return 0; -} - static int vm_idr_cleanup(int id, void *p, void *data) { i915_vm_put(p); @@ -773,7 +768,8 @@ static int vm_idr_cleanup(int id, void *p, void *data) } static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv) + struct drm_i915_file_private *fpriv, + u32 *id) { struct i915_address_space *vm; int ret; @@ -791,14 +787,10 @@ static int gem_context_register(struct i915_gem_context *ctx, current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - mutex_lock(&fpriv->context_idr_lock); - ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&fpriv->context_idr_lock); - if (ret >= 0) - goto out; + ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); + if (ret) + put_pid(fetch_and_zero(&ctx->pid)); - put_pid(fetch_and_zero(&ctx->pid)); -out: return ret; } @@ -808,11 +800,11 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int err; + u32 id; - mutex_init(&file_priv->context_idr_lock); - mutex_init(&file_priv->vm_idr_lock); + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); - idr_init(&file_priv->context_idr); + mutex_init(&file_priv->vm_idr_lock); idr_init_base(&file_priv->vm_idr, 1); ctx = i915_gem_create_context(i915, 0); @@ -821,21 +813,19 @@ int i915_gem_context_open(struct drm_i915_private *i915, goto err; } - err = gem_context_register(ctx, file_priv); + err = gem_context_register(ctx, file_priv, &id); if (err < 0) goto err_ctx; - GEM_BUG_ON(err > 0); - + GEM_BUG_ON(id); return 0; err_ctx: context_close(ctx); err: idr_destroy(&file_priv->vm_idr); - idr_destroy(&file_priv->context_idr); + xa_destroy(&file_priv->context_xa); mutex_destroy(&file_priv->vm_idr_lock); - mutex_destroy(&file_priv->context_idr_lock); return err; } @@ -843,10 +833,12 @@ void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_private *i915 = file_priv->dev_priv; + struct i915_gem_context *ctx; + unsigned long idx; - idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->context_idr_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) + context_close(ctx); + xa_destroy(&file_priv->context_xa); idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); @@ -870,7 +862,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - ppgtt = i915_ppgtt_create(i915); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -1244,12 +1236,14 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) * image, or into the registers directory, does not stick). Pristine * and idle contexts will be configured on pinning. */ - if (!intel_context_is_pinned(ce)) + if (!intel_context_pin_if_active(ce)) return 0; rq = intel_engine_create_kernel_request(ce->engine); - if (IS_ERR(rq)) - return PTR_ERR(rq); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_unpin; + } /* Serialise with the remote context */ ret = intel_context_prepare_remote_request(ce, rq); @@ -1257,6 +1251,8 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) ret = gen8_emit_rpcs_config(rq, ce, sseu); i915_request_add(rq); +out_unpin: + intel_context_unpin(ce); return ret; } @@ -2187,6 +2183,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_context_create_ext *args = data; struct create_ext ext_data; int ret; + u32 id; if (!DRIVER_CAPS(i915)->has_logical_contexts) return -ENODEV; @@ -2218,11 +2215,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, goto err_ctx; } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv); + ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); if (ret < 0) goto err_ctx; - args->ctx_id = ret; + args->ctx_id = id; DRM_DEBUG("HW context %d created\n", args->ctx_id); return 0; @@ -2245,11 +2242,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; - if (mutex_lock_interruptible(&file_priv->context_idr_lock)) - return -EINTR; - - ctx = idr_remove(&file_priv->context_idr, args->ctx_id); - mutex_unlock(&file_priv->context_idr_lock); + ctx = xa_erase(&file_priv->context_xa, args->ctx_id); if (!ctx) return -ENOENT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 14f3cc1b7583..3ae61a355d87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -13,7 +13,6 @@ #include "i915_drv.h" #include "i915_gem.h" -#include "i915_gem_gtt.h" #include "i915_scheduler.h" #include "intel_device_info.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cbd2bcade3c8..d5a0f5ae4a8b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2173,7 +2173,7 @@ static int eb_submit(struct i915_execbuffer *eb) } if (intel_context_nopreempt(eb->context)) - eb->request->flags |= I915_REQUEST_NOPREEMPT; + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 520cc9cac471..70543c83df06 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -16,46 +16,6 @@ const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { .release = i915_gem_object_release_memory_region, }; -/* XXX: Time to vfunc your life up? */ -void __iomem * -i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj, - unsigned long n) -{ - resource_size_t offset; - - offset = i915_gem_object_get_dma_address(obj, n); - offset -= obj->mm.region->region.start; - - return io_mapping_map_wc(&obj->mm.region->iomap, offset, PAGE_SIZE); -} - -void __iomem * -i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj, - unsigned long n) -{ - resource_size_t offset; - - offset = i915_gem_object_get_dma_address(obj, n); - offset -= obj->mm.region->region.start; - - return io_mapping_map_atomic_wc(&obj->mm.region->iomap, offset); -} - -void __iomem * -i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, - unsigned long n, - unsigned long size) -{ - resource_size_t offset; - - GEM_BUG_ON(!i915_gem_object_is_contiguous(obj)); - - offset = i915_gem_object_get_dma_address(obj, n); - offset -= obj->mm.region->region.start; - - return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); -} - bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) { return obj->ops == &i915_gem_lmem_obj_ops; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 7c176b8b7d2f..fc3f15580fe3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -14,14 +14,6 @@ struct intel_memory_region; extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; -void __iomem *i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, - unsigned long n, unsigned long size); -void __iomem *i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj, - unsigned long n); -void __iomem * -i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj, - unsigned long n); - bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 879fff8adc48..b9fdac2f9003 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -4,6 +4,7 @@ * Copyright © 2014-2016 Intel Corporation */ +#include <linux/anon_inodes.h> #include <linux/mman.h> #include <linux/pfn_t.h> #include <linux/sizes.h> @@ -212,6 +213,7 @@ static vm_fault_t i915_error_to_vmf_fault(int err) case -EIO: /* shmemfs failure from swap device */ case -EFAULT: /* purged object */ case -ENODEV: /* bad object, how did you get here! */ + case -ENXIO: /* unable to access backing store (on device) */ return VM_FAULT_SIGBUS; case -ENOSPC: /* shmemfs allocation failure */ @@ -236,42 +238,38 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) struct vm_area_struct *area = vmf->vma; struct i915_mmap_offset *mmo = area->vm_private_data; struct drm_i915_gem_object *obj = mmo->obj; - unsigned long i, size = area->vm_end - area->vm_start; - bool write = area->vm_flags & VM_WRITE; - vm_fault_t ret = VM_FAULT_SIGBUS; + resource_size_t iomap; int err; - if (!i915_gem_object_has_struct_page(obj)) - return ret; - /* Sanity check that we allow writing into this object */ - if (i915_gem_object_is_readonly(obj) && write) - return ret; + if (unlikely(i915_gem_object_is_readonly(obj) && + area->vm_flags & VM_WRITE)) + return VM_FAULT_SIGBUS; err = i915_gem_object_pin_pages(obj); if (err) - return i915_error_to_vmf_fault(err); + goto out; - /* PTEs are revoked in obj->ops->put_pages() */ - for (i = 0; i < size >> PAGE_SHIFT; i++) { - struct page *page = i915_gem_object_get_page(obj, i); - - ret = vmf_insert_pfn(area, - (unsigned long)area->vm_start + i * PAGE_SIZE, - page_to_pfn(page)); - if (ret != VM_FAULT_NOPAGE) - break; + iomap = -1; + if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) { + iomap = obj->mm.region->iomap.base; + iomap -= obj->mm.region->region.start; } - if (write) { + /* PTEs are revoked in obj->ops->put_pages() */ + err = remap_io_sg(area, + area->vm_start, area->vm_end - area->vm_start, + obj->mm.pages->sgl, iomap); + + if (area->vm_flags & VM_WRITE) { GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - obj->cache_dirty = true; /* XXX flush after PAT update? */ obj->mm.dirty = true; } i915_gem_object_unpin_pages(obj); - return ret; +out: + return i915_error_to_vmf_fault(err); } static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) @@ -560,7 +558,9 @@ __assign_mmap_offset(struct drm_file *file, } if (mmap_type != I915_MMAP_TYPE_GTT && - !i915_gem_object_has_struct_page(obj)) { + !i915_gem_object_type_has(obj, + I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_HAS_IOMEM)) { err = -ENODEV; goto out; } @@ -694,6 +694,46 @@ static const struct vm_operations_struct vm_ops_cpu = { .close = vm_close, }; +static int singleton_release(struct inode *inode, struct file *file) +{ + struct drm_i915_private *i915 = file->private_data; + + cmpxchg(&i915->gem.mmap_singleton, file, NULL); + drm_dev_put(&i915->drm); + + return 0; +} + +static const struct file_operations singleton_fops = { + .owner = THIS_MODULE, + .release = singleton_release, +}; + +static struct file *mmap_singleton(struct drm_i915_private *i915) +{ + struct file *file; + + rcu_read_lock(); + file = i915->gem.mmap_singleton; + if (file && !get_file_rcu(file)) + file = NULL; + rcu_read_unlock(); + if (file) + return file; + + file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR); + if (IS_ERR(file)) + return file; + + /* Everyone shares a single global address space */ + file->f_mapping = i915->drm.anon_inode->i_mapping; + + smp_store_mb(i915->gem.mmap_singleton, file); + drm_dev_get(&i915->drm); + + return file; +} + /* * This overcomes the limitation in drm_gem_mmap's assignment of a * drm_gem_object as the vma->vm_private_data. Since we need to @@ -707,6 +747,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_device *dev = priv->minor->dev; struct i915_mmap_offset *mmo = NULL; struct drm_gem_object *obj = NULL; + struct file *anon; if (drm_dev_is_unplugged(dev)) return -ENODEV; @@ -755,9 +796,26 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags &= ~VM_MAYWRITE; } + anon = mmap_singleton(to_i915(obj->dev)); + if (IS_ERR(anon)) { + drm_gem_object_put_unlocked(obj); + return PTR_ERR(anon); + } + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = mmo; + /* + * We keep the ref on mmo->obj, not vm_file, but we require + * vma->vm_file->f_mapping, see vma_link(), for later revocation. + * Our userspace is accustomed to having per-file resource cleanup + * (i.e. contexts, objects and requests) on their close(fd), which + * requires avoiding extraneous references to their filp, hence why + * we prefer to use an anonymous file for their mmaps. + */ + fput(vma->vm_file); + vma->vm_file = anon; + switch (mmo->mmap_type) { case I915_MMAP_TYPE_WC: vma->vm_page_prot = diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 858f8bf49a04..db70a3306e59 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -16,6 +16,7 @@ #include "display/intel_frontbuffer.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" +#include "i915_vma_types.h" void i915_gem_init__objects(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 75197ca696a8..54aca5c9101e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -158,9 +158,7 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) { - if (i915_gem_object_is_lmem(obj)) - io_mapping_unmap((void __force __iomem *)ptr); - else if (is_vmalloc_addr(ptr)) + if (is_vmalloc_addr(ptr)) vunmap(ptr); else kunmap(kmap_to_page(ptr)); @@ -236,46 +234,44 @@ unlock: return err; } +static inline pte_t iomap_pte(resource_size_t base, + dma_addr_t offset, + pgprot_t prot) +{ + return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot)); +} + /* The 'mapping' part of i915_gem_object_pin_map() below */ static void *i915_gem_object_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { - unsigned long n_pages = obj->base.size >> PAGE_SHIFT; + unsigned long n_pte = obj->base.size >> PAGE_SHIFT; struct sg_table *sgt = obj->mm.pages; - struct sgt_iter sgt_iter; - struct page *page; - struct page *stack_pages[32]; - struct page **pages = stack_pages; - unsigned long i = 0; + pte_t *stack[32], **mem; + struct vm_struct *area; pgprot_t pgprot; - void *addr; - if (i915_gem_object_is_lmem(obj)) { - void __iomem *io; - - if (type != I915_MAP_WC) - return NULL; - - io = i915_gem_object_lmem_io_map(obj, 0, obj->base.size); - return (void __force *)io; - } + if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC) + return NULL; /* A single page can always be kmapped */ - if (n_pages == 1 && type == I915_MAP_WB) + if (n_pte == 1 && type == I915_MAP_WB) return kmap(sg_page(sgt->sgl)); - if (n_pages > ARRAY_SIZE(stack_pages)) { + mem = stack; + if (n_pte > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ - pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); - if (!pages) + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); + if (!mem) return NULL; } - for_each_sgt_page(page, sgt_iter, sgt) - pages[i++] = page; - - /* Check that we have the expected number of pages */ - GEM_BUG_ON(i != n_pages); + area = alloc_vm_area(obj->base.size, mem); + if (!area) { + if (mem != stack) + kvfree(mem); + return NULL; + } switch (type) { default: @@ -288,12 +284,31 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj, pgprot = pgprot_writecombine(PAGE_KERNEL_IO); break; } - addr = vmap(pages, n_pages, 0, pgprot); - if (pages != stack_pages) - kvfree(pages); + if (i915_gem_object_has_struct_page(obj)) { + struct sgt_iter iter; + struct page *page; + pte_t **ptes = mem; + + for_each_sgt_page(page, iter, sgt) + **ptes++ = mk_pte(page, pgprot); + } else { + resource_size_t iomap; + struct sgt_iter iter; + pte_t **ptes = mem; + dma_addr_t addr; + + iomap = obj->mm.region->iomap.base; + iomap -= obj->mm.region->region.start; + + for_each_sgt_daddr(addr, iter, sgt) + **ptes++ = iomap_pte(iomap, addr, pgprot); + } + + if (mem != stack) + kvfree(mem); - return addr; + return area->addr; } /* get, pin, and map the pages of the object into kernel space */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index d50adac12249..1515384d7e0e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -107,7 +107,10 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, { INIT_LIST_HEAD(&obj->mm.blocks); obj->mm.region = intel_memory_region_get(mem); + obj->flags |= flags; + if (obj->base.size <= mem->min_page_size) + obj->flags |= I915_BO_ALLOC_CONTIGUOUS; mutex_lock(&mem->objects.lock); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4d69c3fc3439..a2a980d9d241 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -594,6 +594,8 @@ static int init_shmem(struct intel_memory_region *mem) err); } + intel_memory_region_set_name(mem, "system"); + return 0; /* Don't error, we can simply fallback to the kernel mnt */ } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index afb08a1704a2..451f3078d60d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -645,6 +645,8 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915, static int init_stolen(struct intel_memory_region *mem) { + intel_memory_region_set_name(mem, "stolen"); + /* * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h index 549c1394bcdc..b8cf31b7bf14 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h @@ -7,6 +7,12 @@ #ifndef __HUGE_GEM_OBJECT_H #define __HUGE_GEM_OBJECT_H +#include <linux/types.h> + +#include "gem/i915_gem_object_types.h" + +struct drm_i915_private; + struct drm_i915_gem_object * huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 2479395c1873..9311250d7d6f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1017,38 +1017,33 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __cpu_check_lmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) +static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) { - unsigned long n; + unsigned long n = obj->base.size >> PAGE_SHIFT; + u32 *ptr; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); if (err) return err; - for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 __iomem *base; - u32 read_val; - - base = i915_gem_object_lmem_io_map_page_atomic(obj, n); + ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); - read_val = ioread32(base + dword); - io_mapping_unmap_atomic(base); - if (read_val != val) { - pr_err("n=%lu base[%u]=%u, val=%u\n", - n, dword, read_val, val); + ptr += dword; + while (n--) { + if (*ptr != val) { + pr_err("base[%u]=%08x, val=%08x\n", + dword, *ptr, val); err = -EINVAL; break; } + + ptr += PAGE_SIZE / sizeof(*ptr); } - i915_gem_object_unpin_pages(obj); + i915_gem_object_unpin_map(obj); return err; } @@ -1056,10 +1051,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) { if (i915_gem_object_has_struct_page(obj)) return __cpu_check_shmem(obj, dword, val); - else if (i915_gem_object_is_lmem(obj)) - return __cpu_check_lmem(obj, dword, val); - - return -ENODEV; + else + return __cpu_check_vmap(obj, dword, val); } static int __igt_write_huge(struct intel_context *ce, @@ -1872,7 +1865,7 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 49edc51111d5..3f6079e1dfb6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -325,7 +325,10 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; ctx.engine = random_engine(i915, &prng); - GEM_BUG_ON(!ctx.engine); + if (!ctx.engine) { + err = -ENODEV; + goto out_free; + } pr_info("%s: using %s\n", __func__, ctx.engine->name); intel_engine_pm_get(ctx.engine); @@ -354,7 +357,7 @@ static int igt_gem_coherency(void *arg) ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(ctx.obj)) { err = PTR_ERR(ctx.obj); - goto free; + goto out_pm; } i915_random_reorder(offsets, ncachelines, &prng); @@ -405,14 +408,15 @@ static int igt_gem_coherency(void *arg) } } } -free: +out_pm: intel_engine_pm_put(ctx.engine); +out_free: kfree(offsets); return err; put_object: i915_gem_object_put(ctx.obj); - goto free; + goto out_pm; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index cbf796da64e3..ef7c74cff28a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -9,6 +9,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gem/i915_gem_region.h" #include "huge_gem_object.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -725,114 +726,359 @@ err_obj: goto out; } -#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) -static int igt_mmap(void *arg, enum i915_mmap_type type) +static int gtt_set(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct i915_mmap_offset *mmo; - struct vm_area_struct *area; - unsigned long addr; - void *vaddr; - int err = 0, i; + struct i915_vma *vma; + void __iomem *map; + int err = 0; - if (!i915_ggtt_has_aperture(&i915->ggtt)) - return 0; + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out; + } + + memset_io(map, POISON_INUSE, obj->base.size); + i915_vma_unpin_iomap(vma); + +out: + intel_gt_pm_put(vma->vm->gt); + return err; +} + +static int gtt_check(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + void __iomem *map; + int err = 0; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) { + err = PTR_ERR(map); goto out; } - memset(vaddr, POISON_INUSE, PAGE_SIZE); + + if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) { + pr_err("%s: Write via mmap did not land in backing store (GTT)\n", + obj->mm.region->name); + err = -EINVAL; + } + i915_vma_unpin_iomap(vma); + +out: + intel_gt_pm_put(vma->vm->gt); + return err; +} + +static int wc_set(struct drm_i915_gem_object *obj) +{ + void *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + memset(vaddr, POISON_INUSE, obj->base.size); i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) { - err = PTR_ERR(mmo); - goto out; + return 0; +} + +static int wc_check(struct drm_i915_gem_object *obj) +{ + void *vaddr; + int err = 0; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) { + pr_err("%s: Write via mmap did not land in backing store (WC)\n", + obj->mm.region->name); + err = -EINVAL; } + i915_gem_object_unpin_map(obj); + + return err; +} + +static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) +{ + if (type == I915_MMAP_TYPE_GTT && + !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + return false; + + if (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_type_has(obj, + I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_HAS_IOMEM)) + return false; + + return true; +} + +#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) +static int __igt_mmap(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + struct vm_area_struct *area; + unsigned long addr; + int err, i; + + if (!can_mmap(obj, type)) + return 0; + + err = wc_set(obj); + if (err == -ENXIO) + err = gtt_set(obj); + if (err) + return err; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); - if (IS_ERR_VALUE(addr)) { - err = addr; - goto out; - } + if (IS_ERR_VALUE(addr)) + return addr; - pr_debug("igt_mmap() @ %lx\n", addr); + pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr); area = find_vma(current->mm, addr); if (!area) { - pr_err("Did not create a vm_area_struct for the mmap\n"); + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); err = -EINVAL; goto out_unmap; } if (area->vm_private_data != mmo) { - pr_err("vm_area_struct did not point back to our mmap_offset object!\n"); + pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n", + obj->mm.region->name); err = -EINVAL; goto out_unmap; } - for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) { + for (i = 0; i < obj->base.size / sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; if (get_user(x, ux)) { - pr_err("Unable to read from mmap, offset:%zd\n", - i * sizeof(x)); + pr_err("%s: Unable to read from mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); err = -EFAULT; - break; + goto out_unmap; } if (x != expand32(POISON_INUSE)) { - pr_err("Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", + pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", + obj->mm.region->name, i * sizeof(x), x, expand32(POISON_INUSE)); err = -EINVAL; - break; + goto out_unmap; } x = expand32(POISON_FREE); if (put_user(x, ux)) { - pr_err("Unable to write to mmap, offset:%zd\n", - i * sizeof(x)); + pr_err("%s: Unable to write to mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); err = -EFAULT; - break; + goto out_unmap; } } + if (type == I915_MMAP_TYPE_GTT) + intel_gt_flush_ggtt_writes(&i915->gt); + + err = wc_check(obj); + if (err == -ENXIO) + err = gtt_check(obj); out_unmap: - vm_munmap(addr, PAGE_SIZE); + vm_munmap(addr, obj->base.size); + return err; +} - vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto out; - } - if (err == 0 && memchr_inv(vaddr, POISON_FREE, PAGE_SIZE)) { - pr_err("Write via mmap did not land in backing store\n"); - err = -EINVAL; +static int igt_mmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + unsigned long sizes[] = { + PAGE_SIZE, + mr->min_page_size, + SZ_4M, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, sizes[i], 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } } - i915_gem_object_unpin_map(obj); -out: - i915_gem_object_put(obj); - return err; + return 0; } -static int igt_mmap_gtt(void *arg) +static int __igt_mmap_gpu(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) { - return igt_mmap(arg, I915_MMAP_TYPE_GTT); + struct intel_engine_cs *engine; + struct i915_mmap_offset *mmo; + unsigned long addr; + u32 __user *ux; + u32 bbe; + int err; + + /* + * Verify that the mmap access into the backing store aligns with + * that of the GPU, i.e. that mmap is indeed writing into the same + * page as being read by the GPU. + */ + + if (!can_mmap(obj, type)) + return 0; + + err = wc_set(obj); + if (err == -ENXIO) + err = gtt_set(obj); + if (err) + return err; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + ux = u64_to_user_ptr((u64)addr); + bbe = MI_BATCH_BUFFER_END; + if (put_user(bbe, ux)) { + pr_err("%s: Unable to write to mmap\n", obj->mm.region->name); + err = -EFAULT; + goto out_unmap; + } + + if (type == I915_MMAP_TYPE_GTT) + intel_gt_flush_ggtt_writes(&i915->gt); + + for_each_uabi_engine(engine, i915) { + struct i915_request *rq; + struct i915_vma *vma; + + vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unmap; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_unmap; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + + err = engine->emit_bb_start(rq, vma->node.start, 0, 0); + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("%s(%s, %s): Failed to execute batch\n", + __func__, engine->name, obj->mm.region->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + intel_gt_set_wedged(engine->gt); + err = -EIO; + } + i915_request_put(rq); + +out_unpin: + i915_vma_unpin(vma); + if (err) + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; } -static int igt_mmap_cpu(void *arg) +static int igt_mmap_gpu(void *arg) { - return igt_mmap(arg, I915_MMAP_TYPE_WC); + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; } static int check_present_pte(pte_t *pte, unsigned long addr, void *data) @@ -887,32 +1133,24 @@ static int prefault_range(u64 start, u64 len) return __get_user(c, end - 1); } -static int igt_mmap_revoke(void *arg, enum i915_mmap_type type) +static int __igt_mmap_revoke(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) { - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; struct i915_mmap_offset *mmo; unsigned long addr; int err; - if (!i915_ggtt_has_aperture(&i915->ggtt)) + if (!can_mmap(obj, type)) return 0; - obj = i915_gem_object_create_internal(i915, SZ_4M); - if (IS_ERR(obj)) - return PTR_ERR(obj); - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) { - err = PTR_ERR(mmo); - goto out; - } + if (IS_ERR(mmo)) + return PTR_ERR(mmo); addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); - if (IS_ERR_VALUE(addr)) { - err = addr; - goto out; - } + if (IS_ERR_VALUE(addr)) + return addr; err = prefault_range(addr, obj->base.size); if (err) @@ -922,8 +1160,10 @@ static int igt_mmap_revoke(void *arg, enum i915_mmap_type type) !atomic_read(&obj->bind_count)); err = check_present(addr, obj->base.size); - if (err) + if (err) { + pr_err("%s: was not present\n", obj->mm.region->name); goto out_unmap; + } /* * After unbinding the object from the GGTT, its address may be reused @@ -947,24 +1187,43 @@ static int igt_mmap_revoke(void *arg, enum i915_mmap_type type) } err = check_absent(addr, obj->base.size); - if (err) + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); goto out_unmap; + } out_unmap: vm_munmap(addr, obj->base.size); -out: - i915_gem_object_put(obj); return err; } -static int igt_mmap_gtt_revoke(void *arg) +static int igt_mmap_revoke(void *arg) { - return igt_mmap_revoke(arg, I915_MMAP_TYPE_GTT); -} + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; -static int igt_mmap_cpu_revoke(void *arg) -{ - return igt_mmap_revoke(arg, I915_MMAP_TYPE_WC); + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; } int i915_gem_mman_live_selftests(struct drm_i915_private *i915) @@ -973,10 +1232,9 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_partial_tiling), SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), - SUBTEST(igt_mmap_gtt), - SUBTEST(igt_mmap_cpu), - SUBTEST(igt_mmap_gtt_revoke), - SUBTEST(igt_mmap_cpu_revoke), + SUBTEST(igt_mmap), + SUBTEST(igt_mmap_revoke), + SUBTEST(igt_mmap_gpu), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 7d7e13dc2fdf..384143aa7776 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -77,12 +77,13 @@ live_context(struct drm_i915_private *i915, struct file *file) { struct i915_gem_context *ctx; int err; + u32 id; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = gem_context_register(ctx, to_drm_file(file)->driver_priv); + err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); if (err < 0) goto err_ctx; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h index 370360b4a148..688511afa883 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h @@ -7,6 +7,8 @@ #ifndef __MOCK_GEM_OBJECT_H__ #define __MOCK_GEM_OBJECT_H__ +#include "gem/i915_gem_object_types.h" + struct mock_object { struct drm_i915_gem_object base; }; diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile deleted file mode 100644 index 7e73aa587967..000000000000 --- a/drivers/gpu/drm/i915/gt/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c new file mode 100644 index 000000000000..f10b2c41571c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/log2.h> + +#include "gen6_ppgtt.h" +#include "i915_scatterlist.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_gt.h" + +/* Write pde (index) from the page directory @pd to the page table @pt */ +static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) +{ + /* Caller needs to make sure the write completes if necessary */ + iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); +} + +void gen7_ppgtt_enable(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 ecochk; + + intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); + + ecochk = intel_uncore_read(uncore, GAM_ECOCHK); + if (IS_HASWELL(i915)) { + ecochk |= ECOCHK_PPGTT_WB_HSW; + } else { + ecochk |= ECOCHK_PPGTT_LLC_IVB; + ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; + } + intel_uncore_write(uncore, GAM_ECOCHK, ecochk); + + for_each_engine(engine, gt, id) { + /* GFX_MODE is per-ring on gen7+ */ + ENGINE_WRITE(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + } +} + +void gen6_ppgtt_enable(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_rmw(uncore, + GAC_ECO_BITS, + 0, + ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); + + intel_uncore_rmw(uncore, + GAB_CTL, + 0, + GAB_CTL_CONT_AFTER_PAGEFAULT); + + intel_uncore_rmw(uncore, + GAM_ECOCHK, + 0, + ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); + + if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */ + intel_uncore_write(uncore, + GFX_MODE, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); +} + +/* PPGTT support for Sandybdrige/Gen6 and later */ +static void gen6_ppgtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + const gen6_pte_t scratch_pte = vm->scratch[0].encode; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + + while (num_entries) { + struct i915_page_table * const pt = + i915_pt_entry(ppgtt->base.pd, pde++); + const unsigned int count = min(num_entries, GEN6_PTES - pte); + gen6_pte_t *vaddr; + + GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); + + num_entries -= count; + + GEM_BUG_ON(count > atomic_read(&pt->used)); + if (!atomic_sub_return(count, &pt->used)) + ppgtt->scan_for_unused_pt = true; + + /* + * Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ + + vaddr = kmap_atomic_px(pt); + memset32(vaddr + pte, scratch_pte, count); + kunmap_atomic(vaddr); + + pte = 0; + } +} + +static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_page_directory * const pd = ppgtt->pd; + unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE; + unsigned int act_pt = first_entry / GEN6_PTES; + unsigned int act_pte = first_entry % GEN6_PTES; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter = sgt_dma(vma); + gen6_pte_t *vaddr; + + GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); + + vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); + do { + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); + + iter.dma += I915_GTT_PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; + + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } + + if (++act_pte == GEN6_PTES) { + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); + act_pte = 0; + } + } while (1); + kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; +} + +static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) +{ + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_table *pt; + unsigned int pde; + + start = round_down(start, SZ_64K); + end = round_up(end, SZ_64K) - start; + + mutex_lock(&ppgtt->flush); + + gen6_for_each_pde(pt, pd, start, end, pde) + gen6_write_pde(ppgtt, pde, pt); + + mb(); + ioread32(ppgtt->pd_addr + pde - 1); + gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt); + mb(); + + mutex_unlock(&ppgtt->flush); +} + +static int gen6_alloc_va_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_table *pt, *alloc = NULL; + intel_wakeref_t wakeref; + u64 from = start; + unsigned int pde; + int ret = 0; + + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + spin_lock(&pd->lock); + gen6_for_each_pde(pt, pd, start, length, pde) { + const unsigned int count = gen6_pte_count(start, length); + + if (px_base(pt) == px_base(&vm->scratch[1])) { + spin_unlock(&pd->lock); + + pt = fetch_and_zero(&alloc); + if (!pt) + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto unwind_out; + } + + fill32_px(pt, vm->scratch[0].encode); + + spin_lock(&pd->lock); + if (pd->entry[pde] == &vm->scratch[1]) { + pd->entry[pde] = pt; + } else { + alloc = pt; + pt = pd->entry[pde]; + } + } + + atomic_add(count, &pt->used); + } + spin_unlock(&pd->lock); + + if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) + gen6_flush_pd(ppgtt, from, start); + + goto out; + +unwind_out: + gen6_ppgtt_clear_range(vm, from, start - from); +out: + if (alloc) + free_px(vm, alloc); + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return ret; +} + +static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) +{ + struct i915_address_space * const vm = &ppgtt->base.vm; + struct i915_page_directory * const pd = ppgtt->base.pd; + int ret; + + ret = setup_scratch_page(vm, __GFP_HIGHMEM); + if (ret) + return ret; + + vm->scratch[0].encode = + vm->pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_NONE, PTE_READ_ONLY); + + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) { + cleanup_scratch_page(vm); + return -ENOMEM; + } + + fill32_px(&vm->scratch[1], vm->scratch[0].encode); + memset_p(pd->entry, &vm->scratch[1], I915_PDES); + + return 0; +} + +static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) +{ + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); + struct i915_page_table *pt; + u32 pde; + + gen6_for_all_pdes(pt, pd, pde) + if (px_base(pt) != scratch) + free_px(&ppgtt->base.vm, pt); +} + +static void gen6_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + + __i915_vma_put(ppgtt->vma); + + gen6_ppgtt_free_pd(ppgtt); + free_scratch(vm); + + mutex_destroy(&ppgtt->flush); + mutex_destroy(&ppgtt->pin_mutex); + kfree(ppgtt->base.pd); +} + +static int pd_vma_set_pages(struct i915_vma *vma) +{ + vma->pages = ERR_PTR(-ENODEV); + return 0; +} + +static void pd_vma_clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + vma->pages = NULL; +} + +static int pd_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); + struct gen6_ppgtt *ppgtt = vma->private; + u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; + + px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; + + gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); + return 0; +} + +static void pd_vma_unbind(struct i915_vma *vma) +{ + struct gen6_ppgtt *ppgtt = vma->private; + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); + struct i915_page_table *pt; + unsigned int pde; + + if (!ppgtt->scan_for_unused_pt) + return; + + /* Free all no longer used page tables */ + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { + if (px_base(pt) == scratch || atomic_read(&pt->used)) + continue; + + free_px(&ppgtt->base.vm, pt); + pd->entry[pde] = scratch; + } + + ppgtt->scan_for_unused_pt = false; +} + +static const struct i915_vma_ops pd_vma_ops = { + .set_pages = pd_vma_set_pages, + .clear_pages = pd_vma_clear_pages, + .bind_vma = pd_vma_bind, + .unbind_vma = pd_vma_unbind, +}; + +static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) +{ + struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_vma *vma; + + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(size > ggtt->vm.total); + + vma = i915_vma_alloc(); + if (!vma) + return ERR_PTR(-ENOMEM); + + i915_active_init(&vma->active, NULL, NULL); + + kref_init(&vma->ref); + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(&ggtt->vm); + vma->ops = &pd_vma_ops; + vma->private = ppgtt; + + vma->size = size; + vma->fence_size = size; + atomic_set(&vma->flags, I915_VMA_GGTT); + vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ + + INIT_LIST_HEAD(&vma->obj_link); + INIT_LIST_HEAD(&vma->closed_link); + + return vma; +} + +int gen6_ppgtt_pin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + int err; + + GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); + + /* + * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt + * which will be pinned into every active context. + * (When vma->pin_count becomes atomic, I expect we will naturally + * need a larger, unpacked, type and kill this redundancy.) + */ + if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) + return 0; + + if (mutex_lock_interruptible(&ppgtt->pin_mutex)) + return -EINTR; + + /* + * PPGTT PDEs reside in the GGTT and consists of 512 entries. The + * allocator works in address space sizes, so it's multiplied by page + * size. We allocate at the top of the GTT to avoid fragmentation. + */ + err = 0; + if (!atomic_read(&ppgtt->pin_count)) + err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); + if (!err) + atomic_inc(&ppgtt->pin_count); + mutex_unlock(&ppgtt->pin_mutex); + + return err; +} + +void gen6_ppgtt_unpin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); +} + +void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + if (!atomic_read(&ppgtt->pin_count)) + return; + + i915_vma_unpin(ppgtt->vma); + atomic_set(&ppgtt->pin_count, 0); +} + +struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ggtt * const ggtt = gt->ggtt; + struct gen6_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + mutex_init(&ppgtt->flush); + mutex_init(&ppgtt->pin_mutex); + + ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.top = 1; + + ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; + ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; + ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; + + ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; + + ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); + if (!ppgtt->base.pd) { + err = -ENOMEM; + goto err_free; + } + + err = gen6_ppgtt_init_scratch(ppgtt); + if (err) + goto err_pd; + + ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + goto err_scratch; + } + + return &ppgtt->base; + +err_scratch: + free_scratch(&ppgtt->base.vm); +err_pd: + kfree(ppgtt->base.pd); +err_free: + mutex_destroy(&ppgtt->pin_mutex); + kfree(ppgtt); + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h new file mode 100644 index 000000000000..72e481806c96 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN6_PPGTT_H__ +#define __GEN6_PPGTT_H__ + +#include "intel_gtt.h" + +struct gen6_ppgtt { + struct i915_ppgtt base; + + struct mutex flush; + struct i915_vma *vma; + gen6_pte_t __iomem *pd_addr; + + atomic_t pin_count; + struct mutex pin_mutex; + + bool scan_for_unused_pt; +}; + +static inline u32 gen6_pte_index(u32 addr) +{ + return i915_pte_index(addr, GEN6_PDE_SHIFT); +} + +static inline u32 gen6_pte_count(u32 addr, u32 length) +{ + return i915_pte_count(addr, length, GEN6_PDE_SHIFT); +} + +static inline u32 gen6_pde_index(u32 addr) +{ + return i915_pde_index(addr, GEN6_PDE_SHIFT); +} + +#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base) + +static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) +{ + BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base)); + return __to_gen6_ppgtt(base); +} + +/* + * gen6_for_each_pde() iterates over every pde from start until start+length. + * If start and start+length are not perfectly divisible, the macro will round + * down and up as needed. Start=0 and length=2G effectively iterates over + * every PDE in the system. The macro modifies ALL its parameters except 'pd', + * so each of the other parameters should preferably be a simple variable, or + * at most an lvalue with no side-effects! + */ +#define gen6_for_each_pde(pt, pd, start, length, iter) \ + for (iter = gen6_pde_index(start); \ + length > 0 && iter < I915_PDES && \ + (pt = i915_pt_entry(pd, iter), true); \ + ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ + temp = min(temp - start, length); \ + start += temp, length -= temp; }), ++iter) + +#define gen6_for_all_pdes(pt, pd, iter) \ + for (iter = 0; \ + iter < I915_PDES && \ + (pt = i915_pt_entry(pd, iter), true); \ + ++iter) + +int gen6_ppgtt_pin(struct i915_ppgtt *base); +void gen6_ppgtt_unpin(struct i915_ppgtt *base); +void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); +void gen6_ppgtt_enable(struct intel_gt *gt); +void gen7_ppgtt_enable(struct intel_gt *gt); +struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c new file mode 100644 index 000000000000..077b8f7cf6cb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/log2.h> + +#include "gen8_ppgtt.h" +#include "i915_scatterlist.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_gt.h" +#include "intel_gtt.h" + +static u64 gen8_pde_encode(const dma_addr_t addr, + const enum i915_cache_level level) +{ + u64 pde = addr | _PAGE_PRESENT | _PAGE_RW; + + if (level != I915_CACHE_NONE) + pde |= PPAT_CACHED_PDE; + else + pde |= PPAT_UNCACHED; + + return pde; +} + +static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) +{ + struct drm_i915_private *i915 = ppgtt->vm.i915; + struct intel_uncore *uncore = ppgtt->vm.gt->uncore; + enum vgt_g2v_type msg; + int i; + + if (create) + atomic_inc(px_used(ppgtt->pd)); /* never remove */ + else + atomic_dec(px_used(ppgtt->pd)); + + mutex_lock(&i915->vgpu.lock); + + if (i915_vm_is_4lvl(&ppgtt->vm)) { + const u64 daddr = px_dma(ppgtt->pd); + + intel_uncore_write(uncore, + vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); + intel_uncore_write(uncore, + vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); + + msg = create ? + VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY; + } else { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + + intel_uncore_write(uncore, + vgtif_reg(pdp[i].lo), + lower_32_bits(daddr)); + intel_uncore_write(uncore, + vgtif_reg(pdp[i].hi), + upper_32_bits(daddr)); + } + + msg = create ? + VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY; + } + + /* g2v_notify atomically (via hv trap) consumes the message packet. */ + intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg); + + mutex_unlock(&i915->vgpu.lock); +} + +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ +#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */ +#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE)) +#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64)) +#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES)) +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl)) +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl)) +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl)) + +#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) + +static inline unsigned int +gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) +{ + const int shift = gen8_pd_shift(lvl); + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(start >= end); + end += ~mask >> gen8_pd_shift(1); + + *idx = i915_pde_index(start, shift); + if ((start ^ end) & mask) + return GEN8_PDES - *idx; + else + return i915_pde_index(end, shift) - *idx; +} + +static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) +{ + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(start >= end); + return (start ^ end) & mask && (start & ~mask) == 0; +} + +static inline unsigned int gen8_pt_count(u64 start, u64 end) +{ + GEM_BUG_ON(start >= end); + if ((start ^ end) >> gen8_pd_shift(1)) + return GEN8_PDES - (start & (GEN8_PDES - 1)); + else + return end - start; +} + +static inline unsigned int +gen8_pd_top_count(const struct i915_address_space *vm) +{ + unsigned int shift = __gen8_pte_shift(vm->top); + return (vm->total + (1ull << shift) - 1) >> shift; +} + +static inline struct i915_page_directory * +gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); + + if (vm->top == 2) + return ppgtt->pd; + else + return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); +} + +static inline struct i915_page_directory * +gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) +{ + return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); +} + +static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, + struct i915_page_directory *pd, + int count, int lvl) +{ + if (lvl) { + void **pde = pd->entry; + + do { + if (!*pde) + continue; + + __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1); + } while (pde++, --count); + } + + free_px(vm, pd); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + + if (intel_vgpu_active(vm->i915)) + gen8_ppgtt_notify_vgt(ppgtt, false); + + __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top); + free_scratch(vm); +} + +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 start, const u64 end, int lvl) +{ + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + unsigned int idx, len; + + GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); + + len = gen8_pd_range(start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); + + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && + gen8_pd_contains(start, end, lvl)) { + DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); + clear_pd_entry(pd, idx, scratch); + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); + start += (u64)I915_PDES << gen8_pd_shift(lvl); + continue; + } + + if (lvl) { + start = __gen8_ppgtt_clear(vm, as_pd(pt), + start, end, lvl); + } else { + unsigned int count; + u64 *vaddr; + + count = gen8_pt_count(start, end); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); + GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); + + vaddr = kmap_atomic_px(pt); + memset64(vaddr + gen8_pd_index(start, 0), + vm->scratch[0].encode, + count); + kunmap_atomic(vaddr); + + atomic_sub(count, &pt->used); + start += count; + } + + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + } while (idx++, --len); + + return start; +} + +static void gen8_ppgtt_clear(struct i915_address_space *vm, + u64 start, u64 length) +{ + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(range_overflows(start, length, vm->total)); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + start, start + length, vm->top); +} + +static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 * const start, const u64 end, int lvl) +{ + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + struct i915_page_table *alloc = NULL; + unsigned int idx, len; + int ret = 0; + + GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); + + len = gen8_pd_range(*start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, *start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); + + spin_lock(&pd->lock); + GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */ + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (!pt) { + spin_unlock(&pd->lock); + + DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", + __func__, vm, lvl + 1, idx); + + pt = fetch_and_zero(&alloc); + if (lvl) { + if (!pt) { + pt = &alloc_pd(vm)->pt; + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto out; + } + } + + fill_px(pt, vm->scratch[lvl].encode); + } else { + if (!pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto out; + } + } + + if (intel_vgpu_active(vm->i915) || + gen8_pt_count(*start, end) < I915_PDES) + fill_px(pt, vm->scratch[lvl].encode); + } + + spin_lock(&pd->lock); + if (likely(!pd->entry[idx])) + set_pd_entry(pd, idx, pt); + else + alloc = pt, pt = pd->entry[idx]; + } + + if (lvl) { + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + ret = __gen8_ppgtt_alloc(vm, as_pd(pt), + start, end, lvl); + if (unlikely(ret)) { + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + goto out; + } + + spin_lock(&pd->lock); + atomic_dec(&pt->used); + GEM_BUG_ON(!atomic_read(&pt->used)); + } else { + unsigned int count = gen8_pt_count(*start, end); + + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", + __func__, vm, lvl, *start, end, + gen8_pd_index(*start, 0), count, + atomic_read(&pt->used)); + + atomic_add(count, &pt->used); + /* All other pdes may be simultaneously removed */ + GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); + *start += count; + } + } while (idx++, --len); + spin_unlock(&pd->lock); +out: + if (alloc) + free_px(vm, alloc); + return ret; +} + +static int gen8_ppgtt_alloc(struct i915_address_space *vm, + u64 start, u64 length) +{ + u64 from; + int err; + + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(range_overflows(start, length, vm->total)); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + from = start; + + err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top); + if (unlikely(err && from != start)) + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + from, start, vm->top); + + return err; +} + +static __always_inline u64 +gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, + struct i915_page_directory *pdp, + struct sgt_dma *iter, + u64 idx, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + gen8_pte_t *vaddr; + + pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + do { + vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; + + iter->dma += I915_GTT_PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + idx = 0; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; + } + + if (gen8_pd_index(++idx, 0) == 0) { + if (gen8_pd_index(idx, 1) == 0) { + /* Limited by sg length for 3lvl */ + if (gen8_pd_index(idx, 2) == 0) + break; + + pd = pdp->entry[gen8_pd_index(idx, 2)]; + } + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + } + } while (1); + kunmap_atomic(vaddr); + + return idx; +} + +static void gen8_ppgtt_insert_huge(struct i915_vma *vma, + struct sgt_dma *iter, + enum i915_cache_level cache_level, + u32 flags) +{ + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm)); + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_address(vma->vm, start); + struct i915_page_directory * const pd = + i915_pd_entry(pdp, __gen8_pte_index(start, 2)); + gen8_pte_t encode = pte_encode; + unsigned int maybe_64K = -1; + unsigned int page_size; + gen8_pte_t *vaddr; + u16 index; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && + !__gen8_pte_index(start, 0)) { + index = __gen8_pte_index(start, 1); + encode |= GEN8_PDE_PS_2M; + page_size = I915_GTT_PAGE_SIZE_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt = + i915_pt_entry(pd, __gen8_pte_index(start, 1)); + + index = __gen8_pte_index(start, 0); + page_size = I915_GTT_PAGE_SIZE; + + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) + maybe_64K = __gen8_pte_index(start, 1); + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (maybe_64K != -1 && index < I915_PDES && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) + maybe_64K = -1; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < I915_PDES); + + kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K != -1 && + (index == I915_PDES || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. + */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = vma->vm->scratch[0].encode; + vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } + } + + vma->page_sizes.gtt |= page_size; + } while (iter->sg); +} + +static void gen8_ppgtt_insert(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = sgt_dma(vma); + + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); + } else { + u64 idx = vma->node.start >> GEN8_PTE_SHIFT; + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + + idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, + cache_level, flags); + } while (idx); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + } +} + +static int gen8_init_scratch(struct i915_address_space *vm) +{ + int ret; + int i; + + /* + * If everybody agrees to not to write into the scratch page, + * we can reuse it for all vm, keeping contexts and processes separate. + */ + if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { + struct i915_address_space *clone = vm->gt->vm; + + GEM_BUG_ON(!clone->has_read_only); + + vm->scratch_order = clone->scratch_order; + memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); + px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ + return 0; + } + + ret = setup_scratch_page(vm, __GFP_HIGHMEM); + if (ret) + return ret; + + vm->scratch[0].encode = + gen8_pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_LLC, vm->has_read_only); + + for (i = 1; i <= vm->top; i++) { + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) + goto free_scratch; + + fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); + vm->scratch[i].encode = + gen8_pde_encode(px_dma(&vm->scratch[i]), + I915_CACHE_LLC); + } + + return 0; + +free_scratch: + free_scratch(vm); + return -ENOMEM; +} + +static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) +{ + struct i915_address_space *vm = &ppgtt->vm; + struct i915_page_directory *pd = ppgtt->pd; + unsigned int idx; + + GEM_BUG_ON(vm->top != 2); + GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); + + for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { + struct i915_page_directory *pde; + + pde = alloc_pd(vm); + if (IS_ERR(pde)) + return PTR_ERR(pde); + + fill_px(pde, vm->scratch[1].encode); + set_pd_entry(pd, idx, pde); + atomic_inc(px_used(pde)); /* keep pinned */ + } + wmb(); + + return 0; +} + +static struct i915_page_directory * +gen8_alloc_top_pd(struct i915_address_space *vm) +{ + const unsigned int count = gen8_pd_top_count(vm); + struct i915_page_directory *pd; + + GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); + + pd = __alloc_pd(offsetof(typeof(*pd), entry[count])); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + atomic_inc(px_used(pd)); /* mark as pinned */ + return pd; +} + +/* + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers + * with a net effect resembling a 2-level page table in normal x86 terms. Each + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address + * space. + * + */ +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ppgtt_init(ppgtt, gt); + ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; + + /* + * From bdw, there is hw support for read-only pages in the PPGTT. + * + * Gen11 has HSDES#:1807136187 unresolved. Disable ro support + * for now. + * + * Gen12 has inherited the same read-only fault issue from gen11. + */ + ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12); + + /* + * There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. + */ + if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915)) + ppgtt->vm.pt_kmap_wc = true; + + err = gen8_init_scratch(&ppgtt->vm); + if (err) + goto err_free; + + ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm); + if (IS_ERR(ppgtt->pd)) { + err = PTR_ERR(ppgtt->pd); + goto err_free_scratch; + } + + if (!i915_vm_is_4lvl(&ppgtt->vm)) { + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) + goto err_free_pd; + } + + ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; + ppgtt->vm.insert_entries = gen8_ppgtt_insert; + ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; + ppgtt->vm.clear_range = gen8_ppgtt_clear; + + if (intel_vgpu_active(gt->i915)) + gen8_ppgtt_notify_vgt(ppgtt, true); + + ppgtt->vm.cleanup = gen8_ppgtt_cleanup; + + return ppgtt; + +err_free_pd: + __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, + gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top); +err_free_scratch: + free_scratch(&ppgtt->vm); +err_free: + kfree(ppgtt); + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h new file mode 100644 index 000000000000..76a08b9c1f5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN8_PPGTT_H__ +#define __GEN8_PPGTT_H__ + +struct intel_gt; + +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index fbaa9df6f436..23137b2a8689 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -43,30 +43,76 @@ intel_context_create(struct intel_engine_cs *engine) return ce; } -int __intel_context_do_pin(struct intel_context *ce) +int intel_context_alloc_state(struct intel_context *ce) { - int err; + int err = 0; if (mutex_lock_interruptible(&ce->pin_mutex)) return -EINTR; - if (likely(!atomic_read(&ce->pin_count))) { - intel_wakeref_t wakeref; + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + err = ce->ops->alloc(ce); + if (unlikely(err)) + goto unlock; - if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { - err = ce->ops->alloc(ce); - if (unlikely(err)) - goto err; + set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + } - __set_bit(CONTEXT_ALLOC_BIT, &ce->flags); +unlock: + mutex_unlock(&ce->pin_mutex); + return err; +} + +static int intel_context_active_acquire(struct intel_context *ce) +{ + int err; + + err = i915_active_acquire(&ce->active); + if (err) + return err; + + /* Preallocate tracking nodes */ + if (!intel_context_is_barrier(ce)) { + err = i915_active_acquire_preallocate_barrier(&ce->active, + ce->engine); + if (err) { + i915_active_release(&ce->active); + return err; } + } + + return 0; +} + +static void intel_context_active_release(struct intel_context *ce) +{ + /* Nodes preallocated in intel_context_active() */ + i915_active_acquire_barrier(&ce->active); + i915_active_release(&ce->active); +} - err = 0; - with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref) - err = ce->ops->pin(ce); +int __intel_context_do_pin(struct intel_context *ce) +{ + int err; + + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + err = intel_context_alloc_state(ce); if (err) + return err; + } + + if (mutex_lock_interruptible(&ce->pin_mutex)) + return -EINTR; + + if (likely(!atomic_read(&ce->pin_count))) { + err = intel_context_active_acquire(ce); + if (unlikely(err)) goto err; + err = ce->ops->pin(ce); + if (unlikely(err)) + goto err_active; + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", ce->ring->head, ce->ring->tail); @@ -79,6 +125,8 @@ int __intel_context_do_pin(struct intel_context *ce) mutex_unlock(&ce->pin_mutex); return 0; +err_active: + intel_context_active_release(ce); err: mutex_unlock(&ce->pin_mutex); return err; @@ -86,22 +134,20 @@ err: void intel_context_unpin(struct intel_context *ce) { - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + if (!atomic_dec_and_test(&ce->pin_count)) return; - /* We may be called from inside intel_context_pin() to evict another */ - intel_context_get(ce); - mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); - - if (likely(atomic_dec_and_test(&ce->pin_count))) { - CE_TRACE(ce, "retire\n"); - - ce->ops->unpin(ce); + CE_TRACE(ce, "unpin\n"); + ce->ops->unpin(ce); - intel_context_active_release(ce); - } - - mutex_unlock(&ce->pin_mutex); + /* + * Once released, we may asynchronously drop the active reference. + * As that may be the only reference keeping the context alive, + * take an extra now so that it is not freed before we finish + * dereferencing it. + */ + intel_context_get(ce); + intel_context_active_release(ce); intel_context_put(ce); } @@ -114,6 +160,10 @@ static int __context_pin_state(struct i915_vma *vma) if (err) return err; + err = i915_active_acquire(&vma->active); + if (err) + goto err_unpin; + /* * And mark it as a globally pinned object to let the shrinker know * it cannot reclaim the object until we release it. @@ -122,14 +172,44 @@ static int __context_pin_state(struct i915_vma *vma) vma->obj->mm.dirty = true; return 0; + +err_unpin: + i915_vma_unpin(vma); + return err; } static void __context_unpin_state(struct i915_vma *vma) { i915_vma_make_shrinkable(vma); + i915_active_release(&vma->active); __i915_vma_unpin(vma); } +static int __ring_active(struct intel_ring *ring) +{ + int err; + + err = i915_active_acquire(&ring->vma->active); + if (err) + return err; + + err = intel_ring_pin(ring); + if (err) + goto err_active; + + return 0; + +err_active: + i915_active_release(&ring->vma->active); + return err; +} + +static void __ring_retire(struct intel_ring *ring) +{ + intel_ring_unpin(ring); + i915_active_release(&ring->vma->active); +} + __i915_active_call static void __intel_context_retire(struct i915_active *active) { @@ -142,7 +222,7 @@ static void __intel_context_retire(struct i915_active *active) __context_unpin_state(ce->state); intel_timeline_unpin(ce->timeline); - intel_ring_unpin(ce->ring); + __ring_retire(ce->ring); intel_context_put(ce); } @@ -152,9 +232,11 @@ static int __intel_context_active(struct i915_active *active) struct intel_context *ce = container_of(active, typeof(*ce), active); int err; + CE_TRACE(ce, "active\n"); + intel_context_get(ce); - err = intel_ring_pin(ce->ring); + err = __ring_active(ce->ring); if (err) goto err_put; @@ -174,40 +256,12 @@ static int __intel_context_active(struct i915_active *active) err_timeline: intel_timeline_unpin(ce->timeline); err_ring: - intel_ring_unpin(ce->ring); + __ring_retire(ce->ring); err_put: intel_context_put(ce); return err; } -int intel_context_active_acquire(struct intel_context *ce) -{ - int err; - - err = i915_active_acquire(&ce->active); - if (err) - return err; - - /* Preallocate tracking nodes */ - if (!intel_context_is_barrier(ce)) { - err = i915_active_acquire_preallocate_barrier(&ce->active, - ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } - } - - return 0; -} - -void intel_context_active_release(struct intel_context *ce) -{ - /* Nodes preallocated in intel_context_active() */ - i915_active_acquire_barrier(&ce->active); - i915_active_release(&ce->active); -} - void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 1d4a1b1357cf..30bd248827d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -19,7 +19,7 @@ #define CE_TRACE(ce, fmt, ...) do { \ const struct intel_context *ce__ = (ce); \ - ENGINE_TRACE(ce__->engine, "context:%llx" fmt, \ + ENGINE_TRACE(ce__->engine, "context:%llx " fmt, \ ce__->timeline->fence_context, \ ##__VA_ARGS__); \ } while (0) @@ -31,6 +31,8 @@ void intel_context_fini(struct intel_context *ce); struct intel_context * intel_context_create(struct intel_engine_cs *engine); +int intel_context_alloc_state(struct intel_context *ce); + void intel_context_free(struct intel_context *ce); /** @@ -76,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) int __intel_context_do_pin(struct intel_context *ce); +static inline bool intel_context_pin_if_active(struct intel_context *ce) +{ + return atomic_inc_not_zero(&ce->pin_count); +} + static inline int intel_context_pin(struct intel_context *ce) { - if (likely(atomic_inc_not_zero(&ce->pin_count))) + if (likely(intel_context_pin_if_active(ce))) return 0; return __intel_context_do_pin(ce); @@ -116,9 +123,6 @@ static inline void intel_context_exit(struct intel_context *ce) ce->ops->exit(ce); } -int intel_context_active_acquire(struct intel_context *ce); -void intel_context_active_release(struct intel_context *ce); - static inline struct intel_context *intel_context_get(struct intel_context *ce) { kref_get(&ce->ref); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 9527a659546c..ca1420fb8b53 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -17,6 +17,8 @@ #include "intel_engine_types.h" #include "intel_sseu.h" +#define CONTEXT_REDZONE POISON_INUSE + struct i915_gem_context; struct i915_vma; struct intel_context; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 79ecac5ac0ab..5df003061e44 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -202,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); -void intel_engine_get_instdone(struct intel_engine_cs *engine, +void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone); void intel_engine_init_execlists(struct intel_engine_cs *engine); @@ -282,7 +282,7 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool intel_engines_are_idle(struct intel_gt *gt); bool intel_engine_is_idle(struct intel_engine_cs *engine); -bool intel_engine_flush_submission(struct intel_engine_cs *engine); +void intel_engine_flush_submission(struct intel_engine_cs *engine); void intel_engines_reset_default_submission(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ddf9543b1261..f451ef376548 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -914,8 +914,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) } static u32 -read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, - i915_reg_t reg) +read_subslice_reg(const struct intel_engine_cs *engine, + int slice, int subslice, i915_reg_t reg) { struct drm_i915_private *i915 = engine->i915; struct intel_uncore *uncore = engine->uncore; @@ -959,7 +959,7 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, } /* NB: please notice the memset */ -void intel_engine_get_instdone(struct intel_engine_cs *engine, +void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; @@ -1047,10 +1047,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } -bool intel_engine_flush_submission(struct intel_engine_cs *engine) +void intel_engine_flush_submission(struct intel_engine_cs *engine) { struct tasklet_struct *t = &engine->execlists.tasklet; - bool active = tasklet_is_locked(t); if (__tasklet_is_scheduled(t)) { local_bh_disable(); @@ -1061,13 +1060,10 @@ bool intel_engine_flush_submission(struct intel_engine_cs *engine) tasklet_unlock(t); } local_bh_enable(); - active = true; } /* Otherwise flush the tasklet if it was running on another cpu */ tasklet_unlock_wait(t); - - return active; } /** diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 742628e40201..6c6fd185457c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -199,7 +199,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine) goto out_unlock; } - rq->flags |= I915_REQUEST_SENTINEL; + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); idle_pulse(engine, rq); __i915_request_commit(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 010620b78202..ea90ab3e396e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -20,6 +20,7 @@ static int __engine_unpark(struct intel_wakeref *wf) { struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); + struct intel_context *ce; void *map; ENGINE_TRACE(engine, "\n"); @@ -34,6 +35,27 @@ static int __engine_unpark(struct intel_wakeref *wf) if (!IS_ERR_OR_NULL(map)) engine->pinned_default_state = map; + /* Discard stale context state from across idling */ + ce = engine->kernel_context; + if (ce) { + GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags)); + + /* First poison the image to verify we never fully trust it */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) { + struct drm_i915_gem_object *obj = ce->state->obj; + int type = i915_coherent_map_type(engine->i915); + + map = i915_gem_object_pin_map(obj, type); + if (!IS_ERR(map)) { + memset(map, CONTEXT_REDZONE, obj->base.size); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + } + } + + ce->ops->reset(ce); + } + if (engine->unpark) engine->unpark(engine); @@ -123,16 +145,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) unsigned long flags; bool result = true; + /* GPU is pointing to the void, as good as in the kernel context. */ + if (intel_gt_is_wedged(engine->gt)) + return true; + GEM_BUG_ON(!intel_context_is_barrier(ce)); /* Already inside the kernel context, safe to power down. */ if (engine->wakeref_serial == engine->serial) return true; - /* GPU is pointing to the void, as good as in the kernel context. */ - if (intel_gt_is_wedged(engine->gt)) - return true; - /* * Note, we do this without taking the timeline->mutex. We cannot * as we may be called while retiring the kernel context and so diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 7f7150a733f4..9e7f12bef828 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "intel_engine.h" #include "intel_engine_user.h" +#include "intel_gt.h" struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) @@ -200,6 +201,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915) uabi_node); char old[sizeof(engine->name)]; + if (intel_gt_has_init_error(engine->gt)) + continue; /* ignore incomplete engines */ + GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c new file mode 100644 index 000000000000..79096722ce16 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -0,0 +1,1486 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/stop_machine.h> + +#include <asm/set_memory.h> +#include <asm/smp.h> + +#include "intel_gt.h" +#include "i915_drv.h" +#include "i915_scatterlist.h" +#include "i915_vgpu.h" + +#include "intel_gtt.h" + +static int +i915_get_ggtt_vma_pages(struct i915_vma *vma); + +static void i915_ggtt_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) +{ + if (i915_node_color_differs(node, color)) + *start += I915_GTT_PAGE_SIZE; + + /* + * Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ + node = list_next_entry(node, node_list); + if (node->color != color) + *end -= I915_GTT_PAGE_SIZE; +} + +static int ggtt_init_hw(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); + + ggtt->vm.is_ggtt = true; + + /* Only VLV supports read-only GGTT mappings */ + ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); + + if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) + ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; + + if (ggtt->mappable_end) { + if (!io_mapping_init_wc(&ggtt->iomap, + ggtt->gmadr.start, + ggtt->mappable_end)) { + ggtt->vm.cleanup(&ggtt->vm); + return -EIO; + } + + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, + ggtt->mappable_end); + } + + i915_ggtt_init_fences(ggtt); + + return 0; +} + +/** + * i915_ggtt_init_hw - Initialize GGTT hardware + * @i915: i915 device + */ +int i915_ggtt_init_hw(struct drm_i915_private *i915) +{ + int ret; + + stash_init(&i915->mm.wc_stash); + + /* + * Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. + */ + ret = ggtt_init_hw(&i915->ggtt); + if (ret) + return ret; + + return 0; +} + +/* + * Certain Gen5 chipsets require require idling the GPU before + * unmapping anything from the GTT when VT-d is enabled. + */ +static bool needs_idle_maps(struct drm_i915_private *i915) +{ + /* + * Query intel_iommu to see if we need the workaround. Presumably that + * was loaded first. + */ + return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active(); +} + +static void ggtt_suspend_mappings(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + + /* + * Don't bother messing with faults pre GEN6 as we have little + * documentation supporting that it's a good idea. + */ + if (INTEL_GEN(i915) < 6) + return; + + intel_gt_check_and_clear_faults(ggtt->vm.gt); + + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + + ggtt->invalidate(ggtt); +} + +void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915) +{ + ggtt_suspend_mappings(&i915->ggtt); +} + +void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + + spin_lock_irq(&uncore->lock); + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6); + spin_unlock_irq(&uncore->lock); +} + +static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + + /* + * Note that as an uncached mmio write, this will flush the + * WCB of the writes into the GGTT before it triggers the invalidate. + */ + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); +} + +static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; + + gen8_ggtt_invalidate(ggtt); + + if (INTEL_GEN(i915) >= 12) + intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + else + intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); +} + +static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + intel_gtt_chipset_flush(); +} + +static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) +{ + writeq(pte, addr); +} + +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); + + ggtt->invalidate(ggtt); +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct sgt_iter sgt_iter; + gen8_pte_t __iomem *gtt_entries; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); + dma_addr_t addr; + + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ + + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, sgt_iter, vma->pages) + gen8_set_pte(gtt_entries++, pte_encode | addr); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + iowrite32(vm->pte_encode(addr, level, flags), pte); + + ggtt->invalidate(ggtt); +} + +/* + * Binds an object into the global gtt with the specified cache level. + * The object will be accessible to the GPU via commands whose operands + * reference offsets within the global GTT as well as accessible by the GPU + * through the GMADR mapped BAR (i915->mm.gtt->gtt). + */ +static void gen6_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + struct sgt_iter iter; + dma_addr_t addr; + + for_each_sgt_daddr(addr, iter, vma->pages) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void nop_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0].encode; + gen8_pte_t __iomem *gtt_base = + (gen8_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + for (i = 0; i < num_entries; i++) + gen8_set_pte(>t_base[i], scratch_pte); +} + +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register. + */ + intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); +} + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +struct insert_entries { + struct i915_address_space *vm; + struct i915_vma *vma; + enum i915_cache_level level; + u32 flags; +}; + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct insert_entries arg = { vm, vma, level, flags }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +struct clear_range { + struct i915_address_space *vm; + u64 start; + u64 length; +}; + +static int bxt_vtd_ggtt_clear_range__cb(void *_arg) +{ + struct clear_range *arg = _arg; + + gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, + u64 start, + u64 length) +{ + struct clear_range arg = { vm, start, length }; + + stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); +} + +static void gen6_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + gen6_pte_t scratch_pte, __iomem *gtt_base = + (gen6_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = vm->scratch[0].encode; + for (i = 0; i < num_entries; i++) + iowrite32(scratch_pte, >t_base[i]); +} + +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); +} + +static void i915_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, + flags); +} + +static void i915_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); +} + +static int ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + u32 pte_flags; + + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ + pte_flags = 0; + if (i915_gem_object_is_readonly(obj)) + pte_flags |= PTE_READ_ONLY; + + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + + /* + * Without aliasing PPGTT there's no difference between + * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally + * upgrade to both bound if we bind either to avoid double-binding. + */ + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); + + return 0; +} + +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); +} + +static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) +{ + u64 size; + int ret; + + if (!USES_GUC(ggtt->vm.i915)) + return 0; + + GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); + size = ggtt->vm.total - GUC_GGTT_TOP; + + ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, + PIN_NOEVICT); + if (ret) + DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n"); + + return ret; +} + +static void ggtt_release_guc_top(struct i915_ggtt *ggtt) +{ + if (drm_mm_node_allocated(&ggtt->uc_fw)) + drm_mm_remove_node(&ggtt->uc_fw); +} + +static void cleanup_init_ggtt(struct i915_ggtt *ggtt) +{ + ggtt_release_guc_top(ggtt); + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + mutex_destroy(&ggtt->error_mutex); +} + +static int init_ggtt(struct i915_ggtt *ggtt) +{ + /* + * Let GEM Manage all of the aperture. + * + * However, leave one page at the end still bound to the scratch page. + * There are a number of places where the hardware apparently prefetches + * past the end of the object, and we've seen multiple hangs with the + * GPU head pointer stuck in a batchbuffer bound at the last page of the + * aperture. One page should be enough to keep any prefetching inside + * of the aperture. + */ + unsigned long hole_start, hole_end; + struct drm_mm_node *entry; + int ret; + + /* + * GuC requires all resources that we're sharing with it to be placed in + * non-WOPCM memory. If GuC is not present or not in use we still need a + * small bias as ring wraparound at offset 0 sometimes hangs. No idea + * why. + */ + ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, + intel_wopcm_guc_size(&ggtt->vm.i915->wopcm)); + + ret = intel_vgt_balloon(ggtt); + if (ret) + return ret; + + mutex_init(&ggtt->error_mutex); + if (ggtt->mappable_end) { + /* Reserve a mappable slot for our lockless error capture */ + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (ret) + return ret; + } + + /* + * The upper portion of the GuC address space has a sizeable hole + * (several MB) that is inaccessible by GuC. Reserve this range within + * GGTT as it can comfortably hold GuC/HuC firmware images. + */ + ret = ggtt_reserve_guc_top(ggtt); + if (ret) + goto err; + + /* Clear any non-preallocated blocks */ + drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { + DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); + ggtt->vm.clear_range(&ggtt->vm, hole_start, + hole_end - hole_start); + } + + /* And finally clear the reserved guard page */ + ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); + + return 0; + +err: + cleanup_init_ggtt(ggtt); + return ret; +} + +static int aliasing_gtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + u32 pte_flags; + int ret; + + /* Currently applicable only to VLV */ + pte_flags = 0; + if (i915_gem_object_is_readonly(vma->obj)) + pte_flags |= PTE_READ_ONLY; + + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; + + if (flags & I915_VMA_ALLOC) { + ret = alias->vm.allocate_va_range(&alias->vm, + vma->node.start, + vma->size); + if (ret) + return ret; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + } + + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, + __i915_vma_flags(vma))); + alias->vm.insert_entries(&alias->vm, vma, + cache_level, pte_flags); + } + + if (flags & I915_VMA_GLOBAL_BIND) + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + return 0; +} + +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) +{ + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + struct i915_address_space *vm = vma->vm; + + vm->clear_range(vm, vma->node.start, vma->size); + } + + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { + struct i915_address_space *vm = + &i915_vm_to_ggtt(vma->vm)->alias->vm; + + vm->clear_range(vm, vma->node.start, vma->size); + } +} + +static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) +{ + struct i915_ppgtt *ppgtt; + int err; + + ppgtt = i915_ppgtt_create(ggtt->vm.gt); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { + err = -ENODEV; + goto err_ppgtt; + } + + /* + * Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. + */ + err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); + if (err) + goto err_ppgtt; + + ggtt->alias = ppgtt; + ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; + + GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); + ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; + + GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); + ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + + return 0; + +err_ppgtt: + i915_vm_put(&ppgtt->vm); + return err; +} + +static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) +{ + struct i915_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&ggtt->alias); + if (!ppgtt) + return; + + i915_vm_put(&ppgtt->vm); + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; +} + +int i915_init_ggtt(struct drm_i915_private *i915) +{ + int ret; + + ret = init_ggtt(&i915->ggtt); + if (ret) + return ret; + + if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { + ret = init_aliasing_ppgtt(&i915->ggtt); + if (ret) + cleanup_init_ggtt(&i915->ggtt); + } + + return 0; +} + +static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) +{ + struct i915_vma *vma, *vn; + + atomic_set(&ggtt->vm.open, 0); + + rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ + flush_workqueue(ggtt->vm.i915->wq); + + mutex_lock(&ggtt->vm.mutex); + + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) + WARN_ON(__i915_vma_unbind(vma)); + + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + mutex_destroy(&ggtt->error_mutex); + + ggtt_release_guc_top(ggtt); + intel_vgt_deballoon(ggtt); + + ggtt->vm.cleanup(&ggtt->vm); + + mutex_unlock(&ggtt->vm.mutex); + i915_address_space_fini(&ggtt->vm); + + arch_phys_wc_del(ggtt->mtrr); + + if (ggtt->iomap.size) + io_mapping_fini(&ggtt->iomap); +} + +/** + * i915_ggtt_driver_release - Clean up GGTT hardware initialization + * @i915: i915 device + */ +void i915_ggtt_driver_release(struct drm_i915_private *i915) +{ + struct pagevec *pvec; + + fini_aliasing_ppgtt(&i915->ggtt); + + ggtt_cleanup_hw(&i915->ggtt); + + pvec = &i915->mm.wc_stash.pvec; + if (pvec->nr) { + set_pages_array_wb(pvec->pages, pvec->nr); + __pagevec_release(pvec); + } +} + +static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; + if (bdw_gmch_ctl) + bdw_gmch_ctl = 1 << bdw_gmch_ctl; + +#ifdef CONFIG_X86_32 + /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ + if (bdw_gmch_ctl > 4) + bdw_gmch_ctl = 4; +#endif + + return bdw_gmch_ctl << 20; +} + +static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) +{ + gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; + gmch_ctrl &= SNB_GMCH_GGMS_MASK; + + if (gmch_ctrl) + return 1 << (20 + gmch_ctrl); + + return 0; +} + +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + phys_addr_t phys_addr; + int ret; + + /* For Modern GENs the PTEs and register space are split in the BAR */ + phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; + + /* + * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + ggtt->gsm = ioremap_nocache(phys_addr, size); + else + ggtt->gsm = ioremap_wc(phys_addr, size); + if (!ggtt->gsm) { + DRM_ERROR("Failed to map the ggtt page table\n"); + return -ENOMEM; + } + + ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); + if (ret) { + DRM_ERROR("Scratch setup failed\n"); + /* iounmap will also get called at remove, but meh */ + iounmap(ggtt->gsm); + return ret; + } + + ggtt->vm.scratch[0].encode = + ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), + I915_CACHE_NONE, 0); + + return 0; +} + +int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + cleanup_scratch_page(vm); +} + +static struct resource pci_resource(struct pci_dev *pdev, int bar) +{ + return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar)); +} + +static int gen8_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + unsigned int size; + u16 snb_gmch_ctl; + int err; + + /* TODO: We're not aware of mappable constraints on gen8 yet */ + if (!IS_DGFX(i915)) { + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (IS_CHERRYVIEW(i915)) + size = chv_get_total_gtt_size(snb_gmch_ctl); + else + size = gen8_get_total_gtt_size(snb_gmch_ctl); + + ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.cleanup = gen6_gmch_remove; + ggtt->vm.insert_page = gen8_ggtt_insert_page; + ggtt->vm.clear_range = nop_clear_range; + if (intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen8_ggtt_clear_range; + + ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + + /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ + if (intel_ggtt_update_needs_vtd_wa(i915) || + IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { + ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + if (ggtt->vm.clear_range != nop_clear_range) + ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; + } + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + ggtt->vm.pte_encode = gen8_pte_encode; + + setup_private_pat(ggtt->vm.gt->uncore); + + return ggtt_probe_common(ggtt, size); +} + +static u64 snb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 ivb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + pte |= GEN7_PTE_CACHE_L3_LLC; + break; + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 byt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (!(flags & PTE_READ_ONLY)) + pte |= BYT_PTE_WRITEABLE; + + if (level != I915_CACHE_NONE) + pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; + + return pte; +} + +static u64 hsw_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (level != I915_CACHE_NONE) + pte |= HSW_WB_LLC_AGE3; + + return pte; +} + +static u64 iris_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE3; + break; + default: + pte |= HSW_WB_ELLC_LLC_AGE3; + break; + } + + return pte; +} + +static int gen6_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + unsigned int size; + u16 snb_gmch_ctl; + int err; + + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + + /* + * 64/512MB is the current min/max we actually know of, but this is + * just a coarse sanity check. + */ + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { + DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); + return -ENXIO; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.insert_page = gen6_ggtt_insert_page; + ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.cleanup = gen6_gmch_remove; + + ggtt->invalidate = gen6_ggtt_invalidate; + + if (HAS_EDRAM(i915)) + ggtt->vm.pte_encode = iris_pte_encode; + else if (IS_HASWELL(i915)) + ggtt->vm.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(i915)) + ggtt->vm.pte_encode = byt_pte_encode; + else if (INTEL_GEN(i915) >= 7) + ggtt->vm.pte_encode = ivb_pte_encode; + else + ggtt->vm.pte_encode = snb_pte_encode; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + return ggtt_probe_common(ggtt, size); +} + +static void i915_gmch_remove(struct i915_address_space *vm) +{ + intel_gmch_remove(); +} + +static int i915_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + phys_addr_t gmadr_base; + int ret; + + ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; + } + + intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); + + ggtt->gmadr = + (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + + ggtt->do_idle_maps = needs_idle_maps(i915); + ggtt->vm.insert_page = i915_ggtt_insert_page; + ggtt->vm.insert_entries = i915_ggtt_insert_entries; + ggtt->vm.clear_range = i915_ggtt_clear_range; + ggtt->vm.cleanup = i915_gmch_remove; + + ggtt->invalidate = gmch_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + if (unlikely(ggtt->do_idle_maps)) + dev_notice(i915->drm.dev, + "Applying Ironlake quirks for intel_iommu\n"); + + return 0; +} + +static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + int ret; + + ggtt->vm.gt = gt; + ggtt->vm.i915 = i915; + ggtt->vm.dma = &i915->drm.pdev->dev; + + if (INTEL_GEN(i915) <= 5) + ret = i915_gmch_probe(ggtt); + else if (INTEL_GEN(i915) < 8) + ret = gen6_gmch_probe(ggtt); + else + ret = gen8_gmch_probe(ggtt); + if (ret) + return ret; + + if ((ggtt->vm.total - 1) >> 32) { + DRM_ERROR("We never expected a Global GTT with more than 32bits" + " of address space! Found %lldM!\n", + ggtt->vm.total >> 20); + ggtt->vm.total = 1ULL << 32; + ggtt->mappable_end = + min_t(u64, ggtt->mappable_end, ggtt->vm.total); + } + + if (ggtt->mappable_end > ggtt->vm.total) { + DRM_ERROR("mappable aperture extends past end of GGTT," + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->vm.total); + ggtt->mappable_end = ggtt->vm.total; + } + + /* GMADR is the PCI mmio aperture into the global GTT. */ + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); + DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); + DRM_DEBUG_DRIVER("DSM size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); + + return 0; +} + +/** + * i915_ggtt_probe_hw - Probe GGTT hardware location + * @i915: i915 device + */ +int i915_ggtt_probe_hw(struct drm_i915_private *i915) +{ + int ret; + + ret = ggtt_probe_hw(&i915->ggtt, &i915->gt); + if (ret) + return ret; + + if (intel_vtd_active()) + dev_info(i915->drm.dev, "VT-d active for gfx access\n"); + + return 0; +} + +int i915_ggtt_enable_hw(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) < 6 && !intel_enable_gtt()) + return -EIO; + + return 0; +} + +void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) +{ + GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate); + + ggtt->invalidate = guc_ggtt_invalidate; + + ggtt->invalidate(ggtt); +} + +void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) +{ + /* XXX Temporary pardon for error unload */ + if (ggtt->invalidate == gen8_ggtt_invalidate) + return; + + /* We should only be called after i915_ggtt_enable_guc() */ + GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate); + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->invalidate(ggtt); +} + +static void ggtt_restore_mappings(struct i915_ggtt *ggtt) +{ + struct i915_vma *vma; + bool flush = false; + int open; + + intel_gt_check_and_clear_faults(ggtt->vm.gt); + + mutex_lock(&ggtt->vm.mutex); + + /* First fill our portion of the GTT with scratch pages */ + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); + + /* clflush objects bound into the GGTT and rebind them. */ + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + continue; + + clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); + WARN_ON(i915_vma_bind(vma, + obj ? obj->cache_level : 0, + PIN_GLOBAL, NULL)); + if (obj) { /* only used during resume => exclusive access */ + flush |= fetch_and_zero(&obj->write_domain); + obj->read_domains |= I915_GEM_DOMAIN_GTT; + } + } + + atomic_set(&ggtt->vm.open, open); + ggtt->invalidate(ggtt); + + mutex_unlock(&ggtt->vm.mutex); + + if (flush) + wbinvd_on_all_cpus(); +} + +void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + ggtt_restore_mappings(ggtt); + + if (INTEL_GEN(i915) >= 8) + setup_private_pat(ggtt->vm.gt->uncore); +} + +static struct scatterlist * +rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int column, row; + unsigned int src_idx; + + for (column = 0; column < width; column++) { + src_idx = stride * (height - 1) + column + offset; + for (row = 0; row < height; row++) { + st->nents++; + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); + sg_dma_address(sg) = + i915_gem_object_get_dma_address(obj, src_idx); + sg_dma_len(sg) = I915_GTT_PAGE_SIZE; + sg = sg_next(sg); + src_idx -= stride; + } + } + + return sg; +} + +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_rotation_info_size(rot_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { + sg = rotate_pages(obj, rot_info->plane[i].offset, + rot_info->plane[i].width, rot_info->plane[i].height, + rot_info->plane[i].stride, st, sg); + } + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static struct scatterlist * +remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int row; + + for (row = 0; row < height; row++) { + unsigned int left = width * I915_GTT_PAGE_SIZE; + + while (left) { + dma_addr_t addr; + unsigned int length; + + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + + addr = i915_gem_object_get_dma_address_len(obj, offset, &length); + + length = min(left, length); + + st->nents++; + + sg_set_page(sg, NULL, length, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = length; + sg = sg_next(sg); + + offset += length / I915_GTT_PAGE_SIZE; + left -= length; + } + + offset += stride - width; + } + + return sg; +} + +static noinline struct sg_table * +intel_remap_pages(struct intel_remapped_info *rem_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_remapped_info_size(rem_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { + sg = remap_pages(obj, rem_info->plane[i].offset, + rem_info->plane[i].width, rem_info->plane[i].height, + rem_info->plane[i].stride, st, sg); + } + + i915_sg_trim(st); + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static noinline struct sg_table * +intel_partial_pages(const struct i915_ggtt_view *view, + struct drm_i915_gem_object *obj) +{ + struct sg_table *st; + struct scatterlist *sg, *iter; + unsigned int count = view->partial.size; + unsigned int offset; + int ret = -ENOMEM; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, count, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); + GEM_BUG_ON(!iter); + + sg = st->sgl; + st->nents = 0; + do { + unsigned int len; + + len = min(iter->length - (offset << PAGE_SHIFT), + count << PAGE_SHIFT); + sg_set_page(sg, NULL, len, 0); + sg_dma_address(sg) = + sg_dma_address(iter) + (offset << PAGE_SHIFT); + sg_dma_len(sg) = len; + + st->nents++; + count -= len >> PAGE_SHIFT; + if (count == 0) { + sg_mark_end(sg); + i915_sg_trim(st); /* Drop any unused tail entries. */ + + return st; + } + + sg = __sg_next(sg); + iter = __sg_next(iter); + offset = 0; + } while (1); + +err_sg_alloc: + kfree(st); +err_st_alloc: + return ERR_PTR(ret); +} + +static int +i915_get_ggtt_vma_pages(struct i915_vma *vma) +{ + int ret; + + /* + * The vma->pages are only valid within the lifespan of the borrowed + * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so + * must be the vma->pages. A simple rule is that vma->pages must only + * be accessed when the obj->mm.pages are pinned. + */ + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); + + switch (vma->ggtt_view.type) { + default: + GEM_BUG_ON(vma->ggtt_view.type); + /* fall through */ + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; + return 0; + + case I915_GGTT_VIEW_ROTATED: + vma->pages = + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_REMAPPED: + vma->pages = + intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: + vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); + break; + } + + ret = 0; + if (IS_ERR(vma->pages)) { + ret = PTR_ERR(vma->pages); + vma->pages = NULL; + DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", + vma->ggtt_view.type, ret); + } + return ret; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ec84b5e62fef..da2b6e2ae692 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -38,8 +38,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) { gt->ggtt = ggtt; - - intel_gt_sanitize(gt, false); } static void init_unused_ring(struct intel_gt *gt, u32 base) @@ -77,10 +75,6 @@ int intel_gt_init_hw(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; int ret; - ret = intel_gt_terminally_wedged(gt); - if (ret) - return ret; - gt->last_init_time = ktime_get(); /* Double layer security blanket, see i915_gem_init() */ @@ -372,7 +366,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt) static struct i915_address_space *kernel_vm(struct intel_gt *gt) { if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) - return &i915_ppgtt_create(gt->i915)->vm; + return &i915_ppgtt_create(gt)->vm; else return i915_vm_get(>->ggtt->vm); } @@ -410,14 +404,13 @@ static int __engines_record_defaults(struct intel_gt *gt) struct intel_context *ce; struct i915_request *rq; + /* We must be able to switch to something! */ + GEM_BUG_ON(!engine->kernel_context); + err = intel_renderstate_init(&so, engine); if (err) goto out; - /* We must be able to switch to something! */ - GEM_BUG_ON(!engine->kernel_context); - engine->serial++; /* force the kernel context switch */ - ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 2355cf129e9c..1dac441cb8f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -58,9 +58,14 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, return i915_ggtt_offset(gt->scratch) + field; } -static inline bool intel_gt_is_wedged(struct intel_gt *gt) +static inline bool intel_gt_is_wedged(const struct intel_gt *gt) { return __intel_reset_failed(>->reset); } +static inline bool intel_gt_has_init_error(const struct intel_gt *gt) +{ + return test_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 45b68a17da4d..d1c2f034296a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -126,17 +126,7 @@ static bool reset_engines(struct intel_gt *gt) return __intel_gt_reset(gt, ALL_ENGINES) == 0; } -/** - * intel_gt_sanitize: called after the GPU has lost power - * @gt: the i915 GT container - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_gt_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_gt_sanitize(struct intel_gt *gt, bool force) +static void gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -189,6 +179,10 @@ int intel_gt_resume(struct intel_gt *gt) enum intel_engine_id id; int err; + err = intel_gt_has_init_error(gt); + if (err) + return err; + GT_TRACE(gt, "\n"); /* @@ -201,30 +195,26 @@ int intel_gt_resume(struct intel_gt *gt) intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); + gt_sanitize(gt, true); + if (intel_gt_is_wedged(gt)) { + err = -EIO; + goto out_fw; + } /* Only when the HW is re-initialised, can we replay the requests */ err = intel_gt_init_hw(gt); if (err) { dev_err(gt->i915->drm.dev, "Failed to initialize GPU, declaring it wedged!\n"); - intel_gt_set_wedged(gt); - goto err_fw; + goto err_wedged; } intel_rps_enable(>->rps); intel_llc_enable(>->llc); for_each_engine(engine, gt, id) { - struct intel_context *ce; - intel_engine_pm_get(engine); - ce = engine->kernel_context; - if (ce) { - GEM_BUG_ON(!intel_context_is_pinned(ce)); - ce->ops->reset(ce); - } - engine->serial++; /* kernel context lost */ err = engine->resume(engine); @@ -233,7 +223,7 @@ int intel_gt_resume(struct intel_gt *gt) dev_err(gt->i915->drm.dev, "Failed to restart %s (%d)\n", engine->name, err); - break; + goto err_wedged; } } @@ -243,11 +233,14 @@ int intel_gt_resume(struct intel_gt *gt) user_forcewake(gt, false); -err_fw: +out_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); - return err; + +err_wedged: + intel_gt_set_wedged(gt); + goto out_fw; } static void wait_for_suspend(struct intel_gt *gt) @@ -315,7 +308,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) intel_llc_disable(>->llc); } - intel_gt_sanitize(gt, false); + gt_sanitize(gt, false); GT_TRACE(gt, "\n"); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 4a9e48c12bd4..60f0e2fbe55c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -51,8 +51,6 @@ void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_pm_init(struct intel_gt *gt); void intel_gt_pm_fini(struct intel_gt *gt); -void intel_gt_sanitize(struct intel_gt *gt, bool force); - void intel_gt_suspend_prepare(struct intel_gt *gt); void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index b4f04614230e..7ef1d37970f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -14,13 +14,16 @@ #include "intel_gt_requests.h" #include "intel_timeline.h" -static void retire_requests(struct intel_timeline *tl) +static bool retire_requests(struct intel_timeline *tl) { struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &tl->requests, link) if (!i915_request_retire(rq)) - break; + return false; + + /* And check nothing new was submitted */ + return !i915_active_fence_isset(&tl->last_request); } static bool flush_submission(struct intel_gt *gt) @@ -29,9 +32,13 @@ static bool flush_submission(struct intel_gt *gt) enum intel_engine_id id; bool active = false; + if (!intel_gt_pm_is_awake(gt)) + return false; + for_each_engine(engine, gt, id) { - active |= intel_engine_flush_submission(engine); + intel_engine_flush_submission(engine); active |= flush_work(&engine->retire_work); + active |= flush_work(&engine->wakeref.work); } return active; @@ -120,7 +127,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) timeout = -timeout, interruptible = false; flush_submission(gt); /* kick the ksoftirqd tasklets */ - spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) { @@ -145,7 +151,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } } - retire_requests(tl); + if (!retire_requests(tl) || flush_submission(gt)) + active_count++; spin_lock(&timelines->lock); @@ -153,8 +160,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) list_safe_reset_next(tl, tn, link); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - else - active_count += i915_active_fence_isset(&tl->last_request); mutex_unlock(&tl->mutex); @@ -169,9 +174,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); - if (flush_submission(gt)) - active_count++; - return active_count ? timeout : 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c new file mode 100644 index 000000000000..16acdc5d6734 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/slab.h> /* fault-inject.h is not standalone! */ + +#include <linux/fault-inject.h> + +#include "i915_trace.h" +#include "intel_gt.h" +#include "intel_gtt.h" + +void stash_init(struct pagestash *stash) +{ + pagevec_init(&stash->pvec); + spin_lock_init(&stash->lock); +} + +static struct page *stash_pop_page(struct pagestash *stash) +{ + struct page *page = NULL; + + spin_lock(&stash->lock); + if (likely(stash->pvec.nr)) + page = stash->pvec.pages[--stash->pvec.nr]; + spin_unlock(&stash->lock); + + return page; +} + +static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) +{ + unsigned int nr; + + spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); + + nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); + memcpy(stash->pvec.pages + stash->pvec.nr, + pvec->pages + pvec->nr - nr, + sizeof(pvec->pages[0]) * nr); + stash->pvec.nr += nr; + + spin_unlock(&stash->lock); + + pvec->nr -= nr; +} + +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) +{ + struct pagevec stack; + struct page *page; + + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); + + page = stash_pop_page(&vm->free_pages); + if (page) + return page; + + if (!vm->pt_kmap_wc) + return alloc_page(gfp); + + /* Look in our global stash of WC pages... */ + page = stash_pop_page(&vm->i915->mm.wc_stash); + if (page) + return page; + + /* + * Otherwise batch allocate pages to amortize cost of set_pages_wc. + * + * We have to be careful as page allocation may trigger the shrinker + * (via direct reclaim) which will fill up the WC stash underneath us. + * So we add our WB pages into a temporary pvec on the stack and merge + * them into the WC stash after all the allocations are complete. + */ + pagevec_init(&stack); + do { + struct page *page; + + page = alloc_page(gfp); + if (unlikely(!page)) + break; + + stack.pages[stack.nr++] = page; + } while (pagevec_space(&stack)); + + if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { + page = stack.pages[--stack.nr]; + + /* Merge spare WC pages to the global stash */ + if (stack.nr) + stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); + + /* Push any surplus WC pages onto the local VM stash */ + if (stack.nr) + stash_push_pagevec(&vm->free_pages, &stack); + } + + /* Return unwanted leftovers */ + if (unlikely(stack.nr)) { + WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); + __pagevec_release(&stack); + } + + return page; +} + +static void vm_free_pages_release(struct i915_address_space *vm, + bool immediate) +{ + struct pagevec *pvec = &vm->free_pages.pvec; + struct pagevec stack; + + lockdep_assert_held(&vm->free_pages.lock); + GEM_BUG_ON(!pagevec_count(pvec)); + + if (vm->pt_kmap_wc) { + /* + * When we use WC, first fill up the global stash and then + * only if full immediately free the overflow. + */ + stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); + + /* + * As we have made some room in the VM's free_pages, + * we can wait for it to fill again. Unless we are + * inside i915_address_space_fini() and must + * immediately release the pages! + */ + if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) + return; + + /* + * We have to drop the lock to allow ourselves to sleep, + * so take a copy of the pvec and clear the stash for + * others to use it as we sleep. + */ + stack = *pvec; + pagevec_reinit(pvec); + spin_unlock(&vm->free_pages.lock); + + pvec = &stack; + set_pages_array_wb(pvec->pages, pvec->nr); + + spin_lock(&vm->free_pages.lock); + } + + __pagevec_release(pvec); +} + +static void vm_free_page(struct i915_address_space *vm, struct page *page) +{ + /* + * On !llc, we need to change the pages back to WB. We only do so + * in bulk, so we rarely need to change the page attributes here, + * but doing so requires a stop_machine() from deep inside arch/x86/mm. + * To make detection of the possible sleep more likely, use an + * unconditional might_sleep() for everybody. + */ + might_sleep(); + spin_lock(&vm->free_pages.lock); + while (!pagevec_space(&vm->free_pages.pvec)) + vm_free_pages_release(vm, false); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); + pagevec_add(&vm->free_pages.pvec, page); + spin_unlock(&vm->free_pages.lock); +} + +void __i915_vm_close(struct i915_address_space *vm) +{ + struct i915_vma *vma, *vn; + + mutex_lock(&vm->mutex); + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + __i915_vma_put(vma); + + i915_gem_object_put(obj); + } + GEM_BUG_ON(!list_empty(&vm->bound_list)); + mutex_unlock(&vm->mutex); +} + +void i915_address_space_fini(struct i915_address_space *vm) +{ + spin_lock(&vm->free_pages.lock); + if (pagevec_count(&vm->free_pages.pvec)) + vm_free_pages_release(vm, true); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); + spin_unlock(&vm->free_pages.lock); + + drm_mm_takedown(&vm->mm); + + mutex_destroy(&vm->mutex); +} + +static void __i915_vm_release(struct work_struct *work) +{ + struct i915_address_space *vm = + container_of(work, struct i915_address_space, rcu.work); + + vm->cleanup(vm); + i915_address_space_fini(vm); + + kfree(vm); +} + +void i915_vm_release(struct kref *kref) +{ + struct i915_address_space *vm = + container_of(kref, struct i915_address_space, ref); + + GEM_BUG_ON(i915_is_ggtt(vm)); + trace_i915_ppgtt_release(vm); + + queue_rcu_work(vm->i915->wq, &vm->rcu); +} + +void i915_address_space_init(struct i915_address_space *vm, int subclass) +{ + kref_init(&vm->ref); + INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + atomic_set(&vm->open, 1); + + /* + * The vm->mutex must be reclaim safe (for use in the shrinker). + * Do a dummy acquire now under fs_reclaim so that any allocation + * attempt holding the lock is immediately reported by lockdep. + */ + mutex_init(&vm->mutex); + lockdep_set_subclass(&vm->mutex, subclass); + i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); + + GEM_BUG_ON(!vm->total); + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + + stash_init(&vm->free_pages); + + INIT_LIST_HEAD(&vm->bound_list); +} + +void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); +} + +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); + if (unlikely(!p->page)) + return -ENOMEM; + + p->daddr = dma_map_page_attrs(vm->dma, + p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_WARN); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; + } + + return 0; +} + +int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +{ + return __setup_page_dma(vm, p, __GFP_HIGHMEM); +} + +void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +{ + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); +} + +void +fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) +{ + kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); +} + +int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) +{ + unsigned long size; + + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). However, as we share the + * scratch (read-only) between all vm, we create one 64k scratch page + * for all. + */ + size = I915_GTT_PAGE_SIZE_4K; + if (i915_vm_is_4lvl(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + size = I915_GTT_PAGE_SIZE_64K; + gfp |= __GFP_NOWARN; + } + gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; + + do { + unsigned int order = get_order(size); + struct page *page; + dma_addr_t addr; + + page = alloc_pages(gfp, order); + if (unlikely(!page)) + goto skip; + + addr = dma_map_page_attrs(vm->dma, + page, 0, size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_WARN); + if (unlikely(dma_mapping_error(vm->dma, addr))) + goto free_page; + + if (unlikely(!IS_ALIGNED(addr, size))) + goto unmap_page; + + vm->scratch[0].base.page = page; + vm->scratch[0].base.daddr = addr; + vm->scratch_order = order; + return 0; + +unmap_page: + dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); +free_page: + __free_pages(page, order); +skip: + if (size == I915_GTT_PAGE_SIZE_4K) + return -ENOMEM; + + size = I915_GTT_PAGE_SIZE_4K; + gfp &= ~__GFP_NOWARN; + } while (1); +} + +void cleanup_scratch_page(struct i915_address_space *vm) +{ + struct i915_page_dma *p = px_base(&vm->scratch[0]); + unsigned int order = vm->scratch_order; + + dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, order); +} + +void free_scratch(struct i915_address_space *vm) +{ + int i; + + if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ + return; + + for (i = 1; i <= vm->top; i++) { + if (!px_dma(&vm->scratch[i])) + break; + cleanup_page_dma(vm, px_base(&vm->scratch[i])); + } + + cleanup_scratch_page(vm); +} + +void gtt_write_workarounds(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + + /* + * This function is for gtt related workarounds. This function is + * called on driver load and after a GPU reset, so you can place + * workarounds here even if they get overwritten by GPU reset. + */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ + if (IS_BROADWELL(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); + else if (IS_CHERRYVIEW(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); + else if (IS_GEN9_LP(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. + */ + if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(i915) <= 10) + intel_uncore_rmw(uncore, + GEN8_GAMW_ECO_DEV_RW_IA, + 0, + GAMW_ECO_ENABLE_64K_IPS_FIELD); + + if (IS_GEN_RANGE(i915, 8, 11)) { + bool can_use_gtt_cache = true; + + /* + * According to the BSpec if we use 2M/1G pages then we also + * need to disable the GTT cache. At least on BDW we can see + * visual corruption when using 2M pages, and not disabling the + * GTT cache. + */ + if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M)) + can_use_gtt_cache = false; + + /* WaGttCachingOffByDefault */ + intel_uncore_write(uncore, + HSW_GTT_CACHE_EN, + can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); + WARN_ON_ONCE(can_use_gtt_cache && + intel_uncore_read(uncore, + HSW_GTT_CACHE_EN) == 0); + } +} + +u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; + + switch (level) { + case I915_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case I915_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + return pte; +} + +static void tgl_setup_private_ppat(struct intel_uncore *uncore) +{ + /* TGL doesn't support LLC or AGE settings */ + intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); + intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); +} + +static void cnl_setup_private_ppat(struct intel_uncore *uncore) +{ + intel_uncore_write(uncore, + GEN10_PAT_INDEX(0), + GEN8_PPAT_WB | GEN8_PPAT_LLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(1), + GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(2), + GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(3), + GEN8_PPAT_UC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(4), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(5), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(6), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(7), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); +} + +/* + * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability + * bits. When using advanced contexts each context stores its own PAT, but + * writing this data shouldn't be harmful even in those cases. + */ +static void bdw_setup_private_ppat(struct intel_uncore *uncore) +{ + u64 pat; + + pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ + GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ + GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ + GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ + GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | + GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | + GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | + GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +static void chv_setup_private_ppat(struct intel_uncore *uncore) +{ + u64 pat; + + /* + * Map WB on BDW to snooped on CHV. + * + * Only the snoop bit has meaning for CHV, the rest is + * ignored. + * + * The hardware will never snoop for certain types of accesses: + * - CPU GTT (GMADR->GGTT->no snoop->memory) + * - PPGTT page tables + * - some other special cycles + * + * As with BDW, we also need to consider the following for GT accesses: + * "For GGTT, there is NO pat_sel[2:0] from the entry, + * so RTL will always use the value corresponding to + * pat_sel = 000". + * Which means we must set the snoop bit in PAT entry 0 + * in order to keep the global status page working. + */ + + pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | + GEN8_PPAT(1, 0) | + GEN8_PPAT(2, 0) | + GEN8_PPAT(3, 0) | + GEN8_PPAT(4, CHV_PPAT_SNOOP) | + GEN8_PPAT(5, CHV_PPAT_SNOOP) | + GEN8_PPAT(6, CHV_PPAT_SNOOP) | + GEN8_PPAT(7, CHV_PPAT_SNOOP); + + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +void setup_private_pat(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + if (INTEL_GEN(i915) >= 12) + tgl_setup_private_ppat(uncore); + else if (INTEL_GEN(i915) >= 10) + cnl_setup_private_ppat(uncore); + else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) + chv_setup_private_ppat(uncore); + else + bdw_setup_private_ppat(uncore); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h new file mode 100644 index 000000000000..7da7681c20b1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -0,0 +1,587 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + * + * Please try to maintain the following order within this file unless it makes + * sense to do otherwise. From top to bottom: + * 1. typedefs + * 2. #defines, and macros + * 3. structure definitions + * 4. function prototypes + * + * Within each section, please try to order by generation in ascending order, + * from top to bottom (ie. gen6 on the top, gen8 on the bottom). + */ + +#ifndef __INTEL_GTT_H__ +#define __INTEL_GTT_H__ + +#include <linux/io-mapping.h> +#include <linux/kref.h> +#include <linux/mm.h> +#include <linux/pagevec.h> +#include <linux/scatterlist.h> +#include <linux/workqueue.h> + +#include <drm/drm_mm.h> + +#include "gt/intel_reset.h" +#include "i915_gem_fence_reg.h" +#include "i915_selftest.h" +#include "i915_vma_types.h" + +#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) +#define DBG(...) trace_printk(__VA_ARGS__) +#else +#define DBG(...) +#endif + +#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */ + +#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) +#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) +#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + +#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE + +#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE + +#define I915_FENCE_REG_NONE -1 +#define I915_MAX_NUM_FENCES 32 +/* 32 fences + sign bit for FENCE_REG_NONE */ +#define I915_MAX_NUM_FENCE_BITS 6 + +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; + +#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT) + +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) +#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) +#define I915_PDES 512 +#define I915_PDE_MASK (I915_PDES - 1) + +/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ +#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) +#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) +#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) +#define GEN6_PTE_CACHE_LLC (2 << 1) +#define GEN6_PTE_UNCACHED (1 << 1) +#define GEN6_PTE_VALID REG_BIT(0) + +#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t)) +#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE) +#define GEN6_PD_ALIGN (PAGE_SIZE * 16) +#define GEN6_PDE_SHIFT 22 +#define GEN6_PDE_VALID REG_BIT(0) +#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) + +#define GEN7_PTE_CACHE_L3_LLC (3 << 1) + +#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2) +#define BYT_PTE_WRITEABLE REG_BIT(1) + +/* + * Cacheability Control is a 4-bit value. The low three bits are stored in bits + * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. + */ +#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \ + (((bits) & 0x8) << (11 - 3))) +#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) +#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) +#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8) +#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) +#define HSW_PTE_UNCACHED (0) +#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) +#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) + +/* + * GEN8 32b style address is defined as a 3 level page table: + * 31:30 | 29:21 | 20:12 | 11:0 + * PDPE | PDE | PTE | offset + * The difference as compared to normal x86 3 level page table is the PDPEs are + * programmed via register. + * + * GEN8 48b style address is defined as a 4 level page table: + * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 + * PML4E | PDPE | PDE | PTE | offset + */ +#define GEN8_3LVL_PDPES 4 + +#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE 0 /* WB LLC */ +#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ + +#define CHV_PPAT_SNOOP REG_BIT(6) +#define GEN8_PPAT_AGE(x) ((x)<<4) +#define GEN8_PPAT_LLCeLLC (3<<2) +#define GEN8_PPAT_LLCELLC (2<<2) +#define GEN8_PPAT_LLC (1<<2) +#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_UC (0<<0) +#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) + +#define GEN8_PDE_IPS_64K BIT(11) +#define GEN8_PDE_PS_2M BIT(7) + +#define for_each_sgt_daddr(__dp, __iter, __sgt) \ + __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) + +struct i915_page_dma { + struct page *page; + union { + dma_addr_t daddr; + + /* + * For gen6/gen7 only. This is the offset in the GGTT + * where the page directory entries for PPGTT begin + */ + u32 ggtt_offset; + }; +}; + +struct i915_page_scratch { + struct i915_page_dma base; + u64 encode; +}; + +struct i915_page_table { + struct i915_page_dma base; + atomic_t used; +}; + +struct i915_page_directory { + struct i915_page_table pt; + spinlock_t lock; + void *entry[512]; +}; + +#define __px_choose_expr(x, type, expr, other) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), type) || \ + __builtin_types_compatible_p(typeof(x), const type), \ + ({ type __x = (type)(x); expr; }), \ + other) + +#define px_base(px) \ + __px_choose_expr(px, struct i915_page_dma *, __x, \ + __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ + __px_choose_expr(px, struct i915_page_table *, &__x->base, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ + (void)0)))) +#define px_dma(px) (px_base(px)->daddr) + +#define px_pt(px) \ + __px_choose_expr(px, struct i915_page_table *, __x, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \ + (void)0)) +#define px_used(px) (&px_pt(px)->used) + +enum i915_cache_level; + +struct drm_i915_file_private; +struct drm_i915_gem_object; +struct i915_vma; +struct intel_gt; + +struct i915_vma_ops { + /* Map an object into an address space with the given cache flags. */ + int (*bind_vma)(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); + /* + * Unmap an object from an address space. This usually consists of + * setting the valid PTE entries to a reserved scratch page. + */ + void (*unbind_vma)(struct i915_vma *vma); + + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); +}; + +struct pagestash { + spinlock_t lock; + struct pagevec pvec; +}; + +void stash_init(struct pagestash *stash); + +struct i915_address_space { + struct kref ref; + struct rcu_work rcu; + + struct drm_mm mm; + struct intel_gt *gt; + struct drm_i915_private *i915; + struct device *dma; + /* + * Every address space belongs to a struct file - except for the global + * GTT that is owned by the driver (and so @file is set to NULL). In + * principle, no information should leak from one context to another + * (or between files/processes etc) unless explicitly shared by the + * owner. Tracking the owner is important in order to free up per-file + * objects along with the file, to aide resource tracking, and to + * assign blame. + */ + struct drm_i915_file_private *file; + u64 total; /* size addr space maps (ex. 2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ + + unsigned int bind_async_flags; + + /* + * Each active user context has its own address space (in full-ppgtt). + * Since the vm may be shared between multiple contexts, we count how + * many contexts keep us "open". Once open hits zero, we are closed + * and do not allow any new attachments, and proceed to shutdown our + * vma and page directories. + */ + atomic_t open; + + struct mutex mutex; /* protects vma and our lists */ +#define VM_CLASS_GGTT 0 +#define VM_CLASS_PPGTT 1 + + struct i915_page_scratch scratch[4]; + unsigned int scratch_order; + unsigned int top; + + /** + * List of vma currently bound. + */ + struct list_head bound_list; + + struct pagestash free_pages; + + /* Global GTT */ + bool is_ggtt:1; + + /* Some systems require uncached updates of the page directories */ + bool pt_kmap_wc:1; + + /* Some systems support read-only mappings for GGTT and/or PPGTT */ + bool has_read_only:1; + + u64 (*pte_encode)(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); /* Create a valid PTE */ +#define PTE_READ_ONLY BIT(0) + + int (*allocate_va_range)(struct i915_address_space *vm, + u64 start, u64 length); + void (*clear_range)(struct i915_address_space *vm, + u64 start, u64 length); + void (*insert_page)(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 flags); + void (*insert_entries)(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); + void (*cleanup)(struct i915_address_space *vm); + + struct i915_vma_ops vma_ops; + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); +}; + +/* + * The Graphics Translation Table is the way in which GEN hardware translates a + * Graphics Virtual Address into a Physical Address. In addition to the normal + * collateral associated with any va->pa translations GEN hardware also has a + * portion of the GTT which can be mapped by the CPU and remain both coherent + * and correct (in cases like swizzling). That region is referred to as GMADR in + * the spec. + */ +struct i915_ggtt { + struct i915_address_space vm; + + struct io_mapping iomap; /* Mapping to our CPU mappable region */ + struct resource gmadr; /* GMADR resource */ + resource_size_t mappable_end; /* End offset that we can CPU map */ + + /** "Graphics Stolen Memory" holds the global PTEs */ + void __iomem *gsm; + void (*invalidate)(struct i915_ggtt *ggtt); + + /** PPGTT used for aliasing the PPGTT with the GTT */ + struct i915_ppgtt *alias; + + bool do_idle_maps; + + int mtrr; + + /** Bit 6 swizzling required for X tiling */ + u32 bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + u32 bit_6_swizzle_y; + + u32 pin_bias; + + unsigned int num_fences; + struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; + struct list_head fence_list; + + /** + * List of all objects in gtt_space, currently mmaped by userspace. + * All objects within this list must also be on bound_list. + */ + struct list_head userfault_list; + + /* Manual runtime pm autosuspend delay for user GGTT mmaps */ + struct intel_wakeref_auto userfault_wakeref; + + struct mutex error_mutex; + struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; +}; + +struct i915_ppgtt { + struct i915_address_space vm; + + struct i915_page_directory *pd; +}; + +#define i915_is_ggtt(vm) ((vm)->is_ggtt) + +static inline bool +i915_vm_is_4lvl(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); +} + +static inline bool +i915_vm_has_cache_coloring(struct i915_address_space *vm) +{ + return i915_is_ggtt(vm) && vm->mm.color_adjust; +} + +static inline struct i915_ggtt * +i915_vm_to_ggtt(struct i915_address_space *vm) +{ + BUILD_BUG_ON(offsetof(struct i915_ggtt, vm)); + GEM_BUG_ON(!i915_is_ggtt(vm)); + return container_of(vm, struct i915_ggtt, vm); +} + +static inline struct i915_ppgtt * +i915_vm_to_ppgtt(struct i915_address_space *vm) +{ + BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm)); + GEM_BUG_ON(i915_is_ggtt(vm)); + return container_of(vm, struct i915_ppgtt, vm); +} + +static inline struct i915_address_space * +i915_vm_get(struct i915_address_space *vm) +{ + kref_get(&vm->ref); + return vm; +} + +void i915_vm_release(struct kref *kref); + +static inline void i915_vm_put(struct i915_address_space *vm) +{ + kref_put(&vm->ref, i915_vm_release); +} + +static inline struct i915_address_space * +i915_vm_open(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + atomic_inc(&vm->open); + return i915_vm_get(vm); +} + +static inline bool +i915_vm_tryopen(struct i915_address_space *vm) +{ + if (atomic_add_unless(&vm->open, 1, 0)) + return i915_vm_get(vm); + + return false; +} + +void __i915_vm_close(struct i915_address_space *vm); + +static inline void +i915_vm_close(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + if (atomic_dec_and_test(&vm->open)) + __i915_vm_close(vm); + + i915_vm_put(vm); +} + +void i915_address_space_init(struct i915_address_space *vm, int subclass); +void i915_address_space_fini(struct i915_address_space *vm); + +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) +{ + const u32 mask = NUM_PTE(pde_shift) - 1; + + return (address >> PAGE_SHIFT) & mask; +} + +/* + * Helper to counts the number of PTEs within the given length. This count + * does not cross a page table boundary, so the max value would be + * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. + */ +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) +{ + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; + + GEM_BUG_ON(length == 0); + GEM_BUG_ON(offset_in_page(addr | length)); + + end = addr + length; + + if ((addr & mask) != (end & mask)) + return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); + + return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); +} + +static inline u32 i915_pde_index(u64 addr, u32 shift) +{ + return (addr >> shift) & I915_PDE_MASK; +} + +static inline struct i915_page_table * +i915_pt_entry(const struct i915_page_directory * const pd, + const unsigned short n) +{ + return pd->entry[n]; +} + +static inline struct i915_page_directory * +i915_pd_entry(const struct i915_page_directory * const pdp, + const unsigned short n) +{ + return pdp->entry[n]; +} + +static inline dma_addr_t +i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) +{ + struct i915_page_dma *pt = ppgtt->pd->entry[n]; + + return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); +} + +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); + +int i915_ggtt_probe_hw(struct drm_i915_private *i915); +int i915_ggtt_init_hw(struct drm_i915_private *i915); +int i915_ggtt_enable_hw(struct drm_i915_private *i915); +void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); +void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); +int i915_init_ggtt(struct drm_i915_private *i915); +void i915_ggtt_driver_release(struct drm_i915_private *i915); + +static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) +{ + return ggtt->mappable_end > 0; +} + +int i915_ppgtt_init_hw(struct intel_gt *gt); + +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); + +void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915); +void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915); + +u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); + +int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); +void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); + +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) + +void +fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); + +#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) +#define fill32_px(px, v) do { \ + u64 v__ = lower_32_bits(v); \ + fill_px((px), v__ << 32 | v__); \ +} while (0) + +int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp); +void cleanup_scratch_page(struct i915_address_space *vm); +void free_scratch(struct i915_address_space *vm); + +struct i915_page_table *alloc_pt(struct i915_address_space *vm); +struct i915_page_directory *alloc_pd(struct i915_address_space *vm); +struct i915_page_directory *__alloc_pd(size_t sz); + +void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd); + +#define free_px(vm, px) free_pd(vm, px_base(px)) + +void +__set_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_dma * const to, + u64 (*encode)(const dma_addr_t, const enum i915_cache_level)); + +#define set_pd_entry(pd, idx, to) \ + __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) + +void +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + const struct i915_page_scratch * const scratch); + +bool +release_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_table * const pt, + const struct i915_page_scratch * const scratch); +void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); + +int ggtt_set_pages(struct i915_vma *vma); +int ppgtt_set_pages(struct i915_vma *vma); +void clear_pages(struct i915_vma *vma); + +void gtt_write_workarounds(struct intel_gt *gt); + +void setup_private_pat(struct intel_uncore *uncore); + +static inline struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + + return (struct sgt_dma){ sg, addr, addr + sg->length }; +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 4fb70a7716e3..0cf0f6fae675 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -488,17 +488,23 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) return desc; } -static u32 *set_offsets(u32 *regs, +static inline unsigned int dword_in_page(void *addr) +{ + return offset_in_page(addr) / sizeof(u32); +} + +static void set_offsets(u32 *regs, const u8 *data, - const struct intel_engine_cs *engine) + const struct intel_engine_cs *engine, + bool clear) #define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count)) +#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) #define POSTED BIT(0) #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END() 0 +#define END(x) 0, (x) { const u32 base = engine->mmio_base; @@ -506,7 +512,10 @@ static u32 *set_offsets(u32 *regs, u8 count, flags; if (*data & BIT(7)) { /* skip */ - regs += *data++ & ~BIT(7); + count = *data++ & ~BIT(7); + if (clear) + memset32(regs, MI_NOOP, count); + regs += count; continue; } @@ -532,12 +541,25 @@ static u32 *set_offsets(u32 *regs, offset |= v & ~BIT(7); } while (v & BIT(7)); - *regs = base + (offset << 2); + regs[0] = base + (offset << 2); + if (clear) + regs[1] = 0; regs += 2; } while (--count); } - return regs; + if (clear) { + u8 count = *++data; + + /* Clear past the tail for HW access */ + GEM_BUG_ON(dword_in_page(regs) > count); + memset32(regs, MI_NOOP, count - dword_in_page(regs)); + + /* Close the batch; used mainly by live_lrc_layout() */ + *regs = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *regs |= BIT(0); + } } static const u8 gen8_xcs_offsets[] = { @@ -572,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = { REG16(0x200), REG(0x028), - END(), + END(80) }; static const u8 gen9_xcs_offsets[] = { @@ -656,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = { REG16(0x67c), REG(0x068), - END(), + END(176) }; static const u8 gen12_xcs_offsets[] = { @@ -688,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END(), + END(80) }; static const u8 gen8_rcs_offsets[] = { @@ -725,7 +747,91 @@ static const u8 gen8_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) +}; + +static const u8 gen9_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x34), + REG(0x30), + REG(0x38), + REG(0x3c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0xc8), + + NOP(13), + LRI(44, POSTED), + REG(0x28), + REG(0x9c), + REG(0xc0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x68), + + END(176) }; static const u8 gen11_rcs_offsets[] = { @@ -766,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; static const u8 gen12_rcs_offsets[] = { @@ -807,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; #undef END @@ -832,6 +938,8 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) return gen12_rcs_offsets; else if (INTEL_GEN(engine->i915) >= 11) return gen11_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_rcs_offsets; else return gen8_rcs_offsets; } else { @@ -1108,7 +1216,7 @@ __execlists_schedule_in(struct i915_request *rq) /* We don't need a strict matching tag, just different values */ ce->lrc_desc &= ~GENMASK_ULL(47, 37); ce->lrc_desc |= - (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << + (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << GEN11_SW_CTX_ID_SHIFT; BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); } @@ -1243,10 +1351,6 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - /* Wa_1607138340:tgl */ - if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) - desc |= CTX_DESC_FORCE_RESTORE; - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1430,8 +1534,8 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely((prev->flags ^ next->flags) & - (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + if (unlikely((prev->fence.flags ^ next->fence.flags) & + (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL))) return false; if (!can_merge_ctx(prev->context, next->context)) @@ -1443,7 +1547,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - set_offsets(regs, reg_offsets(engine), engine); + set_offsets(regs, reg_offsets(engine), engine, false); } static bool virtual_matches(const struct virtual_engine *ve, @@ -1590,7 +1694,7 @@ active_timeslice(const struct intel_engine_cs *engine) { const struct i915_request *rq = *engine->execlists.active; - if (i915_request_completed(rq)) + if (!rq || i915_request_completed(rq)) return 0; if (engine->execlists.switch_priority_hint < effective_prio(rq)) @@ -1636,6 +1740,11 @@ static void set_preempt_timeout(struct intel_engine_cs *engine) active_preempt_timeout(engine)); } +static inline void clear_ports(struct i915_request **ports, int count) +{ + memset_p((void **)ports, NULL, count); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1996,10 +2105,9 @@ done: goto skip_submit; } + clear_ports(port + 1, last_port - port); - memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); - set_preempt_timeout(engine); } else { skip_submit: @@ -2014,13 +2122,14 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) for (port = execlists->pending; *port; port++) execlists_schedule_out(*port); - memset(execlists->pending, 0, sizeof(execlists->pending)); + clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); /* Mark the end of active before we overwrite *active */ for (port = xchg(&execlists->active, execlists->pending); *port; port++) execlists_schedule_out(*port); - WRITE_ONCE(execlists->active, - memset(execlists->inflight, 0, sizeof(execlists->inflight))); + clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); + + WRITE_ONCE(execlists->active, execlists->inflight); } static inline void @@ -2176,7 +2285,6 @@ static void process_csb(struct intel_engine_cs *engine) /* Point active to the new ELSP; prevent overwriting */ WRITE_ONCE(execlists->active, execlists->pending); - set_timeslice(engine); if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); @@ -2217,6 +2325,7 @@ static void process_csb(struct intel_engine_cs *engine) } while (head != tail); execlists->csb_head = head; + set_timeslice(engine); /* * Gen11 has proven to fail wrt global observation point between @@ -2399,7 +2508,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine) vaddr += engine->context_size; - memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); + memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); } static void @@ -2410,7 +2519,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) vaddr += engine->context_size; - if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) + if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) dev_err_once(engine->i915->drm.dev, "%s context redzone overwritten!\n", engine->name); @@ -2453,33 +2562,21 @@ __execlists_context_pin(struct intel_context *ce, struct intel_engine_cs *engine) { void *vaddr; - int ret; GEM_BUG_ON(!ce->state); - - ret = intel_context_active_acquire(ce); - if (ret) - goto err; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); vaddr = i915_gem_object_pin_map(ce->state->obj, i915_coherent_map_type(engine->i915) | I915_MAP_OVERRIDE); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto unpin_active; - } + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine); + ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; - -unpin_active: - intel_context_active_release(ce); -err: - return ret; } static int execlists_context_pin(struct intel_context *ce) @@ -2494,6 +2591,9 @@ static int execlists_context_alloc(struct intel_context *ce) static void execlists_context_reset(struct intel_context *ce) { + CE_TRACE(ce, "reset\n"); + GEM_BUG_ON(!intel_context_is_pinned(ce)); + /* * Because we emit WA_TAIL_DWORDS there may be a disparity * between our bookkeeping in ce->ring->head and ce->ring->tail and @@ -2510,8 +2610,14 @@ static void execlists_context_reset(struct intel_context *ce) * So to avoid that we reset the context images upon resume. For * simplicity, we just zero everything out. */ - intel_ring_reset(ce->ring, 0); + intel_ring_reset(ce->ring, ce->ring->emit); + + /* Scrub away the garbage */ + execlists_init_reg_state(ce->lrc_reg_state, + ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine); + + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -2730,6 +2836,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); + /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + LRC_PPHWSP_SCRATCH_ADDR); + batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); /* WaMediaPoolStateCmdInWABB:bxt,glk */ @@ -2925,6 +3039,8 @@ static void enable_execlists(struct intel_engine_cs *engine) RING_HWS_PGA, i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); + + engine->context_tag = 0; } static bool unexpected_starting_state(struct intel_engine_cs *engine) @@ -3030,10 +3146,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) &execlists->csb_status[reset_value]); } -static void __execlists_reset_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine) +static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) { - u32 *regs = ce->lrc_reg_state; int x; x = lrc_ring_mi_mode(engine); @@ -3043,6 +3157,14 @@ static void __execlists_reset_reg_state(const struct intel_context *ce, } } +static void __execlists_reset_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + __reset_stop_ring(regs, engine); +} + static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -3795,7 +3917,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->release = execlists_release; engine->resume = execlists_resume; engine->cops = &execlists_context_ops; @@ -3910,6 +4031,9 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) reset_csb_pointers(engine); + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = execlists_release; + return 0; } @@ -3949,18 +4073,21 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, - const struct intel_ring *ring) + const struct intel_ring *ring, + bool inhibit) { - regs[CTX_CONTEXT_CONTROL] = - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + u32 ctl; + + ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (inhibit) + ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; if (INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; - regs[CTX_BB_STATE] = RING_BB_PPGTT; } static void init_wa_bb_reg_state(u32 * const regs, @@ -4016,7 +4143,7 @@ static void execlists_init_reg_state(u32 *regs, const struct intel_context *ce, const struct intel_engine_cs *engine, const struct intel_ring *ring, - bool close) + bool inhibit) { /* * A context is actually a big batch buffer with several @@ -4028,21 +4155,17 @@ static void execlists_init_reg_state(u32 *regs, * * Must keep consistent with virtual_update_register_offsets(). */ - u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); - - if (close) { /* Close the batch; used mainly by live_lrc_layout() */ - *bbe = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - *bbe |= BIT(0); - } + set_offsets(regs, reg_offsets(engine), engine, inhibit); - init_common_reg_state(regs, engine, ring); + init_common_reg_state(regs, engine, ring, inhibit); init_ppgtt_reg_state(regs, vm_alias(ce->vm)); init_wa_bb_reg_state(regs, engine, INTEL_GEN(engine->i915) >= 12 ? GEN12_CTX_BB_PER_CTX_PTR : CTX_BB_PER_CTX_PTR); + + __reset_stop_ring(regs, engine); } static int @@ -4053,7 +4176,6 @@ populate_lr_context(struct intel_context *ce, { bool inhibit = true; void *vaddr; - u32 *regs; int ret; vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); @@ -4083,11 +4205,8 @@ populate_lr_context(struct intel_context *ce, /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ - regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring, inhibit); - if (inhibit) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ce, engine, ring, inhibit); ret = 0; err_unpin_ctx: @@ -4481,9 +4600,11 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.gt = siblings[0]->gt; ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; + ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; /* * The decision on whether to submit a request using semaphores diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 893249ea48d4..eeef90b55c64 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -127,7 +127,7 @@ struct drm_i915_mocs_table { LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ L3_3_WB) -static const struct drm_i915_mocs_entry skylake_mocs_table[] = { +static const struct drm_i915_mocs_entry skl_mocs_table[] = { GEN9_MOCS_ENTRIES, MOCS_ENTRY(I915_MOCS_CACHED, LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), @@ -233,7 +233,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ L3_1_UC) -static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { +static const struct drm_i915_mocs_entry tgl_mocs_table[] = { /* Base - Error (Reserved for Non-Use) */ MOCS_ENTRY(0, 0x0, 0x0), /* Base - Reserved */ @@ -267,7 +267,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { L3_3_WB), }; -static const struct drm_i915_mocs_entry icelake_mocs_table[] = { +static const struct drm_i915_mocs_entry icl_mocs_table[] = { /* Base - Uncached (Deprecated) */ MOCS_ENTRY(I915_MOCS_UNCACHED, LE_1_UC | LE_TC_1_LLC, @@ -284,17 +284,17 @@ static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { if (INTEL_GEN(i915) >= 12) { - table->size = ARRAY_SIZE(tigerlake_mocs_table); - table->table = tigerlake_mocs_table; + table->size = ARRAY_SIZE(tgl_mocs_table); + table->table = tgl_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; } else if (IS_GEN(i915, 11)) { - table->size = ARRAY_SIZE(icelake_mocs_table); - table->table = icelake_mocs_table; + table->size = ARRAY_SIZE(icl_mocs_table); + table->table = icl_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { - table->size = ARRAY_SIZE(skylake_mocs_table); + table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; - table->table = skylake_mocs_table; + table->table = skl_mocs_table; } else if (IS_GEN9_LP(i915)) { table->size = ARRAY_SIZE(broxton_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c new file mode 100644 index 000000000000..f86f7e68ce5e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/slab.h> + +#include "i915_trace.h" +#include "intel_gtt.h" +#include "gen6_ppgtt.h" +#include "gen8_ppgtt.h" + +struct i915_page_table *alloc_pt(struct i915_address_space *vm) +{ + struct i915_page_table *pt; + + pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL); + if (unlikely(!pt)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, &pt->base))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + + atomic_set(&pt->used, 0); + return pt; +} + +struct i915_page_directory *__alloc_pd(size_t sz) +{ + struct i915_page_directory *pd; + + pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); + if (unlikely(!pd)) + return NULL; + + spin_lock_init(&pd->lock); + return pd; +} + +struct i915_page_directory *alloc_pd(struct i915_address_space *vm) +{ + struct i915_page_directory *pd; + + pd = __alloc_pd(sizeof(*pd)); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + return pd; +} + +void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) +{ + cleanup_page_dma(vm, pd); + kfree(pd); +} + +static inline void +write_dma_entry(struct i915_page_dma * const pdma, + const unsigned short idx, + const u64 encoded_entry) +{ + u64 * const vaddr = kmap_atomic(pdma->page); + + vaddr[idx] = encoded_entry; + kunmap_atomic(vaddr); +} + +void +__set_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_dma * const to, + u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) +{ + /* Each thread pre-pins the pd, and we may have a thread per pde. */ + GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry)); + + atomic_inc(px_used(pd)); + pd->entry[idx] = to; + write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); +} + +void +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + const struct i915_page_scratch * const scratch) +{ + GEM_BUG_ON(atomic_read(px_used(pd)) == 0); + + write_dma_entry(px_base(pd), idx, scratch->encode); + pd->entry[idx] = NULL; + atomic_dec(px_used(pd)); +} + +bool +release_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_table * const pt, + const struct i915_page_scratch * const scratch) +{ + bool free = false; + + if (atomic_add_unless(&pt->used, -1, 1)) + return false; + + spin_lock(&pd->lock); + if (atomic_dec_and_test(&pt->used)) { + clear_pd_entry(pd, idx, scratch); + free = true; + } + spin_unlock(&pd->lock); + + return free; +} + +int i915_ppgtt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + gtt_write_workarounds(gt); + + if (IS_GEN(i915, 6)) + gen6_ppgtt_enable(gt); + else if (IS_GEN(i915, 7)) + gen7_ppgtt_enable(gt); + + return 0; +} + +static struct i915_ppgtt * +__ppgtt_create(struct intel_gt *gt) +{ + if (INTEL_GEN(gt->i915) < 8) + return gen6_ppgtt_create(gt); + else + return gen8_ppgtt_create(gt); +} + +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ppgtt *ppgtt; + + ppgtt = __ppgtt_create(gt); + if (IS_ERR(ppgtt)) + return ppgtt; + + trace_i915_ppgtt_create(&ppgtt->vm); + + return ppgtt; +} + +static int ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + u32 pte_flags; + int err; + + if (flags & I915_VMA_ALLOC) { + err = vma->vm->allocate_va_range(vma->vm, + vma->node.start, vma->size); + if (err) + return err; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + } + + /* Applicable to VLV, and gen8+ */ + pte_flags = 0; + if (i915_gem_object_is_readonly(vma->obj)) + pte_flags |= PTE_READ_ONLY; + + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + wmb(); + + return 0; +} + +static void ppgtt_unbind_vma(struct i915_vma *vma) +{ + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); +} + +int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + ppgtt->vm.gt = gt; + ppgtt->vm.i915 = i915; + ppgtt->vm.dma = &i915->drm.pdev->dev; + ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); + + ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; + ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; + ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; + ppgtt->vm.vma_ops.clear_pages = clear_pages; +} diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 1c51296646e0..beee0cf89bce 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -147,11 +147,7 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { - GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n", - rq->engine->name, - rq->fence.context, - rq->fence.seqno, - yesno(guilty)); + RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); GEM_BUG_ON(i915_request_completed(rq)); @@ -251,9 +247,8 @@ out: return ret; } -static int ironlake_do_reset(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_uncore *uncore = gt->uncore; int ret; @@ -597,7 +592,7 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt) else if (INTEL_GEN(i915) >= 6) return gen6_reset_engines; else if (INTEL_GEN(i915) >= 5) - return ironlake_do_reset; + return ilk_do_reset; else if (IS_G4X(i915)) return g4x_do_reset; else if (IS_G33(i915) || IS_PINEVIEW(i915)) @@ -625,7 +620,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - GEM_TRACE("engine_mask=%x\n", engine_mask); + GT_TRACE(gt, "engine_mask=%x\n", engine_mask); preempt_disable(); ret = reset(gt, engine_mask, retry); preempt_enable(); @@ -785,8 +780,7 @@ static void nop_submit_request(struct i915_request *request) struct intel_engine_cs *engine = request->engine; unsigned long flags; - GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - engine->name, request->fence.context, request->fence.seqno); + RQ_TRACE(request, "-EIO\n"); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&engine->active.lock, flags); @@ -813,7 +807,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) intel_engine_dump(engine, &p, "%s\n", engine->name); } - GEM_TRACE("start\n"); + GT_TRACE(gt, "start\n"); /* * First, stop submission to hw, but do not yet complete requests by @@ -844,7 +838,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) reset_finish(gt, awake); - GEM_TRACE("end\n"); + GT_TRACE(gt, "end\n"); } void intel_gt_set_wedged(struct intel_gt *gt) @@ -870,7 +864,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (test_bit(I915_WEDGED_ON_INIT, >->reset.flags)) return false; - GEM_TRACE("start\n"); + GT_TRACE(gt, "start\n"); /* * Before unwedging, make sure that all pending operations @@ -932,7 +926,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) */ intel_engines_reset_default_submission(gt); - GEM_TRACE("end\n"); + GT_TRACE(gt, "end\n"); smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, >->reset.flags); @@ -1007,7 +1001,7 @@ void intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t awake; int ret; - GEM_TRACE("flags=%lx\n", gt->reset.flags); + GT_TRACE(gt, "flags=%lx\n", gt->reset.flags); might_sleep(); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, >->reset.flags)); @@ -1236,7 +1230,7 @@ void intel_gt_handle_error(struct intel_gt *gt, engine_mask &= INTEL_INFO(gt->i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(gt->i915, engine_mask, msg); + i915_capture_error_state(gt->i915); intel_gt_clear_error_registers(gt, engine_mask); } @@ -1329,10 +1323,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!intel_gt_is_wedged(gt)) return 0; - /* Reset still in progress? Maybe we will recover? */ - if (!test_bit(I915_RESET_BACKOFF, >->reset.flags)) + if (intel_gt_has_init_error(gt)) return -EIO; + /* Reset still in progress? Maybe we will recover? */ if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags))) @@ -1354,6 +1348,9 @@ void intel_gt_init_reset(struct intel_gt *gt) init_waitqueue_head(>->reset.queue); mutex_init(>->reset.mutex); init_srcu_struct(>->reset.backoff_srcu); + + /* no GPU until we are ready! */ + __set_bit(I915_WEDGED, >->reset.flags); } void intel_gt_fini_reset(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 81f872f9ef03..bc44fe8e5ffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -33,6 +33,7 @@ #include "gem/i915_gem_context.h" +#include "gen6_ppgtt.h" #include "i915_drv.h" #include "i915_trace.h" #include "intel_context.h" @@ -1328,26 +1329,12 @@ static int ring_context_alloc(struct intel_context *ce) static int ring_context_pin(struct intel_context *ce) { - int err; - - err = intel_context_active_acquire(ce); - if (err) - return err; - - err = __context_pin_ppgtt(ce); - if (err) - goto err_active; - - return 0; - -err_active: - intel_context_active_release(ce); - return err; + return __context_pin_ppgtt(ce); } static void ring_context_reset(struct intel_context *ce) { - intel_ring_reset(ce->ring, 0); + intel_ring_reset(ce->ring, ce->ring->emit); } static const struct intel_context_ops ring_context_ops = { @@ -1394,7 +1381,7 @@ static int load_pd_dir(struct i915_request *rq, intel_ring_advance(rq, cs); - return 0; + return rq->engine->emit_flush(rq, EMIT_FLUSH); } static inline int mi_set_context(struct i915_request *rq, u32 flags) @@ -1408,14 +1395,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) int len; u32 *cs; - flags |= MI_MM_SPACE_GTT; - if (IS_HASWELL(i915)) - /* These flags are for resource streamer on HSW+ */ - flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; - else - /* We need to save the extended state for powersaving modes */ - flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; - len = 4; if (IS_GEN(i915, 7)) len += 2 + (num_engines ? 4 * num_engines + 6 : 0); @@ -1592,7 +1571,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) if (ret) return ret; - return rq->engine->emit_flush(rq, EMIT_FLUSH); + return rq->engine->emit_flush(rq, EMIT_INVALIDATE); } static int switch_context(struct i915_request *rq) @@ -1607,15 +1586,21 @@ static int switch_context(struct i915_request *rq) return ret; if (ce->state) { - u32 hw_flags; + u32 flags; GEM_BUG_ON(rq->engine->id != RCS0); - hw_flags = 0; - if (!test_bit(CONTEXT_VALID_BIT, &ce->flags)) - hw_flags = MI_RESTORE_INHIBIT; + /* For resource streamer on HSW+ and power context elsewhere */ + BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); + BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN); + + flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT; + if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) + flags |= MI_RESTORE_EXT_STATE_EN; + else + flags |= MI_RESTORE_INHIBIT; - ret = mi_set_context(rq, hw_flags); + ret = mi_set_context(rq, flags); if (ret) return ret; } @@ -1842,8 +1827,6 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); - engine->release = ring_release; - engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; engine->reset.rewind = reset_rewind; @@ -2009,6 +1992,9 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = ring_release; + return 0; err_ring: diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f232036c3c7a..d2a3d935d186 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -777,7 +777,7 @@ void intel_rps_boost(struct i915_request *rq) spin_lock_irqsave(&rq->lock, flags); if (!i915_request_has_waitboost(rq) && !dma_fence_is_signaled_locked(&rq->fence)) { - rq->flags |= I915_REQUEST_WAITBOOST; + set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); if (!atomic_fetch_inc(&rps->num_waiters) && READ_ONCE(rps->cur_freq) < rps->boost_freq) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index ee5dc4fbdeb9..87716529cd2f 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -348,7 +348,6 @@ void intel_timeline_enter(struct intel_timeline *tl) * use atomic to manipulate tl->active_count. */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); if (atomic_add_unless(&tl->active_count, 1, 0)) return; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 195ccf7db272..4e292d4bf7b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -254,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, /* WaDisableDopClockGating:bdw * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 4e1eafa94be9..a560b7eee2cd 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -149,7 +149,11 @@ static int mock_context_alloc(struct intel_context *ce) static int mock_context_pin(struct intel_context *ce) { - return intel_context_active_acquire(ce); + return 0; +} + +static void mock_context_reset(struct intel_context *ce) +{ } static const struct intel_context_ops mock_context_ops = { @@ -161,6 +165,7 @@ static const struct intel_context_ops mock_context_ops = { .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, + .reset = mock_context_reset, .destroy = mock_context_destroy, }; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 5dbda2a74272..3e5e6c86e843 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1312,7 +1312,7 @@ static int igt_reset_evict_ppgtt(void *arg) if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) return 0; - ppgtt = i915_ppgtt_create(gt->i915); + ppgtt = i915_ppgtt_create(gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -1498,7 +1498,7 @@ static int igt_handle_error(void *arg) struct intel_engine_cs *engine = gt->engine[RCS0]; struct hang h; struct i915_request *rq; - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; int err; /* Check that we can issue a global GPU and engine reset */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 9ec9833c9c7b..15cda024e3e4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -527,13 +527,19 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine) return rq; } -static void wait_for_submit(struct intel_engine_cs *engine, - struct i915_request *rq) +static int wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) { + timeout += jiffies; do { cond_resched(); intel_engine_flush_submission(engine); - } while (!i915_request_is_active(rq)); + if (i915_request_is_active(rq)) + return 0; + } while (time_before(jiffies, timeout)); + + return -ETIME; } static long timeslice_threshold(const struct intel_engine_cs *engine) @@ -601,7 +607,12 @@ static int live_timeslice_queue(void *arg) goto err_heartbeat; } engine->schedule(rq, &attr); - wait_for_submit(engine, rq); + err = wait_for_submit(engine, rq, HZ / 2); + if (err) { + pr_err("%s: Timed out trying to submit semaphores\n", + engine->name); + goto err_rq; + } /* ELSP[1]: nop request */ nop = nop_request(engine); @@ -609,8 +620,13 @@ static int live_timeslice_queue(void *arg) err = PTR_ERR(nop); goto err_rq; } - wait_for_submit(engine, nop); + err = wait_for_submit(engine, nop, HZ / 2); i915_request_put(nop); + if (err) { + pr_err("%s: Timed out trying to submit nop\n", + engine->name); + goto err_rq; + } GEM_BUG_ON(i915_request_completed(rq)); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); @@ -1137,7 +1153,7 @@ static int live_nopreempt(void *arg) } /* Low priority client, but unpreemptable! */ - rq_a->flags |= I915_REQUEST_NOPREEMPT; + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { @@ -3362,7 +3378,7 @@ static int live_lrc_layout(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - u32 *mem; + u32 *lrc; int err; /* @@ -3370,13 +3386,13 @@ static int live_lrc_layout(void *arg) * match the layout saved by HW. */ - mem = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!mem) + lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!lrc) return -ENOMEM; err = 0; for_each_engine(engine, gt, id) { - u32 *hw, *lrc; + u32 *hw; int dw; if (!engine->default_state) @@ -3390,8 +3406,7 @@ static int live_lrc_layout(void *arg) } hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); - lrc = memset(mem, 0, PAGE_SIZE); - execlists_init_reg_state(lrc, + execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), engine->kernel_context, engine, engine->kernel_context->ring, @@ -3406,6 +3421,13 @@ static int live_lrc_layout(void *arg) continue; } + if (lrc[dw] == 0) { + pr_debug("%s: skipped instruction %x at dword %d\n", + engine->name, lri, dw); + dw++; + continue; + } + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { pr_err("%s: Expected LRI command at dword %d, found %08x\n", engine->name, dw, lri); @@ -3454,7 +3476,7 @@ static int live_lrc_layout(void *arg) break; } - kfree(mem); + kfree(lrc); return err; } diff --git a/drivers/gpu/drm/i915/gt/uc/Makefile b/drivers/gpu/drm/i915/gt/uc/Makefile deleted file mode 100644 index bec94d434cb6..000000000000 --- a/drivers/gpu/drm/i915/gt/uc/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/../.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 3ffc6267f96e..64934a876a50 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -12,6 +12,9 @@ #include "i915_drv.h" +static const struct intel_uc_ops uc_ops_off; +static const struct intel_uc_ops uc_ops_on; + /* Reset GuC providing us with fresh state for both GuC and HuC. */ static int __intel_uc_reset_hw(struct intel_uc *uc) @@ -89,6 +92,11 @@ void intel_uc_init_early(struct intel_uc *uc) intel_huc_init_early(&uc->huc); __confirm_options(uc); + + if (intel_uc_uses_guc(uc)) + uc->ops = &uc_ops_on; + else + uc->ops = &uc_ops_off; } void intel_uc_driver_late_release(struct intel_uc *uc) @@ -245,12 +253,11 @@ static void guc_disable_communication(struct intel_guc *guc) DRM_INFO("GuC communication disabled\n"); } -void intel_uc_fetch_firmwares(struct intel_uc *uc) +static void __uc_fetch_firmwares(struct intel_uc *uc) { int err; - if (!intel_uc_uses_guc(uc)) - return; + GEM_BUG_ON(!intel_uc_uses_guc(uc)); err = intel_uc_fw_fetch(&uc->guc.fw); if (err) @@ -260,20 +267,19 @@ void intel_uc_fetch_firmwares(struct intel_uc *uc) intel_uc_fw_fetch(&uc->huc.fw); } -void intel_uc_cleanup_firmwares(struct intel_uc *uc) +static void __uc_cleanup_firmwares(struct intel_uc *uc) { intel_uc_fw_cleanup_fetch(&uc->huc.fw); intel_uc_fw_cleanup_fetch(&uc->guc.fw); } -void intel_uc_init(struct intel_uc *uc) +static void __uc_init(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret; - if (!intel_uc_uses_guc(uc)) - return; + GEM_BUG_ON(!intel_uc_uses_guc(uc)); /* XXX: GuC submission is unavailable for now */ GEM_BUG_ON(intel_uc_supports_guc_submission(uc)); @@ -288,7 +294,7 @@ void intel_uc_init(struct intel_uc *uc) intel_huc_init(huc); } -void intel_uc_fini(struct intel_uc *uc) +static void __uc_fini(struct intel_uc *uc) { intel_huc_fini(&uc->huc); intel_guc_fini(&uc->guc); @@ -309,14 +315,6 @@ static int __uc_sanitize(struct intel_uc *uc) return __intel_uc_reset_hw(uc); } -void intel_uc_sanitize(struct intel_uc *uc) -{ - if (!intel_uc_supports_guc(uc)) - return; - - __uc_sanitize(uc); -} - /* Initialize and verify the uC regs related to uC positioning in WOPCM */ static int uc_init_wopcm(struct intel_uc *uc) { @@ -380,13 +378,8 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc) (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID); } -int intel_uc_init_hw(struct intel_uc *uc) +static int __uc_check_hw(struct intel_uc *uc) { - struct drm_i915_private *i915 = uc_to_gt(uc)->i915; - struct intel_guc *guc = &uc->guc; - struct intel_huc *huc = &uc->huc; - int ret, attempts; - if (!intel_uc_supports_guc(uc)) return 0; @@ -395,11 +388,24 @@ int intel_uc_init_hw(struct intel_uc *uc) * before on this system after reboot, otherwise we risk GPU hangs. * To check if GuC was loaded before we look at WOPCM registers. */ - if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc)) - return 0; + if (uc_is_wopcm_locked(uc)) + return -EIO; + + return 0; +} + +static int __uc_init_hw(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + int ret, attempts; + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + GEM_BUG_ON(!intel_uc_uses_guc(uc)); if (!intel_uc_fw_is_available(&guc->fw)) { - ret = uc_is_wopcm_locked(uc) || + ret = __uc_check_hw(uc) || intel_uc_fw_is_overridden(&guc->fw) || intel_uc_supports_guc_submission(uc) ? intel_uc_fw_status_to_error(guc->fw.status) : 0; @@ -495,7 +501,7 @@ err_out: return -EIO; } -void intel_uc_fini_hw(struct intel_uc *uc) +static void __uc_fini_hw(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; @@ -595,3 +601,20 @@ int intel_uc_runtime_resume(struct intel_uc *uc) */ return __uc_resume(uc, true); } + +static const struct intel_uc_ops uc_ops_off = { + .init_hw = __uc_check_hw, +}; + +static const struct intel_uc_ops uc_ops_on = { + .sanitize = __uc_sanitize, + + .init_fw = __uc_fetch_firmwares, + .fini_fw = __uc_cleanup_firmwares, + + .init = __uc_init, + .fini = __uc_fini, + + .init_hw = __uc_init_hw, + .fini_hw = __uc_fini_hw, +}; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 527995c21196..49c913524686 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -10,7 +10,20 @@ #include "intel_huc.h" #include "i915_params.h" +struct intel_uc; + +struct intel_uc_ops { + int (*sanitize)(struct intel_uc *uc); + void (*init_fw)(struct intel_uc *uc); + void (*fini_fw)(struct intel_uc *uc); + void (*init)(struct intel_uc *uc); + void (*fini)(struct intel_uc *uc); + int (*init_hw)(struct intel_uc *uc); + void (*fini_hw)(struct intel_uc *uc); +}; + struct intel_uc { + struct intel_uc_ops const *ops; struct intel_guc guc; struct intel_huc huc; @@ -21,13 +34,6 @@ struct intel_uc { void intel_uc_init_early(struct intel_uc *uc); void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); -void intel_uc_fetch_firmwares(struct intel_uc *uc); -void intel_uc_cleanup_firmwares(struct intel_uc *uc); -void intel_uc_sanitize(struct intel_uc *uc); -void intel_uc_init(struct intel_uc *uc); -int intel_uc_init_hw(struct intel_uc *uc); -void intel_uc_fini_hw(struct intel_uc *uc); -void intel_uc_fini(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); void intel_uc_runtime_suspend(struct intel_uc *uc); @@ -64,4 +70,20 @@ static inline bool intel_uc_uses_huc(struct intel_uc *uc) return intel_huc_is_enabled(&uc->huc); } +#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ +static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ +{ \ + if (uc->ops->_OPS) \ + return uc->ops->_OPS(uc); \ + return _RET; \ +} +intel_uc_ops_function(sanitize, sanitize, int, 0); +intel_uc_ops_function(fetch_firmwares, init_fw, void, ); +intel_uc_ops_function(cleanup_firmwares, fini_fw, void, ); +intel_uc_ops_function(init, init, void, ); +intel_uc_ops_function(fini, fini, void, ); +intel_uc_ops_function(init_hw, init_hw, int, 0); +intel_uc_ops_function(fini_hw, fini_hw, void, ); +#undef intel_uc_ops_function + #endif diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index e451298d11c3..2477a1e5a166 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -36,13 +36,32 @@ #define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) +static int vgpu_pin_dma_address(struct intel_vgpu *vgpu, + unsigned long size, + dma_addr_t dma_addr) +{ + int ret = 0; + + if (intel_gvt_hypervisor_dma_pin_guest_page(vgpu, dma_addr)) + ret = -EINVAL; + + return ret; +} + +static void vgpu_unpin_dma_address(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) +{ + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr); +} + static int vgpu_gem_get_pages( struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct intel_vgpu *vgpu; struct sg_table *st; struct scatterlist *sg; - int i, ret; + int i, j, ret; gen8_pte_t __iomem *gtt_entries; struct intel_vgpu_fb_info *fb_info; u32 page_num; @@ -51,6 +70,10 @@ static int vgpu_gem_get_pages( if (WARN_ON(!fb_info)) return -ENODEV; + vgpu = fb_info->obj->vgpu; + if (WARN_ON(!vgpu)) + return -ENODEV; + st = kmalloc(sizeof(*st), GFP_KERNEL); if (unlikely(!st)) return -ENOMEM; @@ -64,21 +87,53 @@ static int vgpu_gem_get_pages( gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + (fb_info->start >> PAGE_SHIFT); for_each_sg(st->sgl, sg, page_num, i) { + dma_addr_t dma_addr = + GEN8_DECODE_PTE(readq(>t_entries[i])); + if (vgpu_pin_dma_address(vgpu, PAGE_SIZE, dma_addr)) { + ret = -EINVAL; + goto out; + } + sg->offset = 0; sg->length = PAGE_SIZE; - sg_dma_address(sg) = - GEN8_DECODE_PTE(readq(>t_entries[i])); sg_dma_len(sg) = PAGE_SIZE; + sg_dma_address(sg) = dma_addr; } __i915_gem_object_set_pages(obj, st, PAGE_SIZE); +out: + if (ret) { + dma_addr_t dma_addr; + + for_each_sg(st->sgl, sg, i, j) { + dma_addr = sg_dma_address(sg); + if (dma_addr) + vgpu_unpin_dma_address(vgpu, dma_addr); + } + sg_free_table(st); + kfree(st); + } + + return ret; - return 0; } static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { + struct scatterlist *sg; + + if (obj->base.dma_buf) { + struct intel_vgpu_fb_info *fb_info = obj->gvt_info; + struct intel_vgpu_dmabuf_obj *obj = fb_info->obj; + struct intel_vgpu *vgpu = obj->vgpu; + int i; + + for_each_sg(pages->sgl, sg, fb_info->size, i) + vgpu_unpin_dma_address(vgpu, + sg_dma_address(sg)); + } + sg_free_table(pages); kfree(pages); } @@ -163,6 +218,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, roundup(info->size, PAGE_SIZE)); i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class); + i915_gem_object_set_readonly(obj); obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bb9fe6bf5275..6d28d72e6c7e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -341,6 +341,10 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); engine_mask |= BIT(VCS1); } + if (data & GEN9_GRDOM_GUC) { + gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id); + vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET; + } engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask; } @@ -1636,6 +1640,16 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, return 0; } +static int guc_status_read(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, + unsigned int bytes) +{ + /* keep MIA_IN_RESET before clearing */ + read_vreg(vgpu, offset, p_data, bytes); + vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET; + return 0; +} + static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2672,10 +2686,12 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); + MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL); + return 0; } -static int init_broadwell_mmio_info(struct intel_gvt *gvt) +static int init_bdw_mmio_info(struct intel_gvt *gvt) { struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; @@ -3364,20 +3380,20 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) goto err; if (IS_BROADWELL(dev_priv)) { - ret = init_broadwell_mmio_info(gvt); + ret = init_bdw_mmio_info(gvt); if (ret) goto err; } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - ret = init_broadwell_mmio_info(gvt); + ret = init_bdw_mmio_info(gvt); if (ret) goto err; ret = init_skl_mmio_info(gvt); if (ret) goto err; } else if (IS_BROXTON(dev_priv)) { - ret = init_broadwell_mmio_info(gvt); + ret = init_bdw_mmio_info(gvt); if (ret) goto err; ret = init_skl_mmio_info(gvt); diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 9599c0a762b2..b17c4a1599cd 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -66,6 +66,8 @@ struct intel_gvt_mpt { unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*dma_pin_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 04a5a0d90823..3259a1fa69e1 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1916,6 +1916,28 @@ err_unlock: return ret; } +static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr) +{ + struct kvmgt_guest_info *info; + struct gvt_dma *entry; + int ret = 0; + + if (!handle_valid(handle)) + return -ENODEV; + + info = (struct kvmgt_guest_info *)handle; + + mutex_lock(&info->vgpu->vdev.cache_lock); + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + if (entry) + kref_get(&entry->ref); + else + ret = -ENOMEM; + mutex_unlock(&info->vgpu->vdev.cache_lock); + + return ret; +} + static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); @@ -2027,6 +2049,7 @@ static struct intel_gvt_mpt kvmgt_mpt = { .gfn_to_mfn = kvmgt_gfn_to_pfn, .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, + .dma_pin_guest_page = kvmgt_dma_pin_guest_page, .set_opregion = kvmgt_set_opregion, .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 0f9440128123..9ad224df9c68 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -255,6 +255,21 @@ static inline void intel_gvt_hypervisor_dma_unmap_guest_page( } /** + * intel_gvt_hypervisor_dma_pin_guest_page - pin guest dma buf + * @vgpu: a vGPU + * @dma_addr: guest dma addr + * + * Returns: + * 0 on success, negative error code if failed. + */ +static inline int +intel_gvt_hypervisor_dma_pin_guest_page(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) +{ + return intel_gvt_host.mpt->dma_pin_guest_page(vgpu->handle, dma_addr); +} + +/** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU * @gfn: guest PFN diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b3299f88e24e..685d1e04a5ff 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1224,7 +1224,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) enum intel_engine_id i; int ret; - ppgtt = i915_ppgtt_create(i915); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index d5a6e4e3d0fd..85bd9bf4f6ee 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -212,9 +212,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) */ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->gvt->lock); + mutex_lock(&vgpu->vgpu_lock); vgpu->active = true; - mutex_unlock(&vgpu->gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } /** diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index cfe09964622b..f3da5c06f331 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -605,12 +605,15 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { intel_engine_mask_t tmp, mask = engine->mask; + struct llist_node *pos = NULL, *next; struct intel_gt *gt = engine->gt; - struct llist_node *pos, *next; int err; GEM_BUG_ON(i915_active_is_idle(ref)); - GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); + + /* Wait until the previous preallocation is completed */ + while (!llist_empty(&ref->preallocated_barriers)) + cond_resched(); /* * Preallocate a node for each physical engine supporting the target @@ -653,16 +656,24 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); GEM_BUG_ON(barrier_to_engine(node) != engine); - llist_add(barrier_to_ll(node), &ref->preallocated_barriers); + next = barrier_to_ll(node); + next->next = pos; + if (!pos) + pos = next; intel_engine_pm_get(engine); } + GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); + llist_add_batch(next, pos, &ref->preallocated_barriers); + return 0; unwind: - llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { + while (pos) { struct active_node *node = barrier_from_ll(pos); + pos = pos->next; + atomic_dec(&ref->count); intel_engine_pm_put(barrier_to_engine(node)); diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index e9d4200ce3bc..66883af64ca1 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -262,8 +262,10 @@ void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects) { struct i915_buddy_block *block, *on; - list_for_each_entry_safe(block, on, objects, link) + list_for_each_entry_safe(block, on, objects, link) { i915_buddy_free(mm, block); + cond_resched(); + } INIT_LIST_HEAD(objects); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d28468eaed57..d5a9b8a964c2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -321,16 +321,15 @@ static void print_context_stats(struct seq_file *m, for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - intel_context_lock_pinned(ce); - if (intel_context_is_pinned(ce)) { + if (intel_context_pin_if_active(ce)) { rcu_read_lock(); if (ce->state) per_file_stats(0, ce->state->obj, &kstats); per_file_stats(0, ce->ring->vma->obj, &kstats); rcu_read_unlock(); + intel_context_unpin(ce); } - intel_context_unlock_pinned(ce); } i915_gem_context_unlock_engines(ctx); @@ -367,12 +366,16 @@ static void print_context_stats(struct seq_file *m, static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); + struct intel_memory_region *mr; + enum intel_region_id id; seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n", i915->mm.shrink_count, atomic_read(&i915->mm.free_count), i915->mm.shrink_memory); - + for_each_memory_region(mr, i915, id) + seq_printf(m, "%s: total:%pa, available:%pa bytes\n", + mr->name, &mr->total, &mr->avail); seq_putc(m, '\n'); print_context_stats(m, i915); @@ -682,7 +685,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) static ssize_t gpu_state_read(struct file *file, char __user *ubuf, size_t count, loff_t *pos) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; ssize_t ret; void *buf; @@ -695,7 +698,7 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf, if (!buf) return -ENOMEM; - ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count); + ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count); if (ret <= 0) goto out; @@ -711,19 +714,19 @@ out: static int gpu_state_release(struct inode *inode, struct file *file) { - i915_gpu_state_put(file->private_data); + i915_gpu_coredump_put(file->private_data); return 0; } static int i915_gpu_info_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - struct i915_gpu_state *gpu; + struct i915_gpu_coredump *gpu; intel_wakeref_t wakeref; gpu = NULL; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - gpu = i915_capture_gpu_state(i915); + gpu = i915_gpu_coredump(i915); if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -745,7 +748,7 @@ i915_error_state_write(struct file *filp, size_t cnt, loff_t *ppos) { - struct i915_gpu_state *error = filp->private_data; + struct i915_gpu_coredump *error = filp->private_data; if (!error) return 0; @@ -758,7 +761,7 @@ i915_error_state_write(struct file *filp, static int i915_error_state_open(struct inode *inode, struct file *file) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; error = i915_first_error_state(inode->i_private); if (IS_ERR(error)) @@ -1001,7 +1004,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) return ret; } -static int ironlake_drpc_info(struct seq_file *m) +static int ilk_drpc_info(struct seq_file *m) { struct drm_i915_private *i915 = node_to_i915(m->private); struct intel_uncore *uncore = &i915->uncore; @@ -1209,7 +1212,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused) else if (INTEL_GEN(dev_priv) >= 6) err = gen6_drpc_info(m); else - err = ironlake_drpc_info(m); + err = ilk_drpc_info(m); } return err; @@ -1509,15 +1512,14 @@ static int i915_context_status(struct seq_file *m, void *unused) for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - intel_context_lock_pinned(ce); - if (intel_context_is_pinned(ce)) { + if (intel_context_pin_if_active(ce)) { seq_printf(m, "%s: ", ce->engine->name); if (ce->state) describe_obj(m, ce->state->obj); describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); + intel_context_unpin(ce); } - intel_context_unlock_pinned(ce); } i915_gem_context_unlock_engines(ctx); @@ -1977,7 +1979,7 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data) struct drm_connector *connector = m->private; struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_dp *intel_dp = - enc_to_intel_dp(&intel_attached_encoder(connector)->base); + enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); int ret; if (!CAN_PSR(dev_priv)) { @@ -2389,7 +2391,7 @@ static void intel_dp_info(struct seq_file *m, struct intel_connector *intel_connector) { struct intel_encoder *intel_encoder = intel_connector->encoder; - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); + struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]); seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio)); @@ -2409,7 +2411,7 @@ static void intel_dp_mst_info(struct seq_file *m, { struct intel_encoder *intel_encoder = intel_connector->encoder; struct intel_dp_mst_encoder *intel_mst = - enc_to_mst(&intel_encoder->base); + enc_to_mst(intel_encoder); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, @@ -2422,7 +2424,7 @@ static void intel_hdmi_info(struct seq_file *m, struct intel_connector *intel_connector) { struct intel_encoder *intel_encoder = intel_connector->encoder; - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(intel_encoder); seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio)); if (intel_connector->hdcp.shim) { @@ -3012,11 +3014,11 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; - intel_encoder = intel_attached_encoder(connector); + intel_encoder = intel_attached_encoder(to_intel_connector(connector)); if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST) continue; - intel_dig_port = enc_to_dig_port(&intel_encoder->base); + intel_dig_port = enc_to_dig_port(intel_encoder); if (!intel_dig_port->dp.can_mst) continue; @@ -3066,7 +3068,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file, continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); status = kstrtoint(input_buffer, 10, &val); if (status < 0) break; @@ -3075,9 +3077,9 @@ static ssize_t i915_displayport_test_active_write(struct file *file, * testing code, only accept an actual value of 1 here */ if (val == 1) - intel_dp->compliance.test_active = 1; + intel_dp->compliance.test_active = true; else - intel_dp->compliance.test_active = 0; + intel_dp->compliance.test_active = false; } } drm_connector_list_iter_end(&conn_iter); @@ -3110,7 +3112,7 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (intel_dp->compliance.test_active) seq_puts(m, "1"); else @@ -3160,7 +3162,7 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (intel_dp->compliance.test_type == DP_TEST_LINK_EDID_READ) seq_printf(m, "%lx", @@ -3204,7 +3206,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) continue; if (encoder && connector->status == connector_status_connected) { - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); seq_printf(m, "%02lx", intel_dp->compliance.test_type); } else seq_puts(m, "0"); @@ -3815,8 +3817,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, #undef SS_MAX } -static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, - struct sseu_dev_info *sseu) +static void bdw_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) { const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO); @@ -3901,7 +3903,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused) if (IS_CHERRYVIEW(dev_priv)) cherryview_sseu_device_status(dev_priv, &sseu); else if (IS_BROADWELL(dev_priv)) - broadwell_sseu_device_status(dev_priv, &sseu); + bdw_sseu_device_status(dev_priv, &sseu); else if (IS_GEN(dev_priv, 9)) gen9_sseu_device_status(dev_priv, &sseu); else if (INTEL_GEN(dev_priv) >= 10) @@ -4142,14 +4144,14 @@ static int i915_drrs_ctl_set(void *data, u64 val) drm_connector_mask(connector))) continue; - encoder = intel_attached_encoder(connector); + encoder = intel_attached_encoder(to_intel_connector(connector)); if (encoder->type != INTEL_OUTPUT_EDP) continue; DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n", val ? "en" : "dis", val); - intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp = enc_to_intel_dp(encoder); if (val) intel_edp_drrs_enable(intel_dp, crtc_state); @@ -4353,7 +4355,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; struct intel_dp *intel_dp = - enc_to_intel_dp(&intel_attached_encoder(connector)->base); + enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); u8 buf[16]; ssize_t err; int i; @@ -4388,7 +4390,7 @@ static int i915_panel_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; struct intel_dp *intel_dp = - enc_to_intel_dp(&intel_attached_encoder(connector)->base); + enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); if (connector->status != connector_status_connected) return -ENODEV; @@ -4466,7 +4468,7 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) } else if (ret) { break; } - intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base); + intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector))); crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "DSC_Enabled: %s\n", yesno(crtc_state->dsc.compression_enable)); @@ -4493,8 +4495,8 @@ static ssize_t i915_dsc_fec_support_write(struct file *file, int ret; struct drm_connector *connector = ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (len == 0) return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59525094d0e3..f7385abdd74b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -469,6 +469,12 @@ static void vlv_free_s0ix_state(struct drm_i915_private *i915) i915->vlv_s0ix_state = NULL; } +static void sanitize_gpu(struct drm_i915_private *i915) +{ + if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) + __intel_gt_reset(&i915->gt, ALL_ENGINES); +} + /** * i915_driver_early_probe - setup state not requiring device access * @dev_priv: device private @@ -602,6 +608,9 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) if (ret) goto err_uncore; + /* As early as possible, scrub existing GPU state before clobbering */ + sanitize_gpu(dev_priv); + return 0; err_uncore: @@ -1817,7 +1826,7 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - intel_gt_sanitize(&dev_priv->gt, true); + sanitize_gpu(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d05a968227f7..077af22b8340 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -46,6 +46,7 @@ #include <linux/dma-resv.h> #include <linux/shmem_fs.h> #include <linux/stackdepot.h> +#include <linux/xarray.h> #include <drm/intel-gtt.h> #include <drm/drm_legacy.h> /* for struct drm_dma_handle */ @@ -110,8 +111,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20191223" -#define DRIVER_TIMESTAMP 1577120893 +#define DRIVER_DATE "20200114" +#define DRIVER_TIMESTAMP 1579001978 struct drm_i915_gem_object; @@ -201,8 +202,7 @@ struct drm_i915_file_private { struct list_head request_list; } mm; - struct idr context_idr; - struct mutex context_idr_lock; /* guards context_idr */ + struct xarray context_xa; struct idr vm_idr; struct mutex vm_idr_lock; /* guards vm_idr */ @@ -505,6 +505,7 @@ struct i915_psr { bool dc3co_enabled; u32 dc3co_exit_delay; struct delayed_work idle_work; + bool initially_probed; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) @@ -1252,6 +1253,16 @@ struct drm_i915_private { struct llist_head free_list; struct work_struct free_work; } contexts; + + /* + * We replace the local file with a global mappings as the + * backing storage for the mmap is on the device and not + * on the struct file, and we do not want to prolong the + * lifetime of the local fd. To minimise the number of + * anonymous inodes we create, we use a global singleton to + * share the global mapping. + */ + struct file *mmap_singleton; } gem; u8 pch_ssc_use; @@ -1657,8 +1668,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9)) /* WaRsDisableCoarsePowerGating:skl,cnl */ -#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - IS_GEN_RANGE(dev_priv, 9, 10) +#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ + (IS_CANNONLAKE(dev_priv) || \ + IS_SKL_GT3(dev_priv) || \ + IS_SKL_GT4(dev_priv)) #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ @@ -1861,7 +1874,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, - struct intel_engine_cs *engine) + const struct intel_engine_cs *engine) { return atomic_read(&error->reset_engine_count[engine->uabi_class]); } @@ -1889,7 +1902,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) static inline struct i915_gem_context * __i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id) { - return idr_find(&file_priv->context_idr, id); + return xa_load(&file_priv->context_xa, id); } static inline struct i915_gem_context * @@ -2015,6 +2028,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +int remap_io_sg(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, + struct scatterlist *sgl, resource_size_t iobase); static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9ddcf17230e6..94f993e4c12f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -45,6 +45,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_mman.h" +#include "gem/i915_gem_region.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -200,7 +201,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, static int i915_gem_create(struct drm_file *file, - struct drm_i915_private *dev_priv, + struct intel_memory_region *mr, u64 *size_p, u32 *handle_p) { @@ -209,12 +210,16 @@ i915_gem_create(struct drm_file *file, u64 size; int ret; - size = round_up(*size_p, PAGE_SIZE); + GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); + size = round_up(*size_p, mr->min_page_size); if (size == 0) return -EINVAL; + /* For most of the ABI (e.g. mmap) we think in system pages */ + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + /* Allocate the new object */ - obj = i915_gem_object_create_shmem(dev_priv, size); + obj = i915_gem_object_create_region(mr, size, 0); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -234,6 +239,7 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { + enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; @@ -260,7 +266,14 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->pitch, 4096); args->size = args->pitch * args->height; - return i915_gem_create(file, to_i915(dev), + + mem_type = INTEL_MEMORY_SYSTEM; + if (HAS_LMEM(to_i915(dev))) + mem_type = INTEL_MEMORY_LOCAL; + + return i915_gem_create(file, + intel_memory_region_by_type(to_i915(dev), + mem_type), &args->size, &args->handle); } @@ -274,12 +287,14 @@ int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create *args = data; - i915_gem_flush_free_objects(dev_priv); + i915_gem_flush_free_objects(i915); - return i915_gem_create(file, dev_priv, + return i915_gem_create(file, + intel_memory_region_by_type(i915, + INTEL_MEMORY_SYSTEM), &args->size, &args->handle); } @@ -1172,6 +1187,8 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { + i915_gem_driver_release__contexts(dev_priv); + intel_gt_driver_release(&dev_priv->gt); intel_wa_list_free(&dev_priv->gt_wa_list); @@ -1179,8 +1196,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) intel_uc_cleanup_firmwares(&dev_priv->gt.uc); i915_gem_cleanup_userptr(dev_priv); - i915_gem_driver_release__contexts(dev_priv); - i915_gem_drain_freed_objects(dev_priv); WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 71efccfde122..d9c34a23cd67 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -412,6 +412,9 @@ int i915_vma_pin_fence(struct i915_vma *vma) { int err; + if (!vma->fence && !i915_gem_object_is_tiled(vma->obj)) + return 0; + /* * Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1efe58ad0ce9..e039eb56900f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1,26 +1,7 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2010 Daniel Vetter - * Copyright © 2011-2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * + * Copyright © 2020 Intel Corporation */ #include <linux/slab.h> /* fault-inject.h is not standalone! */ @@ -45,2116 +26,6 @@ #include "i915_trace.h" #include "i915_vgpu.h" -#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) -#define DBG(...) trace_printk(__VA_ARGS__) -#else -#define DBG(...) -#endif - -#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */ - -/** - * DOC: Global GTT views - * - * Background and previous state - * - * Historically objects could exists (be bound) in global GTT space only as - * singular instances with a view representing all of the object's backing pages - * in a linear fashion. This view will be called a normal view. - * - * To support multiple views of the same object, where the number of mapped - * pages is not equal to the backing store, or where the layout of the pages - * is not linear, concept of a GGTT view was added. - * - * One example of an alternative view is a stereo display driven by a single - * image. In this case we would have a framebuffer looking like this - * (2x2 pages): - * - * 12 - * 34 - * - * Above would represent a normal GGTT view as normally mapped for GPU or CPU - * rendering. In contrast, fed to the display engine would be an alternative - * view which could look something like this: - * - * 1212 - * 3434 - * - * In this example both the size and layout of pages in the alternative view is - * different from the normal view. - * - * Implementation and usage - * - * GGTT views are implemented using VMAs and are distinguished via enum - * i915_ggtt_view_type and struct i915_ggtt_view. - * - * A new flavour of core GEM functions which work with GGTT bound objects were - * added with the _ggtt_ infix, and sometimes with _view postfix to avoid - * renaming in large amounts of code. They take the struct i915_ggtt_view - * parameter encapsulating all metadata required to implement a view. - * - * As a helper for callers which are only interested in the normal view, - * globally const i915_ggtt_view_normal singleton instance exists. All old core - * GEM API functions, the ones not taking the view parameter, are operating on, - * or with the normal GGTT view. - * - * Code wanting to add or use a new GGTT view needs to: - * - * 1. Add a new enum with a suitable name. - * 2. Extend the metadata in the i915_ggtt_view structure if required. - * 3. Add support to i915_get_vma_pages(). - * - * New views are required to build a scatter-gather table from within the - * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and - * exists for the lifetime of an VMA. - * - * Core API is designed to have copy semantics which means that passed in - * struct i915_ggtt_view does not need to be persistent (left around after - * calling the core API functions). - * - */ - -#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) - -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma); - -static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) -{ - struct intel_uncore *uncore = ggtt->vm.gt->uncore; - - spin_lock_irq(&uncore->lock); - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6); - spin_unlock_irq(&uncore->lock); -} - -static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) -{ - struct intel_uncore *uncore = ggtt->vm.gt->uncore; - - /* - * Note that as an uncached mmio write, this will flush the - * WCB of the writes into the GGTT before it triggers the invalidate. - */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); -} - -static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) -{ - struct intel_uncore *uncore = ggtt->vm.gt->uncore; - struct drm_i915_private *i915 = ggtt->vm.i915; - - gen8_ggtt_invalidate(ggtt); - - if (INTEL_GEN(i915) >= 12) - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, - GEN12_GUC_TLB_INV_CR_INVALIDATE); - else - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); -} - -static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) -{ - intel_gtt_chipset_flush(); -} - -static int ppgtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - u32 pte_flags; - int err; - - if (flags & I915_VMA_ALLOC) { - err = vma->vm->allocate_va_range(vma->vm, - vma->node.start, vma->size); - if (err) - return err; - - set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); - } - - /* Applicable to VLV, and gen8+ */ - pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) - pte_flags |= PTE_READ_ONLY; - - GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - wmb(); - - return 0; -} - -static void ppgtt_unbind_vma(struct i915_vma *vma) -{ - if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); -} - -static int ppgtt_set_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->pages); - - vma->pages = vma->obj->mm.pages; - - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - -static void clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - if (vma->pages != vma->obj->mm.pages) { - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); -} - -static u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; - - if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; - - switch (level) { - case I915_CACHE_NONE: - pte |= PPAT_UNCACHED; - break; - case I915_CACHE_WT: - pte |= PPAT_DISPLAY_ELLC; - break; - default: - pte |= PPAT_CACHED; - break; - } - - return pte; -} - -static u64 gen8_pde_encode(const dma_addr_t addr, - const enum i915_cache_level level) -{ - u64 pde = _PAGE_PRESENT | _PAGE_RW; - pde |= addr; - if (level != I915_CACHE_NONE) - pde |= PPAT_CACHED_PDE; - else - pde |= PPAT_UNCACHED; - return pde; -} - -static u64 snb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= GEN6_PTE_ADDR_ENCODE(addr); - - switch (level) { - case I915_CACHE_L3_LLC: - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 ivb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= GEN6_PTE_ADDR_ENCODE(addr); - - switch (level) { - case I915_CACHE_L3_LLC: - pte |= GEN7_PTE_CACHE_L3_LLC; - break; - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 byt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= GEN6_PTE_ADDR_ENCODE(addr); - - if (!(flags & PTE_READ_ONLY)) - pte |= BYT_PTE_WRITEABLE; - - if (level != I915_CACHE_NONE) - pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; - - return pte; -} - -static u64 hsw_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= HSW_PTE_ADDR_ENCODE(addr); - - if (level != I915_CACHE_NONE) - pte |= HSW_WB_LLC_AGE3; - - return pte; -} - -static u64 iris_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_VALID; - pte |= HSW_PTE_ADDR_ENCODE(addr); - - switch (level) { - case I915_CACHE_NONE: - break; - case I915_CACHE_WT: - pte |= HSW_WT_ELLC_LLC_AGE3; - break; - default: - pte |= HSW_WB_ELLC_LLC_AGE3; - break; - } - - return pte; -} - -static void stash_init(struct pagestash *stash) -{ - pagevec_init(&stash->pvec); - spin_lock_init(&stash->lock); -} - -static struct page *stash_pop_page(struct pagestash *stash) -{ - struct page *page = NULL; - - spin_lock(&stash->lock); - if (likely(stash->pvec.nr)) - page = stash->pvec.pages[--stash->pvec.nr]; - spin_unlock(&stash->lock); - - return page; -} - -static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) -{ - unsigned int nr; - - spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); - - nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); - memcpy(stash->pvec.pages + stash->pvec.nr, - pvec->pages + pvec->nr - nr, - sizeof(pvec->pages[0]) * nr); - stash->pvec.nr += nr; - - spin_unlock(&stash->lock); - - pvec->nr -= nr; -} - -static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) -{ - struct pagevec stack; - struct page *page; - - if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) - i915_gem_shrink_all(vm->i915); - - page = stash_pop_page(&vm->free_pages); - if (page) - return page; - - if (!vm->pt_kmap_wc) - return alloc_page(gfp); - - /* Look in our global stash of WC pages... */ - page = stash_pop_page(&vm->i915->mm.wc_stash); - if (page) - return page; - - /* - * Otherwise batch allocate pages to amortize cost of set_pages_wc. - * - * We have to be careful as page allocation may trigger the shrinker - * (via direct reclaim) which will fill up the WC stash underneath us. - * So we add our WB pages into a temporary pvec on the stack and merge - * them into the WC stash after all the allocations are complete. - */ - pagevec_init(&stack); - do { - struct page *page; - - page = alloc_page(gfp); - if (unlikely(!page)) - break; - - stack.pages[stack.nr++] = page; - } while (pagevec_space(&stack)); - - if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { - page = stack.pages[--stack.nr]; - - /* Merge spare WC pages to the global stash */ - if (stack.nr) - stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); - - /* Push any surplus WC pages onto the local VM stash */ - if (stack.nr) - stash_push_pagevec(&vm->free_pages, &stack); - } - - /* Return unwanted leftovers */ - if (unlikely(stack.nr)) { - WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); - __pagevec_release(&stack); - } - - return page; -} - -static void vm_free_pages_release(struct i915_address_space *vm, - bool immediate) -{ - struct pagevec *pvec = &vm->free_pages.pvec; - struct pagevec stack; - - lockdep_assert_held(&vm->free_pages.lock); - GEM_BUG_ON(!pagevec_count(pvec)); - - if (vm->pt_kmap_wc) { - /* - * When we use WC, first fill up the global stash and then - * only if full immediately free the overflow. - */ - stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); - - /* - * As we have made some room in the VM's free_pages, - * we can wait for it to fill again. Unless we are - * inside i915_address_space_fini() and must - * immediately release the pages! - */ - if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) - return; - - /* - * We have to drop the lock to allow ourselves to sleep, - * so take a copy of the pvec and clear the stash for - * others to use it as we sleep. - */ - stack = *pvec; - pagevec_reinit(pvec); - spin_unlock(&vm->free_pages.lock); - - pvec = &stack; - set_pages_array_wb(pvec->pages, pvec->nr); - - spin_lock(&vm->free_pages.lock); - } - - __pagevec_release(pvec); -} - -static void vm_free_page(struct i915_address_space *vm, struct page *page) -{ - /* - * On !llc, we need to change the pages back to WB. We only do so - * in bulk, so we rarely need to change the page attributes here, - * but doing so requires a stop_machine() from deep inside arch/x86/mm. - * To make detection of the possible sleep more likely, use an - * unconditional might_sleep() for everybody. - */ - might_sleep(); - spin_lock(&vm->free_pages.lock); - while (!pagevec_space(&vm->free_pages.pvec)) - vm_free_pages_release(vm, false); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); - pagevec_add(&vm->free_pages.pvec, page); - spin_unlock(&vm->free_pages.lock); -} - -static void i915_address_space_fini(struct i915_address_space *vm) -{ - spin_lock(&vm->free_pages.lock); - if (pagevec_count(&vm->free_pages.pvec)) - vm_free_pages_release(vm, true); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); - spin_unlock(&vm->free_pages.lock); - - drm_mm_takedown(&vm->mm); - - mutex_destroy(&vm->mutex); -} - -void __i915_vm_close(struct i915_address_space *vm) -{ - struct i915_vma *vma, *vn; - - mutex_lock(&vm->mutex); - list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { - struct drm_i915_gem_object *obj = vma->obj; - - /* Keep the obj (and hence the vma) alive as _we_ destroy it */ - if (!kref_get_unless_zero(&obj->base.refcount)) - continue; - - atomic_and(~I915_VMA_PIN_MASK, &vma->flags); - WARN_ON(__i915_vma_unbind(vma)); - __i915_vma_put(vma); - - i915_gem_object_put(obj); - } - GEM_BUG_ON(!list_empty(&vm->bound_list)); - mutex_unlock(&vm->mutex); -} - -static void __i915_vm_release(struct work_struct *work) -{ - struct i915_address_space *vm = - container_of(work, struct i915_address_space, rcu.work); - - vm->cleanup(vm); - i915_address_space_fini(vm); - - kfree(vm); -} - -void i915_vm_release(struct kref *kref) -{ - struct i915_address_space *vm = - container_of(kref, struct i915_address_space, ref); - - GEM_BUG_ON(i915_is_ggtt(vm)); - trace_i915_ppgtt_release(vm); - - queue_rcu_work(vm->i915->wq, &vm->rcu); -} - -static void i915_address_space_init(struct i915_address_space *vm, int subclass) -{ - kref_init(&vm->ref); - INIT_RCU_WORK(&vm->rcu, __i915_vm_release); - atomic_set(&vm->open, 1); - - /* - * The vm->mutex must be reclaim safe (for use in the shrinker). - * Do a dummy acquire now under fs_reclaim so that any allocation - * attempt holding the lock is immediately reported by lockdep. - */ - mutex_init(&vm->mutex); - lockdep_set_subclass(&vm->mutex, subclass); - i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); - - GEM_BUG_ON(!vm->total); - drm_mm_init(&vm->mm, 0, vm->total); - vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; - - stash_init(&vm->free_pages); - - INIT_LIST_HEAD(&vm->bound_list); -} - -static int __setup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p, - gfp_t gfp) -{ - p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); - if (unlikely(!p->page)) - return -ENOMEM; - - p->daddr = dma_map_page_attrs(vm->dma, - p->page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { - vm_free_page(vm, p->page); - return -ENOMEM; - } - - return 0; -} - -static int setup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p) -{ - return __setup_page_dma(vm, p, __GFP_HIGHMEM); -} - -static void cleanup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p) -{ - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - vm_free_page(vm, p->page); -} - -#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) - -static void -fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) -{ - kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); -} - -#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) -#define fill32_px(px, v) do { \ - u64 v__ = lower_32_bits(v); \ - fill_px((px), v__ << 32 | v__); \ -} while (0) - -static int -setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) -{ - unsigned long size; - - /* - * In order to utilize 64K pages for an object with a size < 2M, we will - * need to support a 64K scratch page, given that every 16th entry for a - * page-table operating in 64K mode must point to a properly aligned 64K - * region, including any PTEs which happen to point to scratch. - * - * This is only relevant for the 48b PPGTT where we support - * huge-gtt-pages, see also i915_vma_insert(). However, as we share the - * scratch (read-only) between all vm, we create one 64k scratch page - * for all. - */ - size = I915_GTT_PAGE_SIZE_4K; - if (i915_vm_is_4lvl(vm) && - HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { - size = I915_GTT_PAGE_SIZE_64K; - gfp |= __GFP_NOWARN; - } - gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; - - do { - unsigned int order = get_order(size); - struct page *page; - dma_addr_t addr; - - page = alloc_pages(gfp, order); - if (unlikely(!page)) - goto skip; - - addr = dma_map_page_attrs(vm->dma, - page, 0, size, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, addr))) - goto free_page; - - if (unlikely(!IS_ALIGNED(addr, size))) - goto unmap_page; - - vm->scratch[0].base.page = page; - vm->scratch[0].base.daddr = addr; - vm->scratch_order = order; - return 0; - -unmap_page: - dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); -free_page: - __free_pages(page, order); -skip: - if (size == I915_GTT_PAGE_SIZE_4K) - return -ENOMEM; - - size = I915_GTT_PAGE_SIZE_4K; - gfp &= ~__GFP_NOWARN; - } while (1); -} - -static void cleanup_scratch_page(struct i915_address_space *vm) -{ - struct i915_page_dma *p = px_base(&vm->scratch[0]); - unsigned int order = vm->scratch_order; - - dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, - PCI_DMA_BIDIRECTIONAL); - __free_pages(p->page, order); -} - -static void free_scratch(struct i915_address_space *vm) -{ - int i; - - if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ - return; - - for (i = 1; i <= vm->top; i++) { - if (!px_dma(&vm->scratch[i])) - break; - cleanup_page_dma(vm, px_base(&vm->scratch[i])); - } - - cleanup_scratch_page(vm); -} - -static struct i915_page_table *alloc_pt(struct i915_address_space *vm) -{ - struct i915_page_table *pt; - - pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL); - if (unlikely(!pt)) - return ERR_PTR(-ENOMEM); - - if (unlikely(setup_page_dma(vm, &pt->base))) { - kfree(pt); - return ERR_PTR(-ENOMEM); - } - - atomic_set(&pt->used, 0); - return pt; -} - -static struct i915_page_directory *__alloc_pd(size_t sz) -{ - struct i915_page_directory *pd; - - pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); - if (unlikely(!pd)) - return NULL; - - spin_lock_init(&pd->lock); - return pd; -} - -static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) -{ - struct i915_page_directory *pd; - - pd = __alloc_pd(sizeof(*pd)); - if (unlikely(!pd)) - return ERR_PTR(-ENOMEM); - - if (unlikely(setup_page_dma(vm, px_base(pd)))) { - kfree(pd); - return ERR_PTR(-ENOMEM); - } - - return pd; -} - -static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) -{ - cleanup_page_dma(vm, pd); - kfree(pd); -} - -#define free_px(vm, px) free_pd(vm, px_base(px)) - -static inline void -write_dma_entry(struct i915_page_dma * const pdma, - const unsigned short idx, - const u64 encoded_entry) -{ - u64 * const vaddr = kmap_atomic(pdma->page); - - vaddr[idx] = encoded_entry; - kunmap_atomic(vaddr); -} - -static inline void -__set_pd_entry(struct i915_page_directory * const pd, - const unsigned short idx, - struct i915_page_dma * const to, - u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) -{ - /* Each thread pre-pins the pd, and we may have a thread per pde. */ - GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry)); - - atomic_inc(px_used(pd)); - pd->entry[idx] = to; - write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); -} - -#define set_pd_entry(pd, idx, to) \ - __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) - -static inline void -clear_pd_entry(struct i915_page_directory * const pd, - const unsigned short idx, - const struct i915_page_scratch * const scratch) -{ - GEM_BUG_ON(atomic_read(px_used(pd)) == 0); - - write_dma_entry(px_base(pd), idx, scratch->encode); - pd->entry[idx] = NULL; - atomic_dec(px_used(pd)); -} - -static bool -release_pd_entry(struct i915_page_directory * const pd, - const unsigned short idx, - struct i915_page_table * const pt, - const struct i915_page_scratch * const scratch) -{ - bool free = false; - - if (atomic_add_unless(&pt->used, -1, 1)) - return false; - - spin_lock(&pd->lock); - if (atomic_dec_and_test(&pt->used)) { - clear_pd_entry(pd, idx, scratch); - free = true; - } - spin_unlock(&pd->lock); - - return free; -} - -static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) -{ - struct drm_i915_private *dev_priv = ppgtt->vm.i915; - enum vgt_g2v_type msg; - int i; - - if (create) - atomic_inc(px_used(ppgtt->pd)); /* never remove */ - else - atomic_dec(px_used(ppgtt->pd)); - - mutex_lock(&dev_priv->vgpu.lock); - - if (i915_vm_is_4lvl(&ppgtt->vm)) { - const u64 daddr = px_dma(ppgtt->pd); - - I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); - I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); - - msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : - VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); - } else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) { - const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); - - I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); - I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); - } - - msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : - VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); - } - - /* g2v_notify atomically (via hv trap) consumes the message packet. */ - I915_WRITE(vgtif_reg(g2v_notify), msg); - - mutex_unlock(&dev_priv->vgpu.lock); -} - -/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ -#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */ -#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE)) -#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64)) -#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES)) -#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl)) -#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl)) -#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl)) - -static inline unsigned int -gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) -{ - const int shift = gen8_pd_shift(lvl); - const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); - - GEM_BUG_ON(start >= end); - end += ~mask >> gen8_pd_shift(1); - - *idx = i915_pde_index(start, shift); - if ((start ^ end) & mask) - return GEN8_PDES - *idx; - else - return i915_pde_index(end, shift) - *idx; -} - -static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) -{ - const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); - - GEM_BUG_ON(start >= end); - return (start ^ end) & mask && (start & ~mask) == 0; -} - -static inline unsigned int gen8_pt_count(u64 start, u64 end) -{ - GEM_BUG_ON(start >= end); - if ((start ^ end) >> gen8_pd_shift(1)) - return GEN8_PDES - (start & (GEN8_PDES - 1)); - else - return end - start; -} - -static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm) -{ - unsigned int shift = __gen8_pte_shift(vm->top); - return (vm->total + (1ull << shift) - 1) >> shift; -} - -static inline struct i915_page_directory * -gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) -{ - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); - - if (vm->top == 2) - return ppgtt->pd; - else - return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); -} - -static inline struct i915_page_directory * -gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) -{ - return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); -} - -static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, - struct i915_page_directory *pd, - int count, int lvl) -{ - if (lvl) { - void **pde = pd->entry; - - do { - if (!*pde) - continue; - - __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1); - } while (pde++, --count); - } - - free_px(vm, pd); -} - -static void gen8_ppgtt_cleanup(struct i915_address_space *vm) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (intel_vgpu_active(vm->i915)) - gen8_ppgtt_notify_vgt(ppgtt, false); - - __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top); - free_scratch(vm); -} - -static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, - struct i915_page_directory * const pd, - u64 start, const u64 end, int lvl) -{ - const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; - unsigned int idx, len; - - GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); - - len = gen8_pd_range(start, end, lvl--, &idx); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", - __func__, vm, lvl + 1, start, end, - idx, len, atomic_read(px_used(pd))); - GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); - - do { - struct i915_page_table *pt = pd->entry[idx]; - - if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && - gen8_pd_contains(start, end, lvl)) { - DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", - __func__, vm, lvl + 1, idx, start, end); - clear_pd_entry(pd, idx, scratch); - __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); - start += (u64)I915_PDES << gen8_pd_shift(lvl); - continue; - } - - if (lvl) { - start = __gen8_ppgtt_clear(vm, as_pd(pt), - start, end, lvl); - } else { - unsigned int count; - u64 *vaddr; - - count = gen8_pt_count(start, end); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", - __func__, vm, lvl, start, end, - gen8_pd_index(start, 0), count, - atomic_read(&pt->used)); - GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); - - vaddr = kmap_atomic_px(pt); - memset64(vaddr + gen8_pd_index(start, 0), - vm->scratch[0].encode, - count); - kunmap_atomic(vaddr); - - atomic_sub(count, &pt->used); - start += count; - } - - if (release_pd_entry(pd, idx, pt, scratch)) - free_px(vm, pt); - } while (idx++, --len); - - return start; -} - -static void gen8_ppgtt_clear(struct i915_address_space *vm, - u64 start, u64 length) -{ - GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(range_overflows(start, length, vm->total)); - - start >>= GEN8_PTE_SHIFT; - length >>= GEN8_PTE_SHIFT; - GEM_BUG_ON(length == 0); - - __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, - start, start + length, vm->top); -} - -static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, - struct i915_page_directory * const pd, - u64 * const start, const u64 end, int lvl) -{ - const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; - struct i915_page_table *alloc = NULL; - unsigned int idx, len; - int ret = 0; - - GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); - - len = gen8_pd_range(*start, end, lvl--, &idx); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", - __func__, vm, lvl + 1, *start, end, - idx, len, atomic_read(px_used(pd))); - GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); - - spin_lock(&pd->lock); - GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */ - do { - struct i915_page_table *pt = pd->entry[idx]; - - if (!pt) { - spin_unlock(&pd->lock); - - DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", - __func__, vm, lvl + 1, idx); - - pt = fetch_and_zero(&alloc); - if (lvl) { - if (!pt) { - pt = &alloc_pd(vm)->pt; - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto out; - } - } - - fill_px(pt, vm->scratch[lvl].encode); - } else { - if (!pt) { - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto out; - } - } - - if (intel_vgpu_active(vm->i915) || - gen8_pt_count(*start, end) < I915_PDES) - fill_px(pt, vm->scratch[lvl].encode); - } - - spin_lock(&pd->lock); - if (likely(!pd->entry[idx])) - set_pd_entry(pd, idx, pt); - else - alloc = pt, pt = pd->entry[idx]; - } - - if (lvl) { - atomic_inc(&pt->used); - spin_unlock(&pd->lock); - - ret = __gen8_ppgtt_alloc(vm, as_pd(pt), - start, end, lvl); - if (unlikely(ret)) { - if (release_pd_entry(pd, idx, pt, scratch)) - free_px(vm, pt); - goto out; - } - - spin_lock(&pd->lock); - atomic_dec(&pt->used); - GEM_BUG_ON(!atomic_read(&pt->used)); - } else { - unsigned int count = gen8_pt_count(*start, end); - - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", - __func__, vm, lvl, *start, end, - gen8_pd_index(*start, 0), count, - atomic_read(&pt->used)); - - atomic_add(count, &pt->used); - /* All other pdes may be simultaneously removed */ - GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); - *start += count; - } - } while (idx++, --len); - spin_unlock(&pd->lock); -out: - if (alloc) - free_px(vm, alloc); - return ret; -} - -static int gen8_ppgtt_alloc(struct i915_address_space *vm, - u64 start, u64 length) -{ - u64 from; - int err; - - GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(range_overflows(start, length, vm->total)); - - start >>= GEN8_PTE_SHIFT; - length >>= GEN8_PTE_SHIFT; - GEM_BUG_ON(length == 0); - from = start; - - err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, - &start, start + length, vm->top); - if (unlikely(err && from != start)) - __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, - from, start, vm->top); - - return err; -} - -static inline struct sgt_dma { - struct scatterlist *sg; - dma_addr_t dma, max; -} sgt_dma(struct i915_vma *vma) { - struct scatterlist *sg = vma->pages->sgl; - dma_addr_t addr = sg_dma_address(sg); - return (struct sgt_dma) { sg, addr, addr + sg->length }; -} - -static __always_inline u64 -gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, - struct i915_page_directory *pdp, - struct sgt_dma *iter, - u64 idx, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); - gen8_pte_t *vaddr; - - pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); - vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); - do { - vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; - - iter->dma += I915_GTT_PAGE_SIZE; - if (iter->dma >= iter->max) { - iter->sg = __sg_next(iter->sg); - if (!iter->sg) { - idx = 0; - break; - } - - iter->dma = sg_dma_address(iter->sg); - iter->max = iter->dma + iter->sg->length; - } - - if (gen8_pd_index(++idx, 0) == 0) { - if (gen8_pd_index(idx, 1) == 0) { - /* Limited by sg length for 3lvl */ - if (gen8_pd_index(idx, 2) == 0) - break; - - pd = pdp->entry[gen8_pd_index(idx, 2)]; - } - - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); - } - } while (1); - kunmap_atomic(vaddr); - - return idx; -} - -static void gen8_ppgtt_insert_huge(struct i915_vma *vma, - struct sgt_dma *iter, - enum i915_cache_level cache_level, - u32 flags) -{ - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); - u64 start = vma->node.start; - dma_addr_t rem = iter->sg->length; - - GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm)); - - do { - struct i915_page_directory * const pdp = - gen8_pdp_for_page_address(vma->vm, start); - struct i915_page_directory * const pd = - i915_pd_entry(pdp, __gen8_pte_index(start, 2)); - gen8_pte_t encode = pte_encode; - unsigned int maybe_64K = -1; - unsigned int page_size; - gen8_pte_t *vaddr; - u16 index; - - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && - IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && - rem >= I915_GTT_PAGE_SIZE_2M && - !__gen8_pte_index(start, 0)) { - index = __gen8_pte_index(start, 1); - encode |= GEN8_PDE_PS_2M; - page_size = I915_GTT_PAGE_SIZE_2M; - - vaddr = kmap_atomic_px(pd); - } else { - struct i915_page_table *pt = - i915_pt_entry(pd, __gen8_pte_index(start, 1)); - - index = __gen8_pte_index(start, 0); - page_size = I915_GTT_PAGE_SIZE; - - if (!index && - vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && - IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && - (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || - rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) - maybe_64K = __gen8_pte_index(start, 1); - - vaddr = kmap_atomic_px(pt); - } - - do { - GEM_BUG_ON(iter->sg->length < page_size); - vaddr[index++] = encode | iter->dma; - - start += page_size; - iter->dma += page_size; - rem -= page_size; - if (iter->dma >= iter->max) { - iter->sg = __sg_next(iter->sg); - if (!iter->sg) - break; - - rem = iter->sg->length; - iter->dma = sg_dma_address(iter->sg); - iter->max = iter->dma + rem; - - if (maybe_64K != -1 && index < I915_PDES && - !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && - (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || - rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) - maybe_64K = -1; - - if (unlikely(!IS_ALIGNED(iter->dma, page_size))) - break; - } - } while (rem >= page_size && index < I915_PDES); - - kunmap_atomic(vaddr); - - /* - * Is it safe to mark the 2M block as 64K? -- Either we have - * filled whole page-table with 64K entries, or filled part of - * it and have reached the end of the sg table and we have - * enough padding. - */ - if (maybe_64K != -1 && - (index == I915_PDES || - (i915_vm_has_scratch_64K(vma->vm) && - !iter->sg && IS_ALIGNED(vma->node.start + - vma->node.size, - I915_GTT_PAGE_SIZE_2M)))) { - vaddr = kmap_atomic_px(pd); - vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; - kunmap_atomic(vaddr); - page_size = I915_GTT_PAGE_SIZE_64K; - - /* - * We write all 4K page entries, even when using 64K - * pages. In order to verify that the HW isn't cheating - * by using the 4K PTE instead of the 64K PTE, we want - * to remove all the surplus entries. If the HW skipped - * the 64K PTE, it will read/write into the scratch page - * instead - which we detect as missing results during - * selftests. - */ - if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { - u16 i; - - encode = vma->vm->scratch[0].encode; - vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); - - for (i = 1; i < index; i += 16) - memset64(vaddr + i, encode, 15); - - kunmap_atomic(vaddr); - } - } - - vma->page_sizes.gtt |= page_size; - } while (iter->sg); -} - -static void gen8_ppgtt_insert(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = sgt_dma(vma); - - if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { - gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); - } else { - u64 idx = vma->node.start >> GEN8_PTE_SHIFT; - - do { - struct i915_page_directory * const pdp = - gen8_pdp_for_page_index(vm, idx); - - idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, - cache_level, flags); - } while (idx); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - } -} - -static int gen8_init_scratch(struct i915_address_space *vm) -{ - int ret; - int i; - - /* - * If everybody agrees to not to write into the scratch page, - * we can reuse it for all vm, keeping contexts and processes separate. - */ - if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { - struct i915_address_space *clone = vm->gt->vm; - - GEM_BUG_ON(!clone->has_read_only); - - vm->scratch_order = clone->scratch_order; - memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); - px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ - return 0; - } - - ret = setup_scratch_page(vm, __GFP_HIGHMEM); - if (ret) - return ret; - - vm->scratch[0].encode = - gen8_pte_encode(px_dma(&vm->scratch[0]), - I915_CACHE_LLC, vm->has_read_only); - - for (i = 1; i <= vm->top; i++) { - if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) - goto free_scratch; - - fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); - vm->scratch[i].encode = - gen8_pde_encode(px_dma(&vm->scratch[i]), - I915_CACHE_LLC); - } - - return 0; - -free_scratch: - free_scratch(vm); - return -ENOMEM; -} - -static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) -{ - struct i915_address_space *vm = &ppgtt->vm; - struct i915_page_directory *pd = ppgtt->pd; - unsigned int idx; - - GEM_BUG_ON(vm->top != 2); - GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); - - for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { - struct i915_page_directory *pde; - - pde = alloc_pd(vm); - if (IS_ERR(pde)) - return PTR_ERR(pde); - - fill_px(pde, vm->scratch[1].encode); - set_pd_entry(pd, idx, pde); - atomic_inc(px_used(pde)); /* keep pinned */ - } - wmb(); - - return 0; -} - -static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - - ppgtt->vm.gt = gt; - ppgtt->vm.i915 = i915; - ppgtt->vm.dma = &i915->drm.pdev->dev; - ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); - - i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); - - ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; - ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; - ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; - ppgtt->vm.vma_ops.clear_pages = clear_pages; -} - -static struct i915_page_directory * -gen8_alloc_top_pd(struct i915_address_space *vm) -{ - const unsigned int count = gen8_pd_top_count(vm); - struct i915_page_directory *pd; - - GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); - - pd = __alloc_pd(offsetof(typeof(*pd), entry[count])); - if (unlikely(!pd)) - return ERR_PTR(-ENOMEM); - - if (unlikely(setup_page_dma(vm, px_base(pd)))) { - kfree(pd); - return ERR_PTR(-ENOMEM); - } - - fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); - atomic_inc(px_used(pd)); /* mark as pinned */ - return pd; -} - -/* - * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers - * with a net effect resembling a 2-level page table in normal x86 terms. Each - * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address - * space. - * - */ -static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) -{ - struct i915_ppgtt *ppgtt; - int err; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - ppgtt_init(ppgtt, &i915->gt); - ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; - - /* - * From bdw, there is hw support for read-only pages in the PPGTT. - * - * Gen11 has HSDES#:1807136187 unresolved. Disable ro support - * for now. - * - * Gen12 has inherited the same read-only fault issue from gen11. - */ - ppgtt->vm.has_read_only = !IS_GEN_RANGE(i915, 11, 12); - - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915)) - ppgtt->vm.pt_kmap_wc = true; - - err = gen8_init_scratch(&ppgtt->vm); - if (err) - goto err_free; - - ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm); - if (IS_ERR(ppgtt->pd)) { - err = PTR_ERR(ppgtt->pd); - goto err_free_scratch; - } - - if (!i915_vm_is_4lvl(&ppgtt->vm)) { - err = gen8_preallocate_top_level_pdp(ppgtt); - if (err) - goto err_free_pd; - } - - ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; - ppgtt->vm.insert_entries = gen8_ppgtt_insert; - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; - ppgtt->vm.clear_range = gen8_ppgtt_clear; - - if (intel_vgpu_active(i915)) - gen8_ppgtt_notify_vgt(ppgtt, true); - - ppgtt->vm.cleanup = gen8_ppgtt_cleanup; - - return ppgtt; - -err_free_pd: - __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, - gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top); -err_free_scratch: - free_scratch(&ppgtt->vm); -err_free: - kfree(ppgtt); - return ERR_PTR(err); -} - -/* Write pde (index) from the page directory @pd to the page table @pt */ -static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, - const unsigned int pde, - const struct i915_page_table *pt) -{ - /* Caller needs to make sure the write completes if necessary */ - iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, - ppgtt->pd_addr + pde); -} - -static void gen7_ppgtt_enable(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 ecochk; - - intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); - - ecochk = intel_uncore_read(uncore, GAM_ECOCHK); - if (IS_HASWELL(i915)) { - ecochk |= ECOCHK_PPGTT_WB_HSW; - } else { - ecochk |= ECOCHK_PPGTT_LLC_IVB; - ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; - } - intel_uncore_write(uncore, GAM_ECOCHK, ecochk); - - for_each_engine(engine, gt, id) { - /* GFX_MODE is per-ring on gen7+ */ - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } -} - -static void gen6_ppgtt_enable(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - - intel_uncore_rmw(uncore, - GAC_ECO_BITS, - 0, - ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); - - intel_uncore_rmw(uncore, - GAB_CTL, - 0, - GAB_CTL_CONT_AFTER_PAGEFAULT); - - intel_uncore_rmw(uncore, - GAM_ECOCHK, - 0, - ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); - - if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */ - intel_uncore_write(uncore, - GFX_MODE, - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); -} - -/* PPGTT support for Sandybdrige/Gen6 and later */ -static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - const gen6_pte_t scratch_pte = vm->scratch[0].encode; - unsigned int pde = first_entry / GEN6_PTES; - unsigned int pte = first_entry % GEN6_PTES; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - - while (num_entries) { - struct i915_page_table * const pt = - i915_pt_entry(ppgtt->base.pd, pde++); - const unsigned int count = min(num_entries, GEN6_PTES - pte); - gen6_pte_t *vaddr; - - GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); - - num_entries -= count; - - GEM_BUG_ON(count > atomic_read(&pt->used)); - if (!atomic_sub_return(count, &pt->used)) - ppgtt->scan_for_unused_pt = true; - - /* - * Note that the hw doesn't support removing PDE on the fly - * (they are cached inside the context with no means to - * invalidate the cache), so we can only reset the PTE - * entries back to scratch. - */ - - vaddr = kmap_atomic_px(pt); - memset32(vaddr + pte, scratch_pte, count); - kunmap_atomic(vaddr); - - pte = 0; - } -} - -static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pd = ppgtt->pd; - unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned act_pte = first_entry % GEN6_PTES; - const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter = sgt_dma(vma); - gen6_pte_t *vaddr; - - GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); - - vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); - do { - vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); - - iter.dma += I915_GTT_PAGE_SIZE; - if (iter.dma == iter.max) { - iter.sg = __sg_next(iter.sg); - if (!iter.sg) - break; - - iter.dma = sg_dma_address(iter.sg); - iter.max = iter.dma + iter.sg->length; - } - - if (++act_pte == GEN6_PTES) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); - act_pte = 0; - } - } while (1); - kunmap_atomic(vaddr); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; -} - -static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) -{ - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt; - unsigned int pde; - - start = round_down(start, SZ_64K); - end = round_up(end, SZ_64K) - start; - - mutex_lock(&ppgtt->flush); - - gen6_for_each_pde(pt, pd, start, end, pde) - gen6_write_pde(ppgtt, pde, pt); - - mb(); - ioread32(ppgtt->pd_addr + pde - 1); - gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt); - mb(); - - mutex_unlock(&ppgtt->flush); -} - -static int gen6_alloc_va_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt, *alloc = NULL; - intel_wakeref_t wakeref; - u64 from = start; - unsigned int pde; - int ret = 0; - - wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); - - spin_lock(&pd->lock); - gen6_for_each_pde(pt, pd, start, length, pde) { - const unsigned int count = gen6_pte_count(start, length); - - if (px_base(pt) == px_base(&vm->scratch[1])) { - spin_unlock(&pd->lock); - - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; - } - - fill32_px(pt, vm->scratch[0].encode); - - spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch[1]) { - pd->entry[pde] = pt; - } else { - alloc = pt; - pt = pd->entry[pde]; - } - } - - atomic_add(count, &pt->used); - } - spin_unlock(&pd->lock); - - if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) - gen6_flush_pd(ppgtt, from, start); - - goto out; - -unwind_out: - gen6_ppgtt_clear_range(vm, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); - return ret; -} - -static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) -{ - struct i915_address_space * const vm = &ppgtt->base.vm; - struct i915_page_directory * const pd = ppgtt->base.pd; - int ret; - - ret = setup_scratch_page(vm, __GFP_HIGHMEM); - if (ret) - return ret; - - vm->scratch[0].encode = - vm->pte_encode(px_dma(&vm->scratch[0]), - I915_CACHE_NONE, PTE_READ_ONLY); - - if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) { - cleanup_scratch_page(vm); - return -ENOMEM; - } - - fill32_px(&vm->scratch[1], vm->scratch[0].encode); - memset_p(pd->entry, &vm->scratch[1], I915_PDES); - - return 0; -} - -static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) -{ - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = - px_base(&ppgtt->base.vm.scratch[1]); - struct i915_page_table *pt; - u32 pde; - - gen6_for_all_pdes(pt, pd, pde) - if (px_base(pt) != scratch) - free_px(&ppgtt->base.vm, pt); -} - -static void gen6_ppgtt_cleanup(struct i915_address_space *vm) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - - __i915_vma_put(ppgtt->vma); - - gen6_ppgtt_free_pd(ppgtt); - free_scratch(vm); - - mutex_destroy(&ppgtt->flush); - mutex_destroy(&ppgtt->pin_mutex); - kfree(ppgtt->base.pd); -} - -static int pd_vma_set_pages(struct i915_vma *vma) -{ - vma->pages = ERR_PTR(-ENODEV); - return 0; -} - -static void pd_vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - vma->pages = NULL; -} - -static int pd_vma_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); - struct gen6_ppgtt *ppgtt = vma->private; - u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; - - px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; - - gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); - return 0; -} - -static void pd_vma_unbind(struct i915_vma *vma) -{ - struct gen6_ppgtt *ppgtt = vma->private; - struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = - px_base(&ppgtt->base.vm.scratch[1]); - struct i915_page_table *pt; - unsigned int pde; - - if (!ppgtt->scan_for_unused_pt) - return; - - /* Free all no longer used page tables */ - gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { - if (px_base(pt) == scratch || atomic_read(&pt->used)) - continue; - - free_px(&ppgtt->base.vm, pt); - pd->entry[pde] = scratch; - } - - ppgtt->scan_for_unused_pt = false; -} - -static const struct i915_vma_ops pd_vma_ops = { - .set_pages = pd_vma_set_pages, - .clear_pages = pd_vma_clear_pages, - .bind_vma = pd_vma_bind, - .unbind_vma = pd_vma_unbind, -}; - -static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) -{ - struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; - struct i915_vma *vma; - - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(size > ggtt->vm.total); - - vma = i915_vma_alloc(); - if (!vma) - return ERR_PTR(-ENOMEM); - - i915_active_init(&vma->active, NULL, NULL); - - kref_init(&vma->ref); - mutex_init(&vma->pages_mutex); - vma->vm = i915_vm_get(&ggtt->vm); - vma->ops = &pd_vma_ops; - vma->private = ppgtt; - - vma->size = size; - vma->fence_size = size; - atomic_set(&vma->flags, I915_VMA_GGTT); - vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ - - INIT_LIST_HEAD(&vma->obj_link); - INIT_LIST_HEAD(&vma->closed_link); - - return vma; -} - -int gen6_ppgtt_pin(struct i915_ppgtt *base) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - int err = 0; - - GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); - - /* - * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt - * which will be pinned into every active context. - * (When vma->pin_count becomes atomic, I expect we will naturally - * need a larger, unpacked, type and kill this redundancy.) - */ - if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) - return 0; - - if (mutex_lock_interruptible(&ppgtt->pin_mutex)) - return -EINTR; - - /* - * PPGTT PDEs reside in the GGTT and consists of 512 entries. The - * allocator works in address space sizes, so it's multiplied by page - * size. We allocate at the top of the GTT to avoid fragmentation. - */ - if (!atomic_read(&ppgtt->pin_count)) { - err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); - } - if (!err) - atomic_inc(&ppgtt->pin_count); - mutex_unlock(&ppgtt->pin_mutex); - - return err; -} - -void gen6_ppgtt_unpin(struct i915_ppgtt *base) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - - GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); - if (atomic_dec_and_test(&ppgtt->pin_count)) - i915_vma_unpin(ppgtt->vma); -} - -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) -{ - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - - if (!atomic_read(&ppgtt->pin_count)) - return; - - i915_vma_unpin(ppgtt->vma); - atomic_set(&ppgtt->pin_count, 0); -} - -static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) -{ - struct i915_ggtt * const ggtt = &i915->ggtt; - struct gen6_ppgtt *ppgtt; - int err; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - mutex_init(&ppgtt->flush); - mutex_init(&ppgtt->pin_mutex); - - ppgtt_init(&ppgtt->base, &i915->gt); - ppgtt->base.vm.top = 1; - - ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; - ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; - ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; - ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; - - ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - - ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); - if (!ppgtt->base.pd) { - err = -ENOMEM; - goto err_free; - } - - err = gen6_ppgtt_init_scratch(ppgtt); - if (err) - goto err_pd; - - ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); - if (IS_ERR(ppgtt->vma)) { - err = PTR_ERR(ppgtt->vma); - goto err_scratch; - } - - return &ppgtt->base; - -err_scratch: - free_scratch(&ppgtt->base.vm); -err_pd: - kfree(ppgtt->base.pd); -err_free: - mutex_destroy(&ppgtt->pin_mutex); - kfree(ppgtt); - return ERR_PTR(err); -} - -static void gtt_write_workarounds(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; - - /* This function is for gtt related workarounds. This function is - * called on driver load and after a GPU reset, so you can place - * workarounds here even if they get overwritten by GPU reset. - */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ - if (IS_BROADWELL(i915)) - intel_uncore_write(uncore, - GEN8_L3_LRA_1_GPGPU, - GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); - else if (IS_CHERRYVIEW(i915)) - intel_uncore_write(uncore, - GEN8_L3_LRA_1_GPGPU, - GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_LP(i915)) - intel_uncore_write(uncore, - GEN8_L3_LRA_1_GPGPU, - GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); - else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) - intel_uncore_write(uncore, - GEN8_L3_LRA_1_GPGPU, - GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - - /* - * To support 64K PTEs we need to first enable the use of the - * Intermediate-Page-Size(IPS) bit of the PDE field via some magical - * mmio, otherwise the page-walker will simply ignore the IPS bit. This - * shouldn't be needed after GEN10. - * - * 64K pages were first introduced from BDW+, although technically they - * only *work* from gen9+. For pre-BDW we instead have the option for - * 32K pages, but we don't currently have any support for it in our - * driver. - */ - if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) && - INTEL_GEN(i915) <= 10) - intel_uncore_rmw(uncore, - GEN8_GAMW_ECO_DEV_RW_IA, - 0, - GAMW_ECO_ENABLE_64K_IPS_FIELD); - - if (IS_GEN_RANGE(i915, 8, 11)) { - bool can_use_gtt_cache = true; - - /* - * According to the BSpec if we use 2M/1G pages then we also - * need to disable the GTT cache. At least on BDW we can see - * visual corruption when using 2M pages, and not disabling the - * GTT cache. - */ - if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M)) - can_use_gtt_cache = false; - - /* WaGttCachingOffByDefault */ - intel_uncore_write(uncore, - HSW_GTT_CACHE_EN, - can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); - WARN_ON_ONCE(can_use_gtt_cache && - intel_uncore_read(uncore, - HSW_GTT_CACHE_EN) == 0); - } -} - -int i915_ppgtt_init_hw(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - - gtt_write_workarounds(gt); - - if (IS_GEN(i915, 6)) - gen6_ppgtt_enable(gt); - else if (IS_GEN(i915, 7)) - gen7_ppgtt_enable(gt); - - return 0; -} - -static struct i915_ppgtt * -__ppgtt_create(struct drm_i915_private *i915) -{ - if (INTEL_GEN(i915) < 8) - return gen6_ppgtt_create(i915); - else - return gen8_ppgtt_create(i915); -} - -struct i915_ppgtt * -i915_ppgtt_create(struct drm_i915_private *i915) -{ - struct i915_ppgtt *ppgtt; - - ppgtt = __ppgtt_create(i915); - if (IS_ERR(ppgtt)) - return ppgtt; - - trace_i915_ppgtt_create(&ppgtt->vm); - - return ppgtt; -} - -/* Certain Gen5 chipsets require require idling the GPU before - * unmapping anything from the GTT when VT-d is enabled. - */ -static bool needs_idle_maps(struct drm_i915_private *dev_priv) -{ - /* Query intel_iommu to see if we need the workaround. Presumably that - * was loaded first. - */ - return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active(); -} - -static void ggtt_suspend_mappings(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - - /* Don't bother messing with faults pre GEN6 as we have little - * documentation supporting that it's a good idea. - */ - if (INTEL_GEN(i915) < 6) - return; - - intel_gt_check_and_clear_faults(ggtt->vm.gt); - - ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - - ggtt->invalidate(ggtt); -} - -void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915) -{ - ggtt_suspend_mappings(&i915->ggtt); -} - int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { @@ -2181,368 +52,6 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, return -ENOSPC; } -static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) -{ - writeq(pte, addr); -} - -static void gen8_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 unused) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *pte = - (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); - - ggtt->invalidate(ggtt); -} - -static void gen8_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); - dma_addr_t addr; - - /* - * Note that we ignore PTE_READ_ONLY here. The caller must be careful - * not to allow the user to override access to a read only page. - */ - - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(ggtt); -} - -static void gen6_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *pte = - (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - iowrite32(vm->pte_encode(addr, level, flags), pte); - - ggtt->invalidate(ggtt); -} - -/* - * Binds an object into the global gtt with the specified cache level. The object - * will be accessible to the GPU via commands whose operands reference offsets - * within the global GTT as well as accessible by the GPU through the GMADR - * mapped BAR (dev_priv->mm.gtt->gtt). - */ -static void gen6_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; - unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; - struct sgt_iter iter; - dma_addr_t addr; - for_each_sgt_daddr(addr, iter, vma->pages) - iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(ggtt); -} - -static void nop_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ -} - -static void gen8_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned first_entry = start / I915_GTT_PAGE_SIZE; - unsigned num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch[0].encode; - gen8_pte_t __iomem *gtt_base = - (gen8_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - for (i = 0; i < num_entries; i++) - gen8_set_pte(>t_base[i], scratch_pte); -} - -static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) -{ - struct drm_i915_private *dev_priv = vm->i915; - - /* - * Make sure the internal GAM fifo has been cleared of all GTT - * writes before exiting stop_machine(). This guarantees that - * any aperture accesses waiting to start in another process - * cannot back up behind the GTT writes causing a hang. - * The register can be any arbitrary GAM register. - */ - POSTING_READ(GFX_FLSH_CNTL_GEN6); -} - -struct insert_page { - struct i915_address_space *vm; - dma_addr_t addr; - u64 offset; - enum i915_cache_level level; -}; - -static int bxt_vtd_ggtt_insert_page__cb(void *_arg) -{ - struct insert_page *arg = _arg; - - gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 unused) -{ - struct insert_page arg = { vm, addr, offset, level }; - - stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); -} - -struct insert_entries { - struct i915_address_space *vm; - struct i915_vma *vma; - enum i915_cache_level level; - u32 flags; -}; - -static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - - gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, vma, level, flags }; - - stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); -} - -struct clear_range { - struct i915_address_space *vm; - u64 start; - u64 length; -}; - -static int bxt_vtd_ggtt_clear_range__cb(void *_arg) -{ - struct clear_range *arg = _arg; - - gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, - u64 start, - u64 length) -{ - struct clear_range arg = { vm, start, length }; - - stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); -} - -static void gen6_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned first_entry = start / I915_GTT_PAGE_SIZE; - unsigned num_entries = length / I915_GTT_PAGE_SIZE; - gen6_pte_t scratch_pte, __iomem *gtt_base = - (gen6_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - scratch_pte = vm->scratch[0].encode; - for (i = 0; i < num_entries; i++) - iowrite32(scratch_pte, >t_base[i]); -} - -static void i915_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); -} - -static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, - flags); -} - -static void i915_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); -} - -static int ggtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct drm_i915_private *i915 = vma->vm->i915; - struct drm_i915_gem_object *obj = vma->obj; - intel_wakeref_t wakeref; - u32 pte_flags; - - /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ - pte_flags = 0; - if (i915_gem_object_is_readonly(obj)) - pte_flags |= PTE_READ_ONLY; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - - /* - * Without aliasing PPGTT there's no difference between - * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally - * upgrade to both bound if we bind either to avoid double-binding. - */ - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); - - return 0; -} - -static void ggtt_unbind_vma(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); -} - -static int aliasing_gtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - struct drm_i915_private *i915 = vma->vm->i915; - u32 pte_flags; - int ret; - - /* Currently applicable only to VLV */ - pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) - pte_flags |= PTE_READ_ONLY; - - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; - - if (flags & I915_VMA_ALLOC) { - ret = alias->vm.allocate_va_range(&alias->vm, - vma->node.start, - vma->size); - if (ret) - return ret; - - set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); - } - - GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, - __i915_vma_flags(vma))); - alias->vm.insert_entries(&alias->vm, vma, - cache_level, pte_flags); - } - - if (flags & I915_VMA_GLOBAL_BIND) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - vma->vm->insert_entries(vma->vm, vma, - cache_level, pte_flags); - } - } - - return 0; -} - -static void aliasing_gtt_unbind_vma(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - - if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - struct i915_address_space *vm = vma->vm; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vm->clear_range(vm, vma->node.start, vma->size); - } - - if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { - struct i915_address_space *vm = - &i915_vm_to_ggtt(vma->vm)->alias->vm; - - vm->clear_range(vm, vma->node.start, vma->size); - } -} - void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { @@ -2563,1070 +72,6 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } -static int ggtt_set_pages(struct i915_vma *vma) -{ - int ret; - - GEM_BUG_ON(vma->pages); - - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - -static void i915_ggtt_color_adjust(const struct drm_mm_node *node, - unsigned long color, - u64 *start, - u64 *end) -{ - if (i915_node_color_differs(node, color)) - *start += I915_GTT_PAGE_SIZE; - - /* Also leave a space between the unallocated reserved node after the - * GTT and any objects within the GTT, i.e. we use the color adjustment - * to insert a guard page to prevent prefetches crossing over the - * GTT boundary. - */ - node = list_next_entry(node, node_list); - if (node->color != color) - *end -= I915_GTT_PAGE_SIZE; -} - -static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) -{ - struct i915_ppgtt *ppgtt; - int err; - - ppgtt = i915_ppgtt_create(ggtt->vm.i915); - if (IS_ERR(ppgtt)) - return PTR_ERR(ppgtt); - - if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { - err = -ENODEV; - goto err_ppgtt; - } - - /* - * Note we only pre-allocate as far as the end of the global - * GTT. On 48b / 4-level page-tables, the difference is very, - * very significant! We have to preallocate as GVT/vgpu does - * not like the page directory disappearing. - */ - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); - if (err) - goto err_ppgtt; - - ggtt->alias = ppgtt; - ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; - - GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); - ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; - - GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); - ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; - - return 0; - -err_ppgtt: - i915_vm_put(&ppgtt->vm); - return err; -} - -static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) -{ - struct i915_ppgtt *ppgtt; - - ppgtt = fetch_and_zero(&ggtt->alias); - if (!ppgtt) - return; - - i915_vm_put(&ppgtt->vm); - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; -} - -static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) -{ - u64 size; - int ret; - - if (!USES_GUC(ggtt->vm.i915)) - return 0; - - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; - - ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); - if (ret) - DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n"); - - return ret; -} - -static void ggtt_release_guc_top(struct i915_ggtt *ggtt) -{ - if (drm_mm_node_allocated(&ggtt->uc_fw)) - drm_mm_remove_node(&ggtt->uc_fw); -} - -static void cleanup_init_ggtt(struct i915_ggtt *ggtt) -{ - ggtt_release_guc_top(ggtt); - if (drm_mm_node_allocated(&ggtt->error_capture)) - drm_mm_remove_node(&ggtt->error_capture); -} - -static int init_ggtt(struct i915_ggtt *ggtt) -{ - /* Let GEM Manage all of the aperture. - * - * However, leave one page at the end still bound to the scratch page. - * There are a number of places where the hardware apparently prefetches - * past the end of the object, and we've seen multiple hangs with the - * GPU head pointer stuck in a batchbuffer bound at the last page of the - * aperture. One page should be enough to keep any prefetching inside - * of the aperture. - */ - unsigned long hole_start, hole_end; - struct drm_mm_node *entry; - int ret; - - /* - * GuC requires all resources that we're sharing with it to be placed in - * non-WOPCM memory. If GuC is not present or not in use we still need a - * small bias as ring wraparound at offset 0 sometimes hangs. No idea - * why. - */ - ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, - intel_wopcm_guc_size(&ggtt->vm.i915->wopcm)); - - ret = intel_vgt_balloon(ggtt); - if (ret) - return ret; - - if (ggtt->mappable_end) { - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, - PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; - } - - /* - * The upper portion of the GuC address space has a sizeable hole - * (several MB) that is inaccessible by GuC. Reserve this range within - * GGTT as it can comfortably hold GuC/HuC firmware images. - */ - ret = ggtt_reserve_guc_top(ggtt); - if (ret) - goto err; - - /* Clear any non-preallocated blocks */ - drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); - ggtt->vm.clear_range(&ggtt->vm, hole_start, - hole_end - hole_start); - } - - /* And finally clear the reserved guard page */ - ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); - - return 0; - -err: - cleanup_init_ggtt(ggtt); - return ret; -} - -int i915_init_ggtt(struct drm_i915_private *i915) -{ - int ret; - - ret = init_ggtt(&i915->ggtt); - if (ret) - return ret; - - if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { - ret = init_aliasing_ppgtt(&i915->ggtt); - if (ret) - cleanup_init_ggtt(&i915->ggtt); - } - - return 0; -} - -static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) -{ - struct i915_vma *vma, *vn; - - atomic_set(&ggtt->vm.open, 0); - - rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ - flush_workqueue(ggtt->vm.i915->wq); - - mutex_lock(&ggtt->vm.mutex); - - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) - WARN_ON(__i915_vma_unbind(vma)); - - if (drm_mm_node_allocated(&ggtt->error_capture)) - drm_mm_remove_node(&ggtt->error_capture); - - ggtt_release_guc_top(ggtt); - intel_vgt_deballoon(ggtt); - - ggtt->vm.cleanup(&ggtt->vm); - - mutex_unlock(&ggtt->vm.mutex); - i915_address_space_fini(&ggtt->vm); - - arch_phys_wc_del(ggtt->mtrr); - - if (ggtt->iomap.size) - io_mapping_fini(&ggtt->iomap); -} - -/** - * i915_ggtt_driver_release - Clean up GGTT hardware initialization - * @i915: i915 device - */ -void i915_ggtt_driver_release(struct drm_i915_private *i915) -{ - struct pagevec *pvec; - - fini_aliasing_ppgtt(&i915->ggtt); - - ggtt_cleanup_hw(&i915->ggtt); - - pvec = &i915->mm.wc_stash.pvec; - if (pvec->nr) { - set_pages_array_wb(pvec->pages, pvec->nr); - __pagevec_release(pvec); - } -} - -static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; - return snb_gmch_ctl << 20; -} - -static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; - if (bdw_gmch_ctl) - bdw_gmch_ctl = 1 << bdw_gmch_ctl; - -#ifdef CONFIG_X86_32 - /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ - if (bdw_gmch_ctl > 4) - bdw_gmch_ctl = 4; -#endif - - return bdw_gmch_ctl << 20; -} - -static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GGMS_MASK; - - if (gmch_ctrl) - return 1 << (20 + gmch_ctrl); - - return 0; -} - -static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - phys_addr_t phys_addr; - int ret; - - /* For Modern GENs the PTEs and register space are split in the BAR */ - phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; - - /* - * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) - ggtt->gsm = ioremap_nocache(phys_addr, size); - else - ggtt->gsm = ioremap_wc(phys_addr, size); - if (!ggtt->gsm) { - DRM_ERROR("Failed to map the ggtt page table\n"); - return -ENOMEM; - } - - ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); - if (ret) { - DRM_ERROR("Scratch setup failed\n"); - /* iounmap will also get called at remove, but meh */ - iounmap(ggtt->gsm); - return ret; - } - - ggtt->vm.scratch[0].encode = - ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), - I915_CACHE_NONE, 0); - - return 0; -} - -static void tgl_setup_private_ppat(struct intel_uncore *uncore) -{ - /* TGL doesn't support LLC or AGE settings */ - intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); - intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); - intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); - intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); -} - -static void cnl_setup_private_ppat(struct intel_uncore *uncore) -{ - intel_uncore_write(uncore, - GEN10_PAT_INDEX(0), - GEN8_PPAT_WB | GEN8_PPAT_LLC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(1), - GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(2), - GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(3), - GEN8_PPAT_UC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(4), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(5), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(6), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(7), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); -} - -/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability - * bits. When using advanced contexts each context stores its own PAT, but - * writing this data shouldn't be harmful even in those cases. */ -static void bdw_setup_private_ppat(struct intel_uncore *uncore) -{ - u64 pat; - - pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ - GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ - GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ - GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ - GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | - GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | - GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | - GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); - - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); -} - -static void chv_setup_private_ppat(struct intel_uncore *uncore) -{ - u64 pat; - - /* - * Map WB on BDW to snooped on CHV. - * - * Only the snoop bit has meaning for CHV, the rest is - * ignored. - * - * The hardware will never snoop for certain types of accesses: - * - CPU GTT (GMADR->GGTT->no snoop->memory) - * - PPGTT page tables - * - some other special cycles - * - * As with BDW, we also need to consider the following for GT accesses: - * "For GGTT, there is NO pat_sel[2:0] from the entry, - * so RTL will always use the value corresponding to - * pat_sel = 000". - * Which means we must set the snoop bit in PAT entry 0 - * in order to keep the global status page working. - */ - - pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | - GEN8_PPAT(1, 0) | - GEN8_PPAT(2, 0) | - GEN8_PPAT(3, 0) | - GEN8_PPAT(4, CHV_PPAT_SNOOP) | - GEN8_PPAT(5, CHV_PPAT_SNOOP) | - GEN8_PPAT(6, CHV_PPAT_SNOOP) | - GEN8_PPAT(7, CHV_PPAT_SNOOP); - - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); -} - -static void gen6_gmch_remove(struct i915_address_space *vm) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - - iounmap(ggtt->gsm); - cleanup_scratch_page(vm); -} - -static void setup_private_pat(struct intel_uncore *uncore) -{ - struct drm_i915_private *i915 = uncore->i915; - - GEM_BUG_ON(INTEL_GEN(i915) < 8); - - if (INTEL_GEN(i915) >= 12) - tgl_setup_private_ppat(uncore); - else if (INTEL_GEN(i915) >= 10) - cnl_setup_private_ppat(uncore); - else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) - chv_setup_private_ppat(uncore); - else - bdw_setup_private_ppat(uncore); -} - -static struct resource pci_resource(struct pci_dev *pdev, int bar) -{ - return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar)); -} - -static int gen8_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int size; - u16 snb_gmch_ctl; - int err; - - /* TODO: We're not aware of mappable constraints on gen8 yet */ - if (!IS_DGFX(dev_priv)) { - ggtt->gmadr = pci_resource(pdev, 2); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - } - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - if (err) - DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); - - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (IS_CHERRYVIEW(dev_priv)) - size = chv_get_total_gtt_size(snb_gmch_ctl); - else - size = gen8_get_total_gtt_size(snb_gmch_ctl); - - ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.cleanup = gen6_gmch_remove; - ggtt->vm.insert_page = gen8_ggtt_insert_page; - ggtt->vm.clear_range = nop_clear_range; - if (intel_scanout_needs_vtd_wa(dev_priv)) - ggtt->vm.clear_range = gen8_ggtt_clear_range; - - ggtt->vm.insert_entries = gen8_ggtt_insert_entries; - - /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ - if (intel_ggtt_update_needs_vtd_wa(dev_priv) || - IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) { - ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; - ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; - if (ggtt->vm.clear_range != nop_clear_range) - ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; - } - - ggtt->invalidate = gen8_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; - - ggtt->vm.pte_encode = gen8_pte_encode; - - setup_private_pat(ggtt->vm.gt->uncore); - - return ggtt_probe_common(ggtt, size); -} - -static int gen6_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int size; - u16 snb_gmch_ctl; - int err; - - ggtt->gmadr = - (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), - pci_resource_len(pdev, 2)); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - - /* 64/512MB is the current min/max we actually know of, but this is just - * a coarse sanity check. - */ - if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); - return -ENXIO; - } - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); - if (err) - DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - - ggtt->vm.clear_range = nop_clear_range; - if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) - ggtt->vm.clear_range = gen6_ggtt_clear_range; - ggtt->vm.insert_page = gen6_ggtt_insert_page; - ggtt->vm.insert_entries = gen6_ggtt_insert_entries; - ggtt->vm.cleanup = gen6_gmch_remove; - - ggtt->invalidate = gen6_ggtt_invalidate; - - if (HAS_EDRAM(dev_priv)) - ggtt->vm.pte_encode = iris_pte_encode; - else if (IS_HASWELL(dev_priv)) - ggtt->vm.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(dev_priv)) - ggtt->vm.pte_encode = byt_pte_encode; - else if (INTEL_GEN(dev_priv) >= 7) - ggtt->vm.pte_encode = ivb_pte_encode; - else - ggtt->vm.pte_encode = snb_pte_encode; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; - - return ggtt_probe_common(ggtt, size); -} - -static void i915_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - -static int i915_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *dev_priv = ggtt->vm.i915; - phys_addr_t gmadr_base; - int ret; - - ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); - if (!ret) { - DRM_ERROR("failed to set up gmch\n"); - return -EIO; - } - - intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); - - ggtt->gmadr = - (struct resource) DEFINE_RES_MEM(gmadr_base, - ggtt->mappable_end); - - ggtt->do_idle_maps = needs_idle_maps(dev_priv); - ggtt->vm.insert_page = i915_ggtt_insert_page; - ggtt->vm.insert_entries = i915_ggtt_insert_entries; - ggtt->vm.clear_range = i915_ggtt_clear_range; - ggtt->vm.cleanup = i915_gmch_remove; - - ggtt->invalidate = gmch_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; - - if (unlikely(ggtt->do_idle_maps)) - dev_notice(dev_priv->drm.dev, - "Applying Ironlake quirks for intel_iommu\n"); - - return 0; -} - -static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - int ret; - - ggtt->vm.gt = gt; - ggtt->vm.i915 = i915; - ggtt->vm.dma = &i915->drm.pdev->dev; - - if (INTEL_GEN(i915) <= 5) - ret = i915_gmch_probe(ggtt); - else if (INTEL_GEN(i915) < 8) - ret = gen6_gmch_probe(ggtt); - else - ret = gen8_gmch_probe(ggtt); - if (ret) - return ret; - - if ((ggtt->vm.total - 1) >> 32) { - DRM_ERROR("We never expected a Global GTT with more than 32bits" - " of address space! Found %lldM!\n", - ggtt->vm.total >> 20); - ggtt->vm.total = 1ULL << 32; - ggtt->mappable_end = - min_t(u64, ggtt->mappable_end, ggtt->vm.total); - } - - if (ggtt->mappable_end > ggtt->vm.total) { - DRM_ERROR("mappable aperture extends past end of GGTT," - " aperture=%pa, total=%llx\n", - &ggtt->mappable_end, ggtt->vm.total); - ggtt->mappable_end = ggtt->vm.total; - } - - /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); - DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("DSM size = %lluM\n", - (u64)resource_size(&intel_graphics_stolen_res) >> 20); - - return 0; -} - -/** - * i915_ggtt_probe_hw - Probe GGTT hardware location - * @i915: i915 device - */ -int i915_ggtt_probe_hw(struct drm_i915_private *i915) -{ - int ret; - - ret = ggtt_probe_hw(&i915->ggtt, &i915->gt); - if (ret) - return ret; - - if (intel_vtd_active()) - dev_info(i915->drm.dev, "VT-d active for gfx access\n"); - - return 0; -} - -static int ggtt_init_hw(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - - i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - - ggtt->vm.is_ggtt = true; - - /* Only VLV supports read-only GGTT mappings */ - ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); - - if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) - ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; - - if (ggtt->mappable_end) { - if (!io_mapping_init_wc(&ggtt->iomap, - ggtt->gmadr.start, - ggtt->mappable_end)) { - ggtt->vm.cleanup(&ggtt->vm); - return -EIO; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, - ggtt->mappable_end); - } - - i915_ggtt_init_fences(ggtt); - - return 0; -} - -/** - * i915_ggtt_init_hw - Initialize GGTT hardware - * @dev_priv: i915 device - */ -int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) -{ - int ret; - - stash_init(&dev_priv->mm.wc_stash); - - /* Note that we use page colouring to enforce a guard page at the - * end of the address space. This is required as the CS may prefetch - * beyond the end of the batch buffer, across the page boundary, - * and beyond the end of the GTT if we do not provide a guard. - */ - ret = ggtt_init_hw(&dev_priv->ggtt); - if (ret) - return ret; - - return 0; -} - -int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) - return -EIO; - - return 0; -} - -void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) -{ - GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate); - - ggtt->invalidate = guc_ggtt_invalidate; - - ggtt->invalidate(ggtt); -} - -void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) -{ - /* XXX Temporary pardon for error unload */ - if (ggtt->invalidate == gen8_ggtt_invalidate) - return; - - /* We should only be called after i915_ggtt_enable_guc() */ - GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate); - - ggtt->invalidate = gen8_ggtt_invalidate; - - ggtt->invalidate(ggtt); -} - -static void ggtt_restore_mappings(struct i915_ggtt *ggtt) -{ - struct i915_vma *vma, *vn; - bool flush = false; - int open; - - intel_gt_check_and_clear_faults(ggtt->vm.gt); - - mutex_lock(&ggtt->vm.mutex); - - /* First fill our portion of the GTT with scratch pages */ - ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - - /* Skip rewriting PTE on VMA unbind. */ - open = atomic_xchg(&ggtt->vm.open, 0); - - /* clflush objects bound into the GGTT and rebind them. */ - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { - struct drm_i915_gem_object *obj = vma->obj; - - if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) - continue; - - if (!__i915_vma_unbind(vma)) - continue; - - clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); - WARN_ON(i915_vma_bind(vma, - obj ? obj->cache_level : 0, - PIN_GLOBAL, NULL)); - if (obj) { /* only used during resume => exclusive access */ - flush |= fetch_and_zero(&obj->write_domain); - obj->read_domains |= I915_GEM_DOMAIN_GTT; - } - } - - atomic_set(&ggtt->vm.open, open); - ggtt->invalidate(ggtt); - - mutex_unlock(&ggtt->vm.mutex); - - if (flush) - wbinvd_on_all_cpus(); -} - -void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) -{ - struct i915_ggtt *ggtt = &i915->ggtt; - - ggtt_restore_mappings(ggtt); - - if (INTEL_GEN(i915) >= 8) - setup_private_pat(ggtt->vm.gt->uncore); -} - -static struct scatterlist * -rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, - unsigned int width, unsigned int height, - unsigned int stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int column, row; - unsigned int src_idx; - - for (column = 0; column < width; column++) { - src_idx = stride * (height - 1) + column + offset; - for (row = 0; row < height; row++) { - st->nents++; - /* We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); - sg_dma_address(sg) = - i915_gem_object_get_dma_address(obj, src_idx); - sg_dma_len(sg) = I915_GTT_PAGE_SIZE; - sg = sg_next(sg); - src_idx -= stride; - } - } - - return sg; -} - -static noinline struct sg_table * -intel_rotate_pages(struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_rotation_info_size(rot_info); - struct sg_table *st; - struct scatterlist *sg; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. */ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { - sg = rotate_pages(obj, rot_info->plane[i].offset, - rot_info->plane[i].width, rot_info->plane[i].height, - rot_info->plane[i].stride, st, sg); - } - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static struct scatterlist * -remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, - unsigned int width, unsigned int height, - unsigned int stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int row; - - for (row = 0; row < height; row++) { - unsigned int left = width * I915_GTT_PAGE_SIZE; - - while (left) { - dma_addr_t addr; - unsigned int length; - - /* We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - - addr = i915_gem_object_get_dma_address_len(obj, offset, &length); - - length = min(left, length); - - st->nents++; - - sg_set_page(sg, NULL, length, 0); - sg_dma_address(sg) = addr; - sg_dma_len(sg) = length; - sg = sg_next(sg); - - offset += length / I915_GTT_PAGE_SIZE; - left -= length; - } - - offset += stride - width; - } - - return sg; -} - -static noinline struct sg_table * -intel_remap_pages(struct intel_remapped_info *rem_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_remapped_info_size(rem_info); - struct sg_table *st; - struct scatterlist *sg; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. */ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { - sg = remap_pages(obj, rem_info->plane[i].offset, - rem_info->plane[i].width, rem_info->plane[i].height, - rem_info->plane[i].stride, st, sg); - } - - i915_sg_trim(st); - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static noinline struct sg_table * -intel_partial_pages(const struct i915_ggtt_view *view, - struct drm_i915_gem_object *obj) -{ - struct sg_table *st; - struct scatterlist *sg, *iter; - unsigned int count = view->partial.size; - unsigned int offset; - int ret = -ENOMEM; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, count, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); - GEM_BUG_ON(!iter); - - sg = st->sgl; - st->nents = 0; - do { - unsigned int len; - - len = min(iter->length - (offset << PAGE_SHIFT), - count << PAGE_SHIFT); - sg_set_page(sg, NULL, len, 0); - sg_dma_address(sg) = - sg_dma_address(iter) + (offset << PAGE_SHIFT); - sg_dma_len(sg) = len; - - st->nents++; - count -= len >> PAGE_SHIFT; - if (count == 0) { - sg_mark_end(sg); - i915_sg_trim(st); /* Drop any unused tail entries. */ - - return st; - } - - sg = __sg_next(sg); - iter = __sg_next(iter); - offset = 0; - } while (1); - -err_sg_alloc: - kfree(st); -err_st_alloc: - return ERR_PTR(ret); -} - -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma) -{ - int ret; - - /* The vma->pages are only valid within the lifespan of the borrowed - * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so - * must be the vma->pages. A simple rule is that vma->pages must only - * be accessed when the obj->mm.pages are pinned. - */ - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - - switch (vma->ggtt_view.type) { - default: - GEM_BUG_ON(vma->ggtt_view.type); - /* fall through */ - case I915_GGTT_VIEW_NORMAL: - vma->pages = vma->obj->mm.pages; - return 0; - - case I915_GGTT_VIEW_ROTATED: - vma->pages = - intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); - break; - - case I915_GGTT_VIEW_REMAPPED: - vma->pages = - intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); - break; - - case I915_GGTT_VIEW_PARTIAL: - vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - break; - } - - ret = 0; - if (IS_ERR(vma->pages)) { - ret = PTR_ERR(vma->pages); - vma->pages = NULL; - DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", - vma->ggtt_view.type, ret); - } - return ret; -} - /** * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) * @vm: the &struct i915_address_space @@ -3848,6 +293,5 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_gtt.c" #include "selftests/i915_gem_gtt.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 31a4a96ddd0d..f6226df9f972 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -1,639 +1,21 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Please try to maintain the following order within this file unless it makes - * sense to do otherwise. From top to bottom: - * 1. typedefs - * 2. #defines, and macros - * 3. structure definitions - * 4. function prototypes - * - * Within each section, please try to order by generation in ascending order, - * from top to bottom (ie. gen6 on the top, gen8 on the bottom). + * Copyright © 2020 Intel Corporation */ #ifndef __I915_GEM_GTT_H__ #define __I915_GEM_GTT_H__ #include <linux/io-mapping.h> -#include <linux/kref.h> -#include <linux/mm.h> -#include <linux/pagevec.h> -#include <linux/workqueue.h> +#include <linux/types.h> #include <drm/drm_mm.h> -#include "gt/intel_reset.h" -#include "i915_gem_fence_reg.h" -#include "i915_request.h" +#include "gt/intel_gtt.h" #include "i915_scatterlist.h" -#include "i915_selftest.h" -#include "gt/intel_timeline.h" -#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) -#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) -#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21) - -#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K -#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M - -#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE - -#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE - -#define I915_FENCE_REG_NONE -1 -#define I915_MAX_NUM_FENCES 32 -/* 32 fences + sign bit for FENCE_REG_NONE */ -#define I915_MAX_NUM_FENCE_BITS 6 - -struct drm_i915_file_private; struct drm_i915_gem_object; -struct i915_vma; -struct intel_gt; - -typedef u32 gen6_pte_t; -typedef u64 gen8_pte_t; - -#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT) - -/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ -#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) -#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) -#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) -#define GEN6_PTE_CACHE_LLC (2 << 1) -#define GEN6_PTE_UNCACHED (1 << 1) -#define GEN6_PTE_VALID (1 << 0) - -#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) -#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) -#define I915_PDES 512 -#define I915_PDE_MASK (I915_PDES - 1) -#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) - -#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t)) -#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE) -#define GEN6_PD_ALIGN (PAGE_SIZE * 16) -#define GEN6_PDE_SHIFT 22 -#define GEN6_PDE_VALID (1 << 0) - -#define GEN7_PTE_CACHE_L3_LLC (3 << 1) - -#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2) -#define BYT_PTE_WRITEABLE (1 << 1) - -/* Cacheability Control is a 4-bit value. The low three bits are stored in bits - * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. - */ -#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \ - (((bits) & 0x8) << (11 - 3))) -#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) -#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) -#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8) -#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) -#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) -#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) -#define HSW_PTE_UNCACHED (0) -#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) -#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) - -/* - * GEN8 32b style address is defined as a 3 level page table: - * 31:30 | 29:21 | 20:12 | 11:0 - * PDPE | PDE | PTE | offset - * The difference as compared to normal x86 3 level page table is the PDPEs are - * programmed via register. - * - * GEN8 48b style address is defined as a 4 level page table: - * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 - * PML4E | PDPE | PDE | PTE | offset - */ -#define GEN8_3LVL_PDPES 4 - -#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) -#define PPAT_CACHED_PDE 0 /* WB LLC */ -#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ -#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ - -#define CHV_PPAT_SNOOP (1<<6) -#define GEN8_PPAT_AGE(x) ((x)<<4) -#define GEN8_PPAT_LLCeLLC (3<<2) -#define GEN8_PPAT_LLCELLC (2<<2) -#define GEN8_PPAT_LLC (1<<2) -#define GEN8_PPAT_WB (3<<0) -#define GEN8_PPAT_WT (2<<0) -#define GEN8_PPAT_WC (1<<0) -#define GEN8_PPAT_UC (0<<0) -#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) - -#define GEN8_PDE_IPS_64K BIT(11) -#define GEN8_PDE_PS_2M BIT(7) - -#define for_each_sgt_daddr(__dp, __iter, __sgt) \ - __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) - -struct intel_remapped_plane_info { - /* in gtt pages */ - unsigned int width, height, stride, offset; -} __packed; - -struct intel_remapped_info { - struct intel_remapped_plane_info plane[2]; - unsigned int unused_mbz; -} __packed; - -struct intel_rotation_info { - struct intel_remapped_plane_info plane[2]; -} __packed; - -struct intel_partial_info { - u64 offset; - unsigned int size; -} __packed; - -enum i915_ggtt_view_type { - I915_GGTT_VIEW_NORMAL = 0, - I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), - I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), - I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), -}; - -static inline void assert_i915_gem_gtt_types(void) -{ - BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int)); - BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); - BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int)); - - /* Check that rotation/remapped shares offsets for simplicity */ - BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != - offsetof(struct intel_rotation_info, plane[0])); - BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != - offsetofend(struct intel_rotation_info, plane[1])); - - /* As we encode the size of each branch inside the union into its type, - * we have to be careful that each branch has a unique size. - */ - switch ((enum i915_ggtt_view_type)0) { - case I915_GGTT_VIEW_NORMAL: - case I915_GGTT_VIEW_PARTIAL: - case I915_GGTT_VIEW_ROTATED: - case I915_GGTT_VIEW_REMAPPED: - /* gcc complains if these are identical cases */ - break; - } -} - -struct i915_ggtt_view { - enum i915_ggtt_view_type type; - union { - /* Members need to contain no holes/padding */ - struct intel_partial_info partial; - struct intel_rotation_info rotated; - struct intel_remapped_info remapped; - }; -}; - -enum i915_cache_level; - -struct i915_vma; - -struct i915_page_dma { - struct page *page; - union { - dma_addr_t daddr; - - /* For gen6/gen7 only. This is the offset in the GGTT - * where the page directory entries for PPGTT begin - */ - u32 ggtt_offset; - }; -}; - -struct i915_page_scratch { - struct i915_page_dma base; - u64 encode; -}; - -struct i915_page_table { - struct i915_page_dma base; - atomic_t used; -}; - -struct i915_page_directory { - struct i915_page_table pt; - spinlock_t lock; - void *entry[512]; -}; - -#define __px_choose_expr(x, type, expr, other) \ - __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(x), type) || \ - __builtin_types_compatible_p(typeof(x), const type), \ - ({ type __x = (type)(x); expr; }), \ - other) - -#define px_base(px) \ - __px_choose_expr(px, struct i915_page_dma *, __x, \ - __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ - __px_choose_expr(px, struct i915_page_table *, &__x->base, \ - __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ - (void)0)))) -#define px_dma(px) (px_base(px)->daddr) - -#define px_pt(px) \ - __px_choose_expr(px, struct i915_page_table *, __x, \ - __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \ - (void)0)) -#define px_used(px) (&px_pt(px)->used) - -struct i915_vma_ops { - /* Map an object into an address space with the given cache flags. */ - int (*bind_vma)(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); - /* - * Unmap an object from an address space. This usually consists of - * setting the valid PTE entries to a reserved scratch page. - */ - void (*unbind_vma)(struct i915_vma *vma); - - int (*set_pages)(struct i915_vma *vma); - void (*clear_pages)(struct i915_vma *vma); -}; - -struct pagestash { - spinlock_t lock; - struct pagevec pvec; -}; - -struct i915_address_space { - struct kref ref; - struct rcu_work rcu; - - struct drm_mm mm; - struct intel_gt *gt; - struct drm_i915_private *i915; - struct device *dma; - /* Every address space belongs to a struct file - except for the global - * GTT that is owned by the driver (and so @file is set to NULL). In - * principle, no information should leak from one context to another - * (or between files/processes etc) unless explicitly shared by the - * owner. Tracking the owner is important in order to free up per-file - * objects along with the file, to aide resource tracking, and to - * assign blame. - */ - struct drm_i915_file_private *file; - u64 total; /* size addr space maps (ex. 2GB for ggtt) */ - u64 reserved; /* size addr space reserved */ - - unsigned int bind_async_flags; - - /* - * Each active user context has its own address space (in full-ppgtt). - * Since the vm may be shared between multiple contexts, we count how - * many contexts keep us "open". Once open hits zero, we are closed - * and do not allow any new attachments, and proceed to shutdown our - * vma and page directories. - */ - atomic_t open; - - struct mutex mutex; /* protects vma and our lists */ -#define VM_CLASS_GGTT 0 -#define VM_CLASS_PPGTT 1 - - struct i915_page_scratch scratch[4]; - unsigned int scratch_order; - unsigned int top; - - /** - * List of vma currently bound. - */ - struct list_head bound_list; - - struct pagestash free_pages; - - /* Global GTT */ - bool is_ggtt:1; - - /* Some systems require uncached updates of the page directories */ - bool pt_kmap_wc:1; - - /* Some systems support read-only mappings for GGTT and/or PPGTT */ - bool has_read_only:1; - - u64 (*pte_encode)(dma_addr_t addr, - enum i915_cache_level level, - u32 flags); /* Create a valid PTE */ -#define PTE_READ_ONLY (1<<0) - - int (*allocate_va_range)(struct i915_address_space *vm, - u64 start, u64 length); - void (*clear_range)(struct i915_address_space *vm, - u64 start, u64 length); - void (*insert_page)(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level cache_level, - u32 flags); - void (*insert_entries)(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); - void (*cleanup)(struct i915_address_space *vm); - - struct i915_vma_ops vma_ops; - - I915_SELFTEST_DECLARE(struct fault_attr fault_attr); - I915_SELFTEST_DECLARE(bool scrub_64K); -}; - -#define i915_is_ggtt(vm) ((vm)->is_ggtt) - -static inline bool -i915_vm_is_4lvl(const struct i915_address_space *vm) -{ - return (vm->total - 1) >> 32; -} - -static inline bool -i915_vm_has_scratch_64K(struct i915_address_space *vm) -{ - return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); -} - -static inline bool -i915_vm_has_cache_coloring(struct i915_address_space *vm) -{ - return i915_is_ggtt(vm) && vm->mm.color_adjust; -} - -/* The Graphics Translation Table is the way in which GEN hardware translates a - * Graphics Virtual Address into a Physical Address. In addition to the normal - * collateral associated with any va->pa translations GEN hardware also has a - * portion of the GTT which can be mapped by the CPU and remain both coherent - * and correct (in cases like swizzling). That region is referred to as GMADR in - * the spec. - */ -struct i915_ggtt { - struct i915_address_space vm; - - struct io_mapping iomap; /* Mapping to our CPU mappable region */ - struct resource gmadr; /* GMADR resource */ - resource_size_t mappable_end; /* End offset that we can CPU map */ - - /** "Graphics Stolen Memory" holds the global PTEs */ - void __iomem *gsm; - void (*invalidate)(struct i915_ggtt *ggtt); - - /** PPGTT used for aliasing the PPGTT with the GTT */ - struct i915_ppgtt *alias; - - bool do_idle_maps; - - int mtrr; - - /** Bit 6 swizzling required for X tiling */ - u32 bit_6_swizzle_x; - /** Bit 6 swizzling required for Y tiling */ - u32 bit_6_swizzle_y; - - u32 pin_bias; - - unsigned int num_fences; - struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; - struct list_head fence_list; - - /** List of all objects in gtt_space, currently mmaped by userspace. - * All objects within this list must also be on bound_list. - */ - struct list_head userfault_list; - - /* Manual runtime pm autosuspend delay for user GGTT mmaps */ - struct intel_wakeref_auto userfault_wakeref; - - struct drm_mm_node error_capture; - struct drm_mm_node uc_fw; -}; - -struct i915_ppgtt { - struct i915_address_space vm; - - struct i915_page_directory *pd; -}; - -struct gen6_ppgtt { - struct i915_ppgtt base; - - struct mutex flush; - struct i915_vma *vma; - gen6_pte_t __iomem *pd_addr; - - atomic_t pin_count; - struct mutex pin_mutex; - - bool scan_for_unused_pt; -}; - -#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base) - -static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) -{ - BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base)); - return __to_gen6_ppgtt(base); -} - -/* - * gen6_for_each_pde() iterates over every pde from start until start+length. - * If start and start+length are not perfectly divisible, the macro will round - * down and up as needed. Start=0 and length=2G effectively iterates over - * every PDE in the system. The macro modifies ALL its parameters except 'pd', - * so each of the other parameters should preferably be a simple variable, or - * at most an lvalue with no side-effects! - */ -#define gen6_for_each_pde(pt, pd, start, length, iter) \ - for (iter = gen6_pde_index(start); \ - length > 0 && iter < I915_PDES && \ - (pt = i915_pt_entry(pd, iter), true); \ - ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -#define gen6_for_all_pdes(pt, pd, iter) \ - for (iter = 0; \ - iter < I915_PDES && \ - (pt = i915_pt_entry(pd, iter), true); \ - ++iter) - -static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) -{ - const u32 mask = NUM_PTE(pde_shift) - 1; - - return (address >> PAGE_SHIFT) & mask; -} - -/* Helper to counts the number of PTEs within the given length. This count - * does not cross a page table boundary, so the max value would be - * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. -*/ -static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) -{ - const u64 mask = ~((1ULL << pde_shift) - 1); - u64 end; - - GEM_BUG_ON(length == 0); - GEM_BUG_ON(offset_in_page(addr | length)); - - end = addr + length; - - if ((addr & mask) != (end & mask)) - return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); - - return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); -} - -static inline u32 i915_pde_index(u64 addr, u32 shift) -{ - return (addr >> shift) & I915_PDE_MASK; -} - -static inline u32 gen6_pte_index(u32 addr) -{ - return i915_pte_index(addr, GEN6_PDE_SHIFT); -} - -static inline u32 gen6_pte_count(u32 addr, u32 length) -{ - return i915_pte_count(addr, length, GEN6_PDE_SHIFT); -} - -static inline u32 gen6_pde_index(u32 addr) -{ - return i915_pde_index(addr, GEN6_PDE_SHIFT); -} - -static inline struct i915_page_table * -i915_pt_entry(const struct i915_page_directory * const pd, - const unsigned short n) -{ - return pd->entry[n]; -} - -static inline struct i915_page_directory * -i915_pd_entry(const struct i915_page_directory * const pdp, - const unsigned short n) -{ - return pdp->entry[n]; -} - -static inline dma_addr_t -i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) -{ - struct i915_page_dma *pt = ppgtt->pd->entry[n]; - - return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); -} - -static inline struct i915_ggtt * -i915_vm_to_ggtt(struct i915_address_space *vm) -{ - BUILD_BUG_ON(offsetof(struct i915_ggtt, vm)); - GEM_BUG_ON(!i915_is_ggtt(vm)); - return container_of(vm, struct i915_ggtt, vm); -} - -static inline struct i915_ppgtt * -i915_vm_to_ppgtt(struct i915_address_space *vm) -{ - BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm)); - GEM_BUG_ON(i915_is_ggtt(vm)); - return container_of(vm, struct i915_ppgtt, vm); -} - -int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); -int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); -int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); -void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); -void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); -int i915_init_ggtt(struct drm_i915_private *dev_priv); -void i915_ggtt_driver_release(struct drm_i915_private *dev_priv); - -static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) -{ - return ggtt->mappable_end > 0; -} - -int i915_ppgtt_init_hw(struct intel_gt *gt); - -struct i915_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv); - -static inline struct i915_address_space * -i915_vm_get(struct i915_address_space *vm) -{ - kref_get(&vm->ref); - return vm; -} - -void i915_vm_release(struct kref *kref); - -static inline void i915_vm_put(struct i915_address_space *vm) -{ - kref_put(&vm->ref, i915_vm_release); -} - -static inline struct i915_address_space * -i915_vm_open(struct i915_address_space *vm) -{ - GEM_BUG_ON(!atomic_read(&vm->open)); - atomic_inc(&vm->open); - return i915_vm_get(vm); -} - -static inline bool -i915_vm_tryopen(struct i915_address_space *vm) -{ - if (atomic_add_unless(&vm->open, 1, 0)) - return i915_vm_get(vm); - - return false; -} - -void __i915_vm_close(struct i915_address_space *vm); - -static inline void -i915_vm_close(struct i915_address_space *vm) -{ - GEM_BUG_ON(!atomic_read(&vm->open)); - if (atomic_dec_and_test(&vm->open)) - __i915_vm_close(vm); - - i915_vm_put(vm); -} - -int gen6_ppgtt_pin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); - -void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv); -void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv); +struct i915_address_space; int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, struct sg_table *pages); @@ -664,6 +46,6 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ #define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ -#define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) +#define PIN_OFFSET_MASK I915_GTT_PAGE_MASK #endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fda0977d2059..4c1836f0a991 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -41,6 +41,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" +#include "gt/intel_gt_pm.h" #include "i915_drv.h" #include "i915_gpu_error.h" @@ -232,14 +233,13 @@ static void pool_free(struct pagevec *pv, void *addr) #ifdef CONFIG_DRM_I915_COMPRESS_ERROR -struct compress { +struct i915_vma_compress { struct pagevec pool; struct z_stream_s zstream; void *tmp; - bool wc; }; -static bool compress_init(struct compress *c) +static bool compress_init(struct i915_vma_compress *c) { struct z_stream_s *zstream = &c->zstream; @@ -261,7 +261,7 @@ static bool compress_init(struct compress *c) return true; } -static bool compress_start(struct compress *c) +static bool compress_start(struct i915_vma_compress *c) { struct z_stream_s *zstream = &c->zstream; void *workspace = zstream->workspace; @@ -272,8 +272,8 @@ static bool compress_start(struct compress *c) return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK; } -static void *compress_next_page(struct compress *c, - struct drm_i915_error_object *dst) +static void *compress_next_page(struct i915_vma_compress *c, + struct i915_vma_coredump *dst) { void *page; @@ -287,14 +287,15 @@ static void *compress_next_page(struct compress *c, return dst->pages[dst->page_count++] = page; } -static int compress_page(struct compress *c, +static int compress_page(struct i915_vma_compress *c, void *src, - struct drm_i915_error_object *dst) + struct i915_vma_coredump *dst, + bool wc) { struct z_stream_s *zstream = &c->zstream; zstream->next_in = src; - if (c->wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) + if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) zstream->next_in = c->tmp; zstream->avail_in = PAGE_SIZE; @@ -318,8 +319,8 @@ static int compress_page(struct compress *c, return 0; } -static int compress_flush(struct compress *c, - struct drm_i915_error_object *dst) +static int compress_flush(struct i915_vma_compress *c, + struct i915_vma_coredump *dst) { struct z_stream_s *zstream = &c->zstream; @@ -347,12 +348,12 @@ end: return 0; } -static void compress_finish(struct compress *c) +static void compress_finish(struct i915_vma_compress *c) { zlib_deflateEnd(&c->zstream); } -static void compress_fini(struct compress *c) +static void compress_fini(struct i915_vma_compress *c) { kfree(c->zstream.workspace); if (c->tmp) @@ -367,24 +368,24 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) #else -struct compress { +struct i915_vma_compress { struct pagevec pool; - bool wc; }; -static bool compress_init(struct compress *c) +static bool compress_init(struct i915_vma_compress *c) { return pool_init(&c->pool, ALLOW_FAIL) == 0; } -static bool compress_start(struct compress *c) +static bool compress_start(struct i915_vma_compress *c) { return true; } -static int compress_page(struct compress *c, +static int compress_page(struct i915_vma_compress *c, void *src, - struct drm_i915_error_object *dst) + struct i915_vma_coredump *dst, + bool wc) { void *ptr; @@ -392,24 +393,24 @@ static int compress_page(struct compress *c, if (!ptr) return -ENOMEM; - if (!(c->wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) + if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; return 0; } -static int compress_flush(struct compress *c, - struct drm_i915_error_object *dst) +static int compress_flush(struct i915_vma_compress *c, + struct i915_vma_coredump *dst) { return 0; } -static void compress_finish(struct compress *c) +static void compress_finish(struct i915_vma_compress *c) { } -static void compress_fini(struct compress *c) +static void compress_fini(struct i915_vma_compress *c) { pool_fini(&c->pool); } @@ -422,7 +423,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) #endif static void error_print_instdone(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee) + const struct intel_engine_coredump *ee) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu; int slice; @@ -453,40 +454,56 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - const struct drm_i915_error_request *erq, - const unsigned long epoch) + const struct i915_request_coredump *erq) { if (!erq->seqno) return; - err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->context, erq->seqno, test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &erq->flags) ? "!" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &erq->flags) ? "+" : "", erq->sched_attr.priority, - jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, - const struct drm_i915_error_context *ctx) + const struct i915_gem_context_coredump *ctx) { err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->sched_attr.priority, ctx->guilty, ctx->active); } +static struct i915_vma_coredump * +__find_vma(struct i915_vma_coredump *vma, const char *name) +{ + while (vma) { + if (strcmp(vma->name, name) == 0) + return vma; + vma = vma->next; + } + + return NULL; +} + +static struct i915_vma_coredump * +find_batch(const struct intel_engine_coredump *ee) +{ + return __find_vma(ee->vma, "batch"); +} + static void error_print_engine(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee, - const unsigned long epoch) + const struct intel_engine_coredump *ee) { + struct i915_vma_coredump *batch; int n; err_printf(m, "%s command stream:\n", ee->engine->name); - err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); + err_printf(m, " CCID: 0x%08x\n", ee->ccid); err_printf(m, " START: 0x%08x\n", ee->start); err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n", @@ -501,9 +518,10 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_instdone(m, ee); - if (ee->batchbuffer) { - u64 start = ee->batchbuffer->gtt_offset; - u64 end = start + ee->batchbuffer->gtt_size; + batch = find_batch(ee); + if (batch) { + u64 start = batch->gtt_offset; + u64 end = start + batch->gtt_size; err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n", upper_32_bits(start), lower_32_bits(start), @@ -535,13 +553,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } - err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); - err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); - error_print_request(m, " ", &ee->execlist[n], epoch); + error_print_request(m, " ", &ee->execlist[n]); } error_print_context(m, " Active context: ", &ee->context); @@ -556,38 +572,35 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } -static void print_error_obj(struct drm_i915_error_state_buf *m, +static void print_error_vma(struct drm_i915_error_state_buf *m, const struct intel_engine_cs *engine, - const char *name, - const struct drm_i915_error_object *obj) + const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; int page; - if (!obj) + if (!vma) return; - if (name) { - err_printf(m, "%s --- %s = 0x%08x %08x\n", - engine ? engine->name : "global", name, - upper_32_bits(obj->gtt_offset), - lower_32_bits(obj->gtt_offset)); - } + err_printf(m, "%s --- %s = 0x%08x %08x\n", + engine ? engine->name : "global", vma->name, + upper_32_bits(vma->gtt_offset), + lower_32_bits(vma->gtt_offset)); - if (obj->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) - err_printf(m, "gtt_page_sizes = 0x%08x\n", obj->gtt_page_sizes); + if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) + err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes); err_compression_marker(m); - for (page = 0; page < obj->page_count; page++) { + for (page = 0; page < vma->page_count; page++) { int i, len; len = PAGE_SIZE; - if (page == obj->page_count - 1) - len -= obj->unused; + if (page == vma->page_count - 1) + len -= vma->unused; len = ascii85_encode_len(len); for (i = 0; i < len; i++) - err_puts(m, ascii85_encode(obj->pages[page][i], out)); + err_puts(m, ascii85_encode(vma->pages[page][i], out)); } err_puts(m, "\n"); } @@ -626,18 +639,13 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, } static void err_print_uc(struct drm_i915_error_state_buf *m, - const struct i915_error_uc *error_uc) + const struct intel_uc_coredump *error_uc) { struct drm_printer p = i915_error_printer(m); - const struct i915_gpu_state *error = - container_of(error_uc, typeof(*error), uc); - - if (!error->device_info.has_gt_uc) - return; intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); - print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); + print_error_vma(m, NULL, error_uc->guc_log); } static void err_free_sgl(struct scatterlist *sgl) @@ -657,12 +665,69 @@ static void err_free_sgl(struct scatterlist *sgl) } } +static void err_print_gt(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + const struct intel_engine_coredump *ee; + int i; + + err_printf(m, "GT awake: %s\n", yesno(gt->awake)); + err_printf(m, "EIR: 0x%08x\n", gt->eir); + err_printf(m, "IER: 0x%08x\n", gt->ier); + for (i = 0; i < gt->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]); + err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); + err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake); + err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); + + for (i = 0; i < gt->nfence; i++) + err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]); + + if (IS_GEN_RANGE(m->i915, 6, 11)) { + err_printf(m, "ERROR: 0x%08x\n", gt->error); + err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg); + } + + if (INTEL_GEN(m->i915) >= 8) + err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", + gt->fault_data1, gt->fault_data0); + + if (IS_GEN(m->i915, 7)) + err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int); + + if (IS_GEN_RANGE(m->i915, 8, 11)) + err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache); + + if (IS_GEN(m->i915, 12)) + err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err); + + if (INTEL_GEN(m->i915) >= 12) { + int i; + + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, + gt->sfc_done[i]); + + err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); + } + + for (ee = gt->engine; ee; ee = ee->next) { + const struct i915_vma_coredump *vma; + + error_print_engine(m, ee); + for (vma = ee->vma; vma; vma = vma->next) + print_error_vma(m, ee->engine, vma); + } + + if (gt->uc) + err_print_uc(m, gt->uc); +} + static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, - struct i915_gpu_state *error) + struct i915_gpu_coredump *error) { - const struct drm_i915_error_engine *ee; + const struct intel_engine_coredump *ee; struct timespec64 ts; - int i, j; if (*error->error_msg) err_printf(m, "%s\n", error->error_msg); @@ -682,7 +747,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "Capture: %lu jiffies; %d ms ago\n", error->capture, jiffies_to_msecs(jiffies - error->capture)); - for (ee = error->engine; ee; ee = ee->next) + for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next) err_printf(m, "Active process (on ring %s): %s [%d]\n", ee->engine->name, ee->context.comm, @@ -708,90 +773,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, CSR_VERSION_MINOR(csr->version)); } - err_printf(m, "GT awake: %s\n", yesno(error->awake)); err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); - err_printf(m, "EIR: 0x%08x\n", error->eir); - err_printf(m, "IER: 0x%08x\n", error->ier); - for (i = 0; i < error->ngtier; i++) - err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); - err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); - err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); - err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); - err_printf(m, "CCID: 0x%08x\n", error->ccid); - - for (i = 0; i < error->nfence; i++) - err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); - - if (IS_GEN_RANGE(m->i915, 6, 11)) { - err_printf(m, "ERROR: 0x%08x\n", error->error); - err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); - } - if (INTEL_GEN(m->i915) >= 8) - err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", - error->fault_data1, error->fault_data0); - - if (IS_GEN(m->i915, 7)) - err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); - - if (IS_GEN_RANGE(m->i915, 8, 11)) - err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); - - if (IS_GEN(m->i915, 12)) - err_printf(m, "AUX_ERR_DBG: 0x%08x\n", error->aux_err); - - if (INTEL_GEN(m->i915) >= 12) { - int i; - - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) - err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, - error->sfc_done[i]); - - err_printf(m, " GAM_DONE: 0x%08x\n", error->gam_done); - } - - for (ee = error->engine; ee; ee = ee->next) - error_print_engine(m, ee, error->capture); - - for (ee = error->engine; ee; ee = ee->next) { - const struct drm_i915_error_object *obj; - - obj = ee->batchbuffer; - if (obj) { - err_puts(m, ee->engine->name); - if (ee->context.pid) - err_printf(m, " (submitted by %s [%d])", - ee->context.comm, - ee->context.pid); - err_printf(m, " --- gtt_offset = 0x%08x %08x\n", - upper_32_bits(obj->gtt_offset), - lower_32_bits(obj->gtt_offset)); - print_error_obj(m, ee->engine, NULL, obj); - } - - for (j = 0; j < ee->user_bo_count; j++) - print_error_obj(m, ee->engine, "user", ee->user_bo[j]); - - if (ee->num_requests) { - err_printf(m, "%s --- %d requests\n", - ee->engine->name, - ee->num_requests); - for (j = 0; j < ee->num_requests; j++) - error_print_request(m, " ", - &ee->requests[j], - error->capture); - } - - print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer); - print_error_obj(m, ee->engine, "HW Status", ee->hws_page); - print_error_obj(m, ee->engine, "HW context", ee->ctx); - print_error_obj(m, ee->engine, "WA context", ee->wa_ctx); - print_error_obj(m, ee->engine, - "WA batchbuffer", ee->wa_batchbuffer); - print_error_obj(m, ee->engine, - "NULL context", ee->default_state); - } + if (error->gt) + err_print_gt(m, error->gt); if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -802,10 +788,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_print_capabilities(m, &error->device_info, &error->runtime_info, &error->driver_caps); err_print_params(m, &error->params); - err_print_uc(m, &error->uc); } -static int err_print_to_sgl(struct i915_gpu_state *error) +static int err_print_to_sgl(struct i915_gpu_coredump *error) { struct drm_i915_error_state_buf m; @@ -842,8 +827,8 @@ static int err_print_to_sgl(struct i915_gpu_state *error) return 0; } -ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, - char *buf, loff_t off, size_t rem) +ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, + char *buf, loff_t off, size_t rem) { struct scatterlist *sg; size_t count; @@ -906,85 +891,88 @@ ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, return count; } -static void i915_error_object_free(struct drm_i915_error_object *obj) +static void i915_vma_coredump_free(struct i915_vma_coredump *vma) { - int page; + while (vma) { + struct i915_vma_coredump *next = vma->next; + int page; - if (obj == NULL) - return; + for (page = 0; page < vma->page_count; page++) + free_page((unsigned long)vma->pages[page]); - for (page = 0; page < obj->page_count; page++) - free_page((unsigned long)obj->pages[page]); - - kfree(obj); + kfree(vma); + vma = next; + } } - -static void cleanup_params(struct i915_gpu_state *error) +static void cleanup_params(struct i915_gpu_coredump *error) { i915_params_free(&error->params); } -static void cleanup_uc_state(struct i915_gpu_state *error) +static void cleanup_uc(struct intel_uc_coredump *uc) { - struct i915_error_uc *error_uc = &error->uc; + kfree(uc->guc_fw.path); + kfree(uc->huc_fw.path); + i915_vma_coredump_free(uc->guc_log); - kfree(error_uc->guc_fw.path); - kfree(error_uc->huc_fw.path); - i915_error_object_free(error_uc->guc_log); + kfree(uc); } -void __i915_gpu_state_free(struct kref *error_ref) +static void cleanup_gt(struct intel_gt_coredump *gt) { - struct i915_gpu_state *error = - container_of(error_ref, typeof(*error), ref); - long i; + while (gt->engine) { + struct intel_engine_coredump *ee = gt->engine; + + gt->engine = ee->next; + + i915_vma_coredump_free(ee->vma); + kfree(ee); + } - while (error->engine) { - struct drm_i915_error_engine *ee = error->engine; + if (gt->uc) + cleanup_uc(gt->uc); - error->engine = ee->next; + kfree(gt); +} - for (i = 0; i < ee->user_bo_count; i++) - i915_error_object_free(ee->user_bo[i]); - kfree(ee->user_bo); +void __i915_gpu_coredump_free(struct kref *error_ref) +{ + struct i915_gpu_coredump *error = + container_of(error_ref, typeof(*error), ref); - i915_error_object_free(ee->batchbuffer); - i915_error_object_free(ee->wa_batchbuffer); - i915_error_object_free(ee->ringbuffer); - i915_error_object_free(ee->hws_page); - i915_error_object_free(ee->ctx); - i915_error_object_free(ee->wa_ctx); + while (error->gt) { + struct intel_gt_coredump *gt = error->gt; - kfree(ee->requests); - kfree(ee); + error->gt = gt->next; + cleanup_gt(gt); } kfree(error->overlay); kfree(error->display); cleanup_params(error); - cleanup_uc_state(error); err_free_sgl(error->sgl); kfree(error); } -static struct drm_i915_error_object * -i915_error_object_create(struct drm_i915_private *i915, - struct i915_vma *vma, - struct compress *compress) +static struct i915_vma_coredump * +i915_vma_coredump_create(const struct intel_gt *gt, + const struct i915_vma *vma, + const char *name, + struct i915_vma_compress *compress) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = gt->ggtt; const u64 slot = ggtt->error_capture.start; - struct drm_i915_error_object *dst; + struct i915_vma_coredump *dst; unsigned long num_pages; struct sgt_iter iter; int ret; might_sleep(); - if (!vma || !vma->pages) + if (!vma || !vma->pages || !compress) return NULL; num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; @@ -998,6 +986,9 @@ i915_error_object_create(struct drm_i915_private *i915, return NULL; } + strcpy(dst->name, name); + dst->next = NULL; + dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; dst->gtt_page_sizes = vma->page_sizes.gtt; @@ -1005,9 +996,6 @@ i915_error_object_create(struct drm_i915_private *i915, dst->page_count = 0; dst->unused = 0; - compress->wc = i915_gem_object_is_lmem(vma->obj) || - drm_mm_node_allocated(&ggtt->error_capture); - ret = -EINVAL; if (drm_mm_node_allocated(&ggtt->error_capture)) { void __iomem *s; @@ -1016,9 +1004,12 @@ i915_error_object_create(struct drm_i915_private *i915, for_each_sgt_daddr(dma, iter, vma->pages) { ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); + mb(); s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); - ret = compress_page(compress, (void __force *)s, dst); + ret = compress_page(compress, + (void __force *)s, dst, + true); io_mapping_unmap(s); if (ret) break; @@ -1031,7 +1022,9 @@ i915_error_object_create(struct drm_i915_private *i915, void __iomem *s; s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); - ret = compress_page(compress, (void __force *)s, dst); + ret = compress_page(compress, + (void __force *)s, dst, + true); io_mapping_unmap(s); if (ret) break; @@ -1045,7 +1038,7 @@ i915_error_object_create(struct drm_i915_private *i915, drm_clflush_pages(&page, 1); s = kmap(page); - ret = compress_page(compress, s, dst); + ret = compress_page(compress, s, dst, false); kunmap(page); drm_clflush_pages(&page, 1); @@ -1066,77 +1059,56 @@ i915_error_object_create(struct drm_i915_private *i915, return dst; } -/* - * Generate a semi-unique error code. The code is not meant to have meaning, The - * code's only purpose is to try to prevent false duplicated bug reports by - * grossly estimating a GPU error state. - * - * TODO Ideally, hashing the batchbuffer would be a very nice way to determine - * the hang if we could strip the GTT offset information from it. - * - * It's only a small step better than a random number in its current form. - */ -static u32 i915_error_generate_code(struct i915_gpu_state *error) -{ - const struct drm_i915_error_engine *ee = error->engine; - - /* - * IPEHR would be an ideal way to detect errors, as it's the gross - * measure of "the command that hung." However, has some very common - * synchronization commands which almost always appear in the case - * strictly a client bug. Use instdone to differentiate those some. - */ - return ee ? ee->ipehr ^ ee->instdone.instdone : 0; -} - -static void gem_record_fences(struct i915_gpu_state *error) +static void gt_record_fences(struct intel_gt_coredump *gt) { - struct drm_i915_private *dev_priv = error->i915; - struct intel_uncore *uncore = &dev_priv->uncore; + struct i915_ggtt *ggtt = gt->_gt->ggtt; + struct intel_uncore *uncore = gt->_gt->uncore; int i; - if (INTEL_GEN(dev_priv) >= 6) { - for (i = 0; i < dev_priv->ggtt.num_fences; i++) - error->fence[i] = + if (INTEL_GEN(uncore->i915) >= 6) { + for (i = 0; i < ggtt->num_fences; i++) + gt->fence[i] = intel_uncore_read64(uncore, FENCE_REG_GEN6_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 4) { - for (i = 0; i < dev_priv->ggtt.num_fences; i++) - error->fence[i] = + } else if (INTEL_GEN(uncore->i915) >= 4) { + for (i = 0; i < ggtt->num_fences; i++) + gt->fence[i] = intel_uncore_read64(uncore, FENCE_REG_965_LO(i)); } else { - for (i = 0; i < dev_priv->ggtt.num_fences; i++) - error->fence[i] = + for (i = 0; i < ggtt->num_fences; i++) + gt->fence[i] = intel_uncore_read(uncore, FENCE_REG(i)); } - error->nfence = i; + gt->nfence = i; } -static void error_record_engine_registers(struct i915_gpu_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) +static void engine_record_registers(struct intel_engine_coredump *ee) { - struct drm_i915_private *dev_priv = engine->i915; + const struct intel_engine_cs *engine = ee->engine; + struct drm_i915_private *i915 = engine->i915; - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(i915) >= 6) { ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL); - if (INTEL_GEN(dev_priv) >= 12) - ee->fault_reg = I915_READ(GEN12_RING_FAULT_REG); - else if (INTEL_GEN(dev_priv) >= 8) - ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); + if (INTEL_GEN(i915) >= 12) + ee->fault_reg = intel_uncore_read(engine->uncore, + GEN12_RING_FAULT_REG); + else if (INTEL_GEN(i915) >= 8) + ee->fault_reg = intel_uncore_read(engine->uncore, + GEN8_RING_FAULT_REG); else ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine); } - if (INTEL_GEN(dev_priv) >= 4) { + if (INTEL_GEN(i915) >= 4) { ee->faddr = ENGINE_READ(engine, RING_DMA_FADD); ee->ipeir = ENGINE_READ(engine, RING_IPEIR); ee->ipehr = ENGINE_READ(engine, RING_IPEHR); ee->instps = ENGINE_READ(engine, RING_INSTPS); ee->bbaddr = ENGINE_READ(engine, RING_BBADDR); - if (INTEL_GEN(dev_priv) >= 8) { + ee->ccid = ENGINE_READ(engine, CCID); + if (INTEL_GEN(i915) >= 8) { ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32; ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32; } @@ -1155,13 +1127,13 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->head = ENGINE_READ(engine, RING_HEAD); ee->tail = ENGINE_READ(engine, RING_TAIL); ee->ctl = ENGINE_READ(engine, RING_CTL); - if (INTEL_GEN(dev_priv) > 2) + if (INTEL_GEN(i915) > 2) ee->mode = ENGINE_READ(engine, RING_MI_MODE); - if (!HWS_NEEDS_PHYSICAL(dev_priv)) { + if (!HWS_NEEDS_PHYSICAL(i915)) { i915_reg_t mmio; - if (IS_GEN(dev_priv, 7)) { + if (IS_GEN(i915, 7)) { switch (engine->id) { default: MISSING_CASE(engine->id); @@ -1186,40 +1158,40 @@ static void error_record_engine_registers(struct i915_gpu_state *error, mmio = RING_HWS_PGA(engine->mmio_base); } - ee->hws = I915_READ(mmio); + ee->hws = intel_uncore_read(engine->uncore, mmio); } - ee->idle = intel_engine_is_idle(engine); - ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, - engine); + ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine); - if (HAS_PPGTT(dev_priv)) { + if (HAS_PPGTT(i915)) { int i; ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7); - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(i915, 6)) { ee->vm_info.pp_dir_base = ENGINE_READ(engine, RING_PP_DIR_BASE_READ); - } else if (IS_GEN(dev_priv, 7)) { + } else if (IS_GEN(i915, 7)) { ee->vm_info.pp_dir_base = ENGINE_READ(engine, RING_PP_DIR_BASE); - } else if (INTEL_GEN(dev_priv) >= 8) { + } else if (INTEL_GEN(i915) >= 8) { u32 base = engine->mmio_base; for (i = 0; i < 4; i++) { ee->vm_info.pdp[i] = - I915_READ(GEN8_RING_PDP_UDW(base, i)); + intel_uncore_read(engine->uncore, + GEN8_RING_PDP_UDW(base, i)); ee->vm_info.pdp[i] <<= 32; ee->vm_info.pdp[i] |= - I915_READ(GEN8_RING_PDP_LDW(base, i)); + intel_uncore_read(engine->uncore, + GEN8_RING_PDP_LDW(base, i)); } } } } static void record_request(const struct i915_request *request, - struct drm_i915_error_request *erq) + struct i915_request_coredump *erq) { const struct i915_gem_context *ctx; @@ -1227,7 +1199,6 @@ static void record_request(const struct i915_request *request, erq->context = request->fence.context; erq->seqno = request->fence.seqno; erq->sched_attr = request->sched.attr; - erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; @@ -1240,59 +1211,10 @@ static void record_request(const struct i915_request *request, rcu_read_unlock(); } -static void engine_record_requests(struct intel_engine_cs *engine, - struct i915_request *first, - struct drm_i915_error_engine *ee) +static void engine_record_execlists(struct intel_engine_coredump *ee) { - struct i915_request *request; - int count; - - count = 0; - request = first; - list_for_each_entry_from(request, &engine->active.requests, sched.link) - count++; - if (!count) - return; - - ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL); - if (!ee->requests) - return; - - ee->num_requests = count; - - count = 0; - request = first; - list_for_each_entry_from(request, - &engine->active.requests, sched.link) { - if (count >= ee->num_requests) { - /* - * If the ring request list was changed in - * between the point where the error request - * list was created and dimensioned and this - * point then just exit early to avoid crashes. - * - * We don't need to communicate that the - * request list changed state during error - * state capture and that the error state is - * slightly incorrect as a consequence since we - * are typically only interested in the request - * list state at the point of error state - * capture, not in any changes happening during - * the capture. - */ - break; - } - - record_request(request, &ee->requests[count++]); - } - ee->num_requests = count; -} - -static void error_record_engine_execlists(const struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - const struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request * const *port = execlists->active; + const struct intel_engine_execlists * const el = &ee->engine->execlists; + struct i915_request * const *port = el->active; unsigned int n = 0; while (*port) @@ -1301,7 +1223,7 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine, ee->num_ports = n; } -static bool record_context(struct drm_i915_error_context *e, +static bool record_context(struct i915_gem_context_coredump *e, const struct i915_request *rq) { struct i915_gem_context *ctx; @@ -1334,23 +1256,24 @@ static bool record_context(struct drm_i915_error_context *e, return capture; } -struct capture_vma { - struct capture_vma *next; - void **slot; +struct intel_engine_capture_vma { + struct intel_engine_capture_vma *next; + struct i915_vma *vma; + char name[16]; }; -static struct capture_vma * -capture_vma(struct capture_vma *next, +static struct intel_engine_capture_vma * +capture_vma(struct intel_engine_capture_vma *next, struct i915_vma *vma, - struct drm_i915_error_object **out) + const char *name, + gfp_t gfp) { - struct capture_vma *c; + struct intel_engine_capture_vma *c; - *out = NULL; if (!vma) return next; - c = kmalloc(sizeof(*c), ATOMIC_MAYFAIL); + c = kmalloc(sizeof(*c), gfp); if (!c) return next; @@ -1359,54 +1282,31 @@ capture_vma(struct capture_vma *next, return next; } - c->slot = (void **)out; - *c->slot = i915_vma_get(vma); + strcpy(c->name, name); + c->vma = i915_vma_get(vma); c->next = next; return c; } -static struct capture_vma * -request_record_user_bo(struct i915_request *request, - struct drm_i915_error_engine *ee, - struct capture_vma *capture) +static struct intel_engine_capture_vma * +capture_user(struct intel_engine_capture_vma *capture, + const struct i915_request *rq, + gfp_t gfp) { struct i915_capture_list *c; - struct drm_i915_error_object **bo; - long count, max; - - max = 0; - for (c = request->capture_list; c; c = c->next) - max++; - if (!max) - return capture; - - bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL); - if (!bo) { - /* If we can't capture everything, try to capture something. */ - max = min_t(long, max, PAGE_SIZE / sizeof(*bo)); - bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL); - } - if (!bo) - return capture; - count = 0; - for (c = request->capture_list; c; c = c->next) { - capture = capture_vma(capture, c->vma, &bo[count]); - if (++count == max) - break; - } - - ee->user_bo = bo; - ee->user_bo_count = count; + for (c = rq->capture_list; c; c = c->next) + capture = capture_vma(capture, c->vma, "user", gfp); return capture; } -static struct drm_i915_error_object * -capture_object(struct drm_i915_private *dev_priv, +static struct i915_vma_coredump * +capture_object(const struct intel_gt *gt, struct drm_i915_gem_object *obj, - struct compress *compress) + const char *name, + struct i915_vma_compress *compress) { if (obj && i915_gem_object_has_pages(obj)) { struct i915_vma fake = { @@ -1416,127 +1316,175 @@ capture_object(struct drm_i915_private *dev_priv, .obj = obj, }; - return i915_error_object_create(dev_priv, &fake, compress); + return i915_vma_coredump_create(gt, &fake, name, compress); } else { return NULL; } } -static void -gem_record_rings(struct i915_gpu_state *error, struct compress *compress) +static void add_vma(struct intel_engine_coredump *ee, + struct i915_vma_coredump *vma) { - struct drm_i915_private *i915 = error->i915; - struct intel_engine_cs *engine; - struct drm_i915_error_engine *ee; + if (vma) { + vma->next = ee->vma; + ee->vma = vma; + } +} + +struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +{ + struct intel_engine_coredump *ee; - ee = kzalloc(sizeof(*ee), GFP_KERNEL); + ee = kzalloc(sizeof(*ee), gfp); if (!ee) - return; + return NULL; - for_each_uabi_engine(engine, i915) { - struct capture_vma *capture = NULL; - struct i915_request *request; - unsigned long flags; + ee->engine = engine; - /* Refill our page pool before entering atomic section */ - pool_refill(&compress->pool, ALLOW_FAIL); + engine_record_registers(ee); + engine_record_execlists(ee); - spin_lock_irqsave(&engine->active.lock, flags); - request = intel_engine_find_active_request(engine); - if (!request) { - spin_unlock_irqrestore(&engine->active.lock, flags); - continue; - } + return ee; +} - error->simulated |= record_context(&ee->context, request); +struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp) +{ + struct intel_engine_capture_vma *vma = NULL; - /* - * We need to copy these to an anonymous buffer - * as the simplest method to avoid being overwritten - * by userspace. - */ - capture = capture_vma(capture, - request->batch, - &ee->batchbuffer); + ee->simulated |= record_context(&ee->context, rq); + if (ee->simulated) + return NULL; - if (HAS_BROKEN_CS_TLB(i915)) - capture = capture_vma(capture, - engine->gt->scratch, - &ee->wa_batchbuffer); + /* + * We need to copy these to an anonymous buffer + * as the simplest method to avoid being overwritten + * by userspace. + */ + vma = capture_vma(vma, rq->batch, "batch", gfp); + vma = capture_user(vma, rq, gfp); + vma = capture_vma(vma, rq->ring->vma, "ring", gfp); + vma = capture_vma(vma, rq->context->state, "HW context", gfp); - capture = request_record_user_bo(request, ee, capture); + ee->rq_head = rq->head; + ee->rq_post = rq->postfix; + ee->rq_tail = rq->tail; - capture = capture_vma(capture, - request->context->state, - &ee->ctx); + return vma; +} - capture = capture_vma(capture, - request->ring->vma, - &ee->ringbuffer); +void +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress) +{ + const struct intel_engine_cs *engine = ee->engine; - ee->cpu_ring_head = request->ring->head; - ee->cpu_ring_tail = request->ring->tail; + while (capture) { + struct intel_engine_capture_vma *this = capture; + struct i915_vma *vma = this->vma; - ee->rq_head = request->head; - ee->rq_post = request->postfix; - ee->rq_tail = request->tail; + add_vma(ee, + i915_vma_coredump_create(engine->gt, + vma, this->name, + compress)); - engine_record_requests(engine, request, ee); - spin_unlock_irqrestore(&engine->active.lock, flags); + i915_active_release(&vma->active); + i915_vma_put(vma); - error_record_engine_registers(error, engine, ee); - error_record_engine_execlists(engine, ee); + capture = this->next; + kfree(this); + } - while (capture) { - struct capture_vma *this = capture; - struct i915_vma *vma = *this->slot; + add_vma(ee, + i915_vma_coredump_create(engine->gt, + engine->status_page.vma, + "HW Status", + compress)); - *this->slot = - i915_error_object_create(i915, vma, compress); + add_vma(ee, + i915_vma_coredump_create(engine->gt, + engine->wa_ctx.vma, + "WA context", + compress)); - i915_active_release(&vma->active); - i915_vma_put(vma); + add_vma(ee, + capture_object(engine->gt, + engine->default_state, + "NULL context", + compress)); +} - capture = this->next; - kfree(this); - } +static struct intel_engine_coredump * +capture_engine(struct intel_engine_cs *engine, + struct i915_vma_compress *compress) +{ + struct intel_engine_capture_vma *capture = NULL; + struct intel_engine_coredump *ee; + struct i915_request *rq; + unsigned long flags; - ee->hws_page = - i915_error_object_create(i915, - engine->status_page.vma, - compress); + ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); + if (!ee) + return NULL; + + spin_lock_irqsave(&engine->active.lock, flags); + rq = intel_engine_find_active_request(engine); + if (rq) + capture = intel_engine_coredump_add_request(ee, rq, + ATOMIC_MAYFAIL); + spin_unlock_irqrestore(&engine->active.lock, flags); + if (!capture) { + kfree(ee); + return NULL; + } - ee->wa_ctx = - i915_error_object_create(i915, - engine->wa_ctx.vma, - compress); + intel_engine_coredump_add_vma(ee, capture, compress); - ee->default_state = - capture_object(i915, engine->default_state, compress); + return ee; +} - ee->engine = engine; +static void +gt_record_engines(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; - ee->next = error->engine; - error->engine = ee; + for_each_engine(engine, gt->_gt, id) { + struct intel_engine_coredump *ee; - ee = kzalloc(sizeof(*ee), GFP_KERNEL); + /* Refill our page pool before entering atomic section */ + pool_refill(&compress->pool, ALLOW_FAIL); + + ee = capture_engine(engine, compress); if (!ee) - return; - } + continue; - kfree(ee); + gt->simulated |= ee->simulated; + if (ee->simulated) { + kfree(ee); + continue; + } + + ee->next = gt->engine; + gt->engine = ee; + } } -static void -capture_uc_state(struct i915_gpu_state *error, struct compress *compress) +static struct intel_uc_coredump * +gt_record_uc(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) { - struct drm_i915_private *i915 = error->i915; - struct i915_error_uc *error_uc = &error->uc; - struct intel_uc *uc = &i915->gt.uc; + const struct intel_uc *uc = >->_gt->uc; + struct intel_uc_coredump *error_uc; - /* Capturing uC state won't be useful if there is no GuC */ - if (!error->device_info.has_gt_uc) - return; + error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL); + if (!error_uc) + return NULL; memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw)); memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw)); @@ -1547,19 +1495,42 @@ capture_uc_state(struct i915_gpu_state *error, struct compress *compress) */ error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); - error_uc->guc_log = i915_error_object_create(i915, - uc->guc.log.vma, - compress); + error_uc->guc_log = + i915_vma_coredump_create(gt->_gt, + uc->guc.log.vma, "GuC log buffer", + compress); + + return error_uc; +} + +static void gt_capture_prepare(struct intel_gt_coredump *gt) +{ + struct i915_ggtt *ggtt = gt->_gt->ggtt; + + mutex_lock(&ggtt->error_mutex); +} + +static void gt_capture_finish(struct intel_gt_coredump *gt) +{ + struct i915_ggtt *ggtt = gt->_gt->ggtt; + + if (drm_mm_node_allocated(&ggtt->error_capture)) + ggtt->vm.clear_range(&ggtt->vm, + ggtt->error_capture.start, + PAGE_SIZE); + + mutex_unlock(&ggtt->error_mutex); } /* Capture all registers which don't fit into another category. */ -static void capture_reg_state(struct i915_gpu_state *error) +static void gt_record_regs(struct intel_gt_coredump *gt) { - struct drm_i915_private *i915 = error->i915; - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->_gt->uncore; + struct drm_i915_private *i915 = uncore->i915; int i; - /* General organization + /* + * General organization * 1. Registers specific to a single generation * 2. Registers which belong to multiple generations * 3. Feature specific registers. @@ -1569,138 +1540,162 @@ static void capture_reg_state(struct i915_gpu_state *error) /* 1: Registers specific to a single generation */ if (IS_VALLEYVIEW(i915)) { - error->gtier[0] = intel_uncore_read(uncore, GTIER); - error->ier = intel_uncore_read(uncore, VLV_IER); - error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); + gt->gtier[0] = intel_uncore_read(uncore, GTIER); + gt->ier = intel_uncore_read(uncore, VLV_IER); + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); } if (IS_GEN(i915, 7)) - error->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); + gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); if (INTEL_GEN(i915) >= 12) { - error->fault_data0 = intel_uncore_read(uncore, - GEN12_FAULT_TLB_DATA0); - error->fault_data1 = intel_uncore_read(uncore, - GEN12_FAULT_TLB_DATA1); + gt->fault_data0 = intel_uncore_read(uncore, + GEN12_FAULT_TLB_DATA0); + gt->fault_data1 = intel_uncore_read(uncore, + GEN12_FAULT_TLB_DATA1); } else if (INTEL_GEN(i915) >= 8) { - error->fault_data0 = intel_uncore_read(uncore, - GEN8_FAULT_TLB_DATA0); - error->fault_data1 = intel_uncore_read(uncore, - GEN8_FAULT_TLB_DATA1); + gt->fault_data0 = intel_uncore_read(uncore, + GEN8_FAULT_TLB_DATA0); + gt->fault_data1 = intel_uncore_read(uncore, + GEN8_FAULT_TLB_DATA1); } if (IS_GEN(i915, 6)) { - error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE); - error->gab_ctl = intel_uncore_read(uncore, GAB_CTL); - error->gfx_mode = intel_uncore_read(uncore, GFX_MODE); + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE); + gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL); + gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE); } /* 2: Registers which belong to multiple generations */ if (INTEL_GEN(i915) >= 7) - error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); if (INTEL_GEN(i915) >= 6) { - error->derrmr = intel_uncore_read(uncore, DERRMR); + gt->derrmr = intel_uncore_read(uncore, DERRMR); if (INTEL_GEN(i915) < 12) { - error->error = intel_uncore_read(uncore, ERROR_GEN6); - error->done_reg = intel_uncore_read(uncore, DONE_REG); + gt->error = intel_uncore_read(uncore, ERROR_GEN6); + gt->done_reg = intel_uncore_read(uncore, DONE_REG); } } - if (INTEL_GEN(i915) >= 5) - error->ccid = intel_uncore_read(uncore, CCID(RENDER_RING_BASE)); - /* 3: Feature specific registers */ if (IS_GEN_RANGE(i915, 6, 7)) { - error->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK); - error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); + gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK); + gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); } if (IS_GEN_RANGE(i915, 8, 11)) - error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); + gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); if (IS_GEN(i915, 12)) - error->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); + gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); if (INTEL_GEN(i915) >= 12) { for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { - error->sfc_done[i] = + gt->sfc_done[i] = intel_uncore_read(uncore, GEN12_SFC_DONE(i)); } - error->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); + gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); } /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { - error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); - error->gtier[0] = + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); + gt->gtier[0] = intel_uncore_read(uncore, GEN11_RENDER_COPY_INTR_ENABLE); - error->gtier[1] = + gt->gtier[1] = intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE); - error->gtier[2] = + gt->gtier[2] = intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE); - error->gtier[3] = + gt->gtier[3] = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - error->gtier[4] = + gt->gtier[4] = intel_uncore_read(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE); - error->gtier[5] = + gt->gtier[5] = intel_uncore_read(uncore, GEN11_GUNIT_CSME_INTR_ENABLE); - error->ngtier = 6; + gt->ngtier = 6; } else if (INTEL_GEN(i915) >= 8) { - error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) - error->gtier[i] = intel_uncore_read(uncore, - GEN8_GT_IER(i)); - error->ngtier = 4; + gt->gtier[i] = + intel_uncore_read(uncore, GEN8_GT_IER(i)); + gt->ngtier = 4; } else if (HAS_PCH_SPLIT(i915)) { - error->ier = intel_uncore_read(uncore, DEIER); - error->gtier[0] = intel_uncore_read(uncore, GTIER); - error->ngtier = 1; + gt->ier = intel_uncore_read(uncore, DEIER); + gt->gtier[0] = intel_uncore_read(uncore, GTIER); + gt->ngtier = 1; } else if (IS_GEN(i915, 2)) { - error->ier = intel_uncore_read16(uncore, GEN2_IER); + gt->ier = intel_uncore_read16(uncore, GEN2_IER); } else if (!IS_VALLEYVIEW(i915)) { - error->ier = intel_uncore_read(uncore, GEN2_IER); + gt->ier = intel_uncore_read(uncore, GEN2_IER); } - error->eir = intel_uncore_read(uncore, EIR); - error->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); + gt->eir = intel_uncore_read(uncore, EIR); + gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); } -static const char * -error_msg(struct i915_gpu_state *error, - intel_engine_mask_t engines, const char *msg) +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The + * code's only purpose is to try to prevent false duplicated bug reports by + * grossly estimating a GPU error state. + * + * TODO Ideally, hashing the batchbuffer would be a very nice way to determine + * the hang if we could strip the GTT offset information from it. + * + * It's only a small step better than a random number in its current form. + */ +static u32 generate_ecode(const struct intel_engine_coredump *ee) { + /* + * IPEHR would be an ideal way to detect errors, as it's the gross + * measure of "the command that hung." However, has some very common + * synchronization commands which almost always appear in the case + * strictly a client bug. Use instdone to differentiate those some. + */ + return ee ? ee->ipehr ^ ee->instdone.instdone : 0; +} + +static const char *error_msg(struct i915_gpu_coredump *error) +{ + struct intel_engine_coredump *first = NULL; + struct intel_gt_coredump *gt; + intel_engine_mask_t engines; int len; + engines = 0; + for (gt = error->gt; gt; gt = gt->next) { + struct intel_engine_coredump *cs; + + if (gt->engine && !first) + first = gt->engine; + + for (cs = gt->engine; cs; cs = cs->next) + engines |= cs->engine->mask; + } + len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%x:0x%08x", + "GPU HANG: ecode %d:%x:%08x", INTEL_GEN(error->i915), engines, - i915_error_generate_code(error)); - if (error->engine) { + generate_ecode(first)); + if (first) { /* Just show the first executing process, more is confusing */ len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine->context.comm, - error->engine->context.pid); + first->context.comm, first->context.pid); } - if (msg) - len += scnprintf(error->error_msg + len, - sizeof(error->error_msg) - len, - ", %s", msg); return error->error_msg; } -static void capture_gen_state(struct i915_gpu_state *error) +static void capture_gen(struct i915_gpu_coredump *error) { struct drm_i915_private *i915 = error->i915; - error->awake = i915->gt.awake; error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); error->suspended = i915->runtime_pm.suspended; @@ -1711,6 +1706,7 @@ static void capture_gen_state(struct i915_gpu_state *error) error->reset_count = i915_reset_count(&i915->gpu_error); error->suspend_count = i915->suspend_count; + i915_params_copy(&error->params, &i915_modparams); memcpy(&error->device_info, INTEL_INFO(i915), sizeof(error->device_info)); @@ -1720,115 +1716,138 @@ static void capture_gen_state(struct i915_gpu_state *error) error->driver_caps = i915->caps; } -static void capture_params(struct i915_gpu_state *error) +struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) { - i915_params_copy(&error->params, &i915_modparams); + struct i915_gpu_coredump *error; + + if (!i915_modparams.error_capture) + return NULL; + + error = kzalloc(sizeof(*error), gfp); + if (!error) + return NULL; + + kref_init(&error->ref); + error->i915 = i915; + + error->time = ktime_get_real(); + error->boottime = ktime_get_boottime(); + error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time); + error->capture = jiffies; + + capture_gen(error); + + return error; } -static void capture_finish(struct i915_gpu_state *error) +#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) + +struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) { - struct i915_ggtt *ggtt = &error->i915->ggtt; + struct intel_gt_coredump *gc; - if (drm_mm_node_allocated(&ggtt->error_capture)) { - const u64 slot = ggtt->error_capture.start; + gc = kzalloc(sizeof(*gc), gfp); + if (!gc) + return NULL; + + gc->_gt = gt; + gc->awake = intel_gt_pm_is_awake(gt); - ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); + gt_record_regs(gc); + gt_record_fences(gc); + + return gc; +} + +struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt) +{ + struct i915_vma_compress *compress; + + compress = kmalloc(sizeof(*compress), ALLOW_FAIL); + if (!compress) + return NULL; + + if (!compress_init(compress)) { + kfree(compress); + return NULL; } + + gt_capture_prepare(gt); + + return compress; } -#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +void i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) +{ + if (!compress) + return; + + gt_capture_finish(gt); -struct i915_gpu_state * -i915_capture_gpu_state(struct drm_i915_private *i915) + compress_fini(compress); + kfree(compress); +} + +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915) { - struct i915_gpu_state *error; - struct compress compress; + struct i915_gpu_coredump *error; /* Check if GPU capture has been disabled */ error = READ_ONCE(i915->gpu_error.first_error); if (IS_ERR(error)) return error; - error = kzalloc(sizeof(*error), ALLOW_FAIL); - if (!error) { - i915_disable_error_state(i915, -ENOMEM); + error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL); + if (!error) return ERR_PTR(-ENOMEM); - } - if (!compress_init(&compress)) { - kfree(error); - i915_disable_error_state(i915, -ENOMEM); - return ERR_PTR(-ENOMEM); - } + error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL); + if (error->gt) { + struct i915_vma_compress *compress; - kref_init(&error->ref); - error->i915 = i915; + compress = i915_vma_capture_prepare(error->gt); + if (!compress) { + kfree(error->gt); + kfree(error); + return ERR_PTR(-ENOMEM); + } - error->time = ktime_get_real(); - error->boottime = ktime_get_boottime(); - error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time); - error->capture = jiffies; + gt_record_engines(error->gt, compress); - capture_params(error); - capture_gen_state(error); - capture_uc_state(error, &compress); - capture_reg_state(error); - gem_record_fences(error); - gem_record_rings(error, &compress); + if (INTEL_INFO(i915)->has_gt_uc) + error->gt->uc = gt_record_uc(error->gt, compress); + + i915_vma_capture_finish(error->gt, compress); + + error->simulated |= error->gt->simulated; + } error->overlay = intel_overlay_capture_error_state(i915); error->display = intel_display_capture_error_state(i915); - capture_finish(error); - compress_fini(&compress); - return error; } -/** - * i915_capture_error_state - capture an error record for later analysis - * @i915: i915 device - * @engine_mask: the mask of engines triggering the hang - * @msg: a message to insert into the error capture header - * - * Should be called when an error is detected (either a hang or an error - * interrupt) to capture error state from the time of the error. Fills - * out a structure which becomes available in debugfs for user level tools - * to pick up. - */ -void i915_capture_error_state(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - const char *msg) +void i915_error_state_store(struct i915_gpu_coredump *error) { + struct drm_i915_private *i915; static bool warned; - struct i915_gpu_state *error; - unsigned long flags; - if (!i915_modparams.error_capture) + if (IS_ERR_OR_NULL(error)) return; - if (READ_ONCE(i915->gpu_error.first_error)) - return; + i915 = error->i915; + dev_info(i915->drm.dev, "%s\n", error_msg(error)); - error = i915_capture_gpu_state(i915); - if (IS_ERR(error)) + if (error->simulated || + cmpxchg(&i915->gpu_error.first_error, NULL, error)) return; - dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg)); - - if (!error->simulated) { - spin_lock_irqsave(&i915->gpu_error.lock, flags); - if (!i915->gpu_error.first_error) { - i915->gpu_error.first_error = error; - error = NULL; - } - spin_unlock_irqrestore(&i915->gpu_error.lock, flags); - } - - if (error) { - __i915_gpu_state_free(&error->ref); - return; - } + i915_gpu_coredump_get(error); if (!xchg(&warned, true) && ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { @@ -1841,15 +1860,38 @@ void i915_capture_error_state(struct drm_i915_private *i915, } } -struct i915_gpu_state * +/** + * i915_capture_error_state - capture an error record for later analysis + * @i915: i915 device + * + * Should be called when an error is detected (either a hang or an error + * interrupt) to capture error state from the time of the error. Fills + * out a structure which becomes available in debugfs for user level tools + * to pick up. + */ +void i915_capture_error_state(struct drm_i915_private *i915) +{ + struct i915_gpu_coredump *error; + + error = i915_gpu_coredump(i915); + if (IS_ERR(error)) { + cmpxchg(&i915->gpu_error.first_error, NULL, error); + return; + } + + i915_error_state_store(error); + i915_gpu_coredump_put(error); +} + +struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; spin_lock_irq(&i915->gpu_error.lock); error = i915->gpu_error.first_error; if (!IS_ERR_OR_NULL(error)) - i915_gpu_state_get(error); + i915_gpu_coredump_get(error); spin_unlock_irq(&i915->gpu_error.lock); return error; @@ -1857,7 +1899,7 @@ i915_first_error_state(struct drm_i915_private *i915) void i915_reset_error_state(struct drm_i915_private *i915) { - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; spin_lock_irq(&i915->gpu_error.lock); error = i915->gpu_error.first_error; @@ -1866,7 +1908,7 @@ void i915_reset_error_state(struct drm_i915_private *i915) spin_unlock_irq(&i915->gpu_error.lock); if (!IS_ERR_OR_NULL(error)) - i915_gpu_state_put(error); + i915_gpu_coredump_put(error); } void i915_disable_error_state(struct drm_i915_private *i915, int err) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 5d2c3372ff99..9109004956bd 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -25,43 +25,100 @@ #include "i915_scheduler.h" struct drm_i915_private; +struct i915_vma_compress; +struct intel_engine_capture_vma; struct intel_overlay_error_state; struct intel_display_error_state; -struct i915_gpu_state { - struct kref ref; - ktime_t time; - ktime_t boottime; - ktime_t uptime; - unsigned long capture; +struct i915_vma_coredump { + struct i915_vma_coredump *next; - struct drm_i915_private *i915; + char name[20]; + + u64 gtt_offset; + u64 gtt_size; + u32 gtt_page_sizes; + + int num_pages; + int page_count; + int unused; + u32 *pages[0]; +}; + +struct i915_request_coredump { + unsigned long flags; + pid_t pid; + u32 context; + u32 seqno; + u32 start; + u32 head; + u32 tail; + struct i915_sched_attr sched_attr; +}; + +struct intel_engine_coredump { + const struct intel_engine_cs *engine; - char error_msg[128]; bool simulated; - bool awake; - bool wakelock; - bool suspended; - int iommu; u32 reset_count; - u32 suspend_count; - struct intel_device_info device_info; - struct intel_runtime_info runtime_info; - struct intel_driver_caps driver_caps; - struct i915_params params; - struct i915_error_uc { - struct intel_uc_fw guc_fw; - struct intel_uc_fw huc_fw; - struct drm_i915_error_object *guc_log; - } uc; + /* position of active request inside the ring */ + u32 rq_head, rq_post, rq_tail; + + /* Register state */ + u32 ccid; + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 bbstate; + u32 instpm; + u32 instps; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + struct intel_instdone instdone; + + struct i915_gem_context_coredump { + char comm[TASK_COMM_LEN]; + pid_t pid; + int active; + int guilty; + struct i915_sched_attr sched_attr; + } context; + + struct i915_vma_coredump *vma; + + struct i915_request_coredump execlist[EXECLIST_MAX_PORTS]; + unsigned int num_ports; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + + struct intel_engine_coredump *next; +}; + +struct intel_gt_coredump { + const struct intel_gt *_gt; + bool awake; + bool simulated; /* Generic register state */ u32 eir; u32 pgtbl_er; u32 ier; u32 gtier[6], ngtier; - u32 ccid; u32 derrmr; u32 forcewake; u32 error; /* gen6+ */ @@ -80,91 +137,45 @@ struct i915_gpu_state { u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; + + struct intel_engine_coredump *engine; + + struct intel_uc_coredump { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + struct i915_vma_coredump *guc_log; + } *uc; + + struct intel_gt_coredump *next; +}; + +struct i915_gpu_coredump { + struct kref ref; + ktime_t time; + ktime_t boottime; + ktime_t uptime; + unsigned long capture; + + struct drm_i915_private *i915; + + struct intel_gt_coredump *gt; + + char error_msg[128]; + bool simulated; + bool wakelock; + bool suspended; + int iommu; + u32 reset_count; + u32 suspend_count; + + struct intel_device_info device_info; + struct intel_runtime_info runtime_info; + struct intel_driver_caps driver_caps; + struct i915_params params; + struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; - struct drm_i915_error_engine { - const struct intel_engine_cs *engine; - - /* Software tracked state */ - bool idle; - int num_requests; - u32 reset_count; - - /* position of active request inside the ring */ - u32 rq_head, rq_post, rq_tail; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 mode; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 bbstate; - u32 instpm; - u32 instps; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - struct intel_instdone instdone; - - struct drm_i915_error_context { - char comm[TASK_COMM_LEN]; - pid_t pid; - int active; - int guilty; - struct i915_sched_attr sched_attr; - } context; - - struct drm_i915_error_object { - u64 gtt_offset; - u64 gtt_size; - u32 gtt_page_sizes; - int num_pages; - int page_count; - int unused; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object **user_bo; - long user_bo_count; - - struct drm_i915_error_object *wa_ctx; - struct drm_i915_error_object *default_state; - - struct drm_i915_error_request { - unsigned long flags; - long jiffies; - pid_t pid; - u32 context; - u32 seqno; - u32 start; - u32 head; - u32 tail; - struct i915_sched_attr sched_attr; - } *requests, execlist[EXECLIST_MAX_PORTS]; - unsigned int num_ports; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - - struct drm_i915_error_engine *next; - } *engine; - struct scatterlist *sgl, *fit; }; @@ -172,7 +183,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct i915_gpu_state *first_error; + struct i915_gpu_coredump *first_error; atomic_t pending_fb_pin; @@ -200,41 +211,115 @@ struct drm_i915_error_state_buf { __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); -void i915_capture_error_state(struct drm_i915_private *dev_priv, - intel_engine_mask_t engine_mask, - const char *error_msg); +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915); +void i915_capture_error_state(struct drm_i915_private *i915); + +struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); + +struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp); + +struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp); + +struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp); -static inline struct i915_gpu_state * -i915_gpu_state_get(struct i915_gpu_state *gpu) +void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress); + +struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt); + +void i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress); + +void i915_error_state_store(struct i915_gpu_coredump *error); + +static inline struct i915_gpu_coredump * +i915_gpu_coredump_get(struct i915_gpu_coredump *gpu) { kref_get(&gpu->ref); return gpu; } -ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, - char *buf, loff_t offset, size_t count); +ssize_t +i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, + char *buf, loff_t offset, size_t count); -void __i915_gpu_state_free(struct kref *kref); -static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +void __i915_gpu_coredump_free(struct kref *kref); +static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { if (gpu) - kref_put(&gpu->ref, __i915_gpu_state_free); + kref_put(&gpu->ref, __i915_gpu_coredump_free); } -struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); void i915_disable_error_state(struct drm_i915_private *i915, int err); #else -static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg) +static inline void i915_capture_error_state(struct drm_i915_private *i915) +{ +} + +static inline struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp) +{ + return NULL; +} + +static inline void +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress) +{ +} + +static inline struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt) +{ + return NULL; +} + +static inline void +i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress) +{ +} + +static inline void +i915_error_state_store(struct drm_i915_private *i915, + struct i915_gpu_coredump *error) { } -static inline struct i915_gpu_state * +static inline struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 42b79f577500..afc6aad9bf8c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -893,7 +893,7 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) } /** - * ivybridge_parity_work - Workqueue called when a parity error interrupt + * ivb_parity_work - Workqueue called when a parity error interrupt * occurred. * @work: workqueue struct * @@ -901,7 +901,7 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) * this event, userspace should try to remap the bad rows since statistically * it is likely the same row is more likely to go bad again. */ -static void ivybridge_parity_work(struct work_struct *work) +static void ivb_parity_work(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), l3_parity.error_work); @@ -2031,7 +2031,7 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, * 4 - Process the interrupt(s) that had bits set in the IIRs. * 5 - Re-enable Master Interrupt Control. */ -static irqreturn_t ironlake_irq_handler(int irq, void *arg) +static irqreturn_t ilk_irq_handler(int irq, void *arg) { struct drm_i915_private *dev_priv = arg; u32 de_iir, gt_iir, de_ier, sde_ier = 0; @@ -2742,7 +2742,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) /* drm_dma.h hooks */ -static void ironlake_irq_reset(struct drm_i915_private *dev_priv) +static void ilk_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -3225,7 +3225,7 @@ static void ibx_irq_postinstall(struct drm_i915_private *dev_priv) spt_hpd_detection_setup(dev_priv); } -static void ironlake_irq_postinstall(struct drm_i915_private *dev_priv) +static void ilk_irq_postinstall(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; u32 display_mask, extra_mask; @@ -3899,7 +3899,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) intel_hpd_init_work(dev_priv); - INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) dev_priv->l3_parity.remap_info[i] = NULL; @@ -3980,7 +3980,7 @@ static irq_handler_t intel_irq_handler(struct drm_i915_private *dev_priv) else if (INTEL_GEN(dev_priv) >= 8) return gen8_irq_handler; else - return ironlake_irq_handler; + return ilk_irq_handler; } } @@ -4003,7 +4003,7 @@ static void intel_irq_reset(struct drm_i915_private *dev_priv) else if (INTEL_GEN(dev_priv) >= 8) gen8_irq_reset(dev_priv); else - ironlake_irq_reset(dev_priv); + ilk_irq_reset(dev_priv); } } @@ -4026,7 +4026,7 @@ static void intel_irq_postinstall(struct drm_i915_private *dev_priv) else if (INTEL_GEN(dev_priv) >= 8) gen8_irq_postinstall(dev_priv); else - ironlake_irq_postinstall(dev_priv); + ilk_irq_postinstall(dev_priv); } } diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 318562ce64c0..b6376b25ef63 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -33,6 +33,9 @@ struct remap_pfn { struct mm_struct *mm; unsigned long pfn; pgprot_t prot; + + struct sgt_iter sgt; + resource_size_t iobase; }; static int remap_pfn(pte_t *pte, unsigned long addr, void *data) @@ -46,6 +49,35 @@ static int remap_pfn(pte_t *pte, unsigned long addr, void *data) return 0; } +#define use_dma(io) ((io) != -1) + +static inline unsigned long sgt_pfn(const struct remap_pfn *r) +{ + if (use_dma(r->iobase)) + return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT; + else + return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT); +} + +static int remap_sg(pte_t *pte, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + if (GEM_WARN_ON(!r->sgt.pfn)) + return -EINVAL; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, + pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot))); + r->pfn++; /* track insertions in case we need to unwind later */ + + r->sgt.curr += PAGE_SIZE; + if (r->sgt.curr >= r->sgt.max) + r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase)); + + return 0; +} + /** * remap_io_mapping - remap an IO mapping to userspace * @vma: user vma to map to @@ -80,3 +112,40 @@ int remap_io_mapping(struct vm_area_struct *vma, return 0; } + +/** + * remap_io_sg - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @size: size of map area + * @sgl: Start sg entry + * @iobase: Use stored dma address offset by this address or pfn if -1 + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_sg(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, + struct scatterlist *sgl, resource_size_t iobase) +{ + struct remap_pfn r = { + .mm = vma->vm_mm, + .prot = vma->vm_page_prot, + .sgt = __sgt_iter(sgl, use_dma(iobase)), + .iobase = iobase, + }; + int err; + + /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ + GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); + + if (!use_dma(iobase)) + flush_cache_range(vma, addr, size); + + err = apply_to_page_range(r.mm, addr, size, remap_sg, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9571611b4b16..83f01401b8b5 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -193,23 +193,23 @@ GEN_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_i830_info = { +static const struct intel_device_info i830_info = { I830_FEATURES, PLATFORM(INTEL_I830), }; -static const struct intel_device_info intel_i845g_info = { +static const struct intel_device_info i845g_info = { I845_FEATURES, PLATFORM(INTEL_I845G), }; -static const struct intel_device_info intel_i85x_info = { +static const struct intel_device_info i85x_info = { I830_FEATURES, PLATFORM(INTEL_I85X), .display.has_fbc = 1, }; -static const struct intel_device_info intel_i865g_info = { +static const struct intel_device_info i865g_info = { I845_FEATURES, PLATFORM(INTEL_I865G), }; @@ -228,7 +228,7 @@ static const struct intel_device_info intel_i865g_info = { GEN_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_i915g_info = { +static const struct intel_device_info i915g_info = { GEN3_FEATURES, PLATFORM(INTEL_I915G), .has_coherent_ggtt = false, @@ -239,7 +239,7 @@ static const struct intel_device_info intel_i915g_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_i915gm_info = { +static const struct intel_device_info i915gm_info = { GEN3_FEATURES, PLATFORM(INTEL_I915GM), .is_mobile = 1, @@ -252,7 +252,7 @@ static const struct intel_device_info intel_i915gm_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_i945g_info = { +static const struct intel_device_info i945g_info = { GEN3_FEATURES, PLATFORM(INTEL_I945G), .display.has_hotplug = 1, @@ -263,7 +263,7 @@ static const struct intel_device_info intel_i945g_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_i945gm_info = { +static const struct intel_device_info i945gm_info = { GEN3_FEATURES, PLATFORM(INTEL_I945GM), .is_mobile = 1, @@ -277,21 +277,21 @@ static const struct intel_device_info intel_i945gm_info = { .unfenced_needs_alignment = 1, }; -static const struct intel_device_info intel_g33_info = { +static const struct intel_device_info g33_info = { GEN3_FEATURES, PLATFORM(INTEL_G33), .display.has_hotplug = 1, .display.has_overlay = 1, }; -static const struct intel_device_info intel_pineview_g_info = { +static const struct intel_device_info pnv_g_info = { GEN3_FEATURES, PLATFORM(INTEL_PINEVIEW), .display.has_hotplug = 1, .display.has_overlay = 1, }; -static const struct intel_device_info intel_pineview_m_info = { +static const struct intel_device_info pnv_m_info = { GEN3_FEATURES, PLATFORM(INTEL_PINEVIEW), .is_mobile = 1, @@ -314,7 +314,7 @@ static const struct intel_device_info intel_pineview_m_info = { GEN_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_i965g_info = { +static const struct intel_device_info i965g_info = { GEN4_FEATURES, PLATFORM(INTEL_I965G), .display.has_overlay = 1, @@ -322,7 +322,7 @@ static const struct intel_device_info intel_i965g_info = { .has_snoop = false, }; -static const struct intel_device_info intel_i965gm_info = { +static const struct intel_device_info i965gm_info = { GEN4_FEATURES, PLATFORM(INTEL_I965GM), .is_mobile = 1, @@ -333,14 +333,14 @@ static const struct intel_device_info intel_i965gm_info = { .has_snoop = false, }; -static const struct intel_device_info intel_g45_info = { +static const struct intel_device_info g45_info = { GEN4_FEATURES, PLATFORM(INTEL_G45), .engine_mask = BIT(RCS0) | BIT(VCS0), .gpu_reset_clobbers_display = false, }; -static const struct intel_device_info intel_gm45_info = { +static const struct intel_device_info gm45_info = { GEN4_FEATURES, PLATFORM(INTEL_GM45), .is_mobile = 1, @@ -365,12 +365,12 @@ static const struct intel_device_info intel_gm45_info = { GEN_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_ironlake_d_info = { +static const struct intel_device_info ilk_d_info = { GEN5_FEATURES, PLATFORM(INTEL_IRONLAKE), }; -static const struct intel_device_info intel_ironlake_m_info = { +static const struct intel_device_info ilk_m_info = { GEN5_FEATURES, PLATFORM(INTEL_IRONLAKE), .is_mobile = 1, @@ -400,12 +400,12 @@ static const struct intel_device_info intel_ironlake_m_info = { GEN6_FEATURES, \ PLATFORM(INTEL_SANDYBRIDGE) -static const struct intel_device_info intel_sandybridge_d_gt1_info = { +static const struct intel_device_info snb_d_gt1_info = { SNB_D_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_sandybridge_d_gt2_info = { +static const struct intel_device_info snb_d_gt2_info = { SNB_D_PLATFORM, .gt = 2, }; @@ -416,12 +416,12 @@ static const struct intel_device_info intel_sandybridge_d_gt2_info = { .is_mobile = 1 -static const struct intel_device_info intel_sandybridge_m_gt1_info = { +static const struct intel_device_info snb_m_gt1_info = { SNB_M_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_sandybridge_m_gt2_info = { +static const struct intel_device_info snb_m_gt2_info = { SNB_M_PLATFORM, .gt = 2, }; @@ -450,12 +450,12 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { PLATFORM(INTEL_IVYBRIDGE), \ .has_l3_dpf = 1 -static const struct intel_device_info intel_ivybridge_d_gt1_info = { +static const struct intel_device_info ivb_d_gt1_info = { IVB_D_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_ivybridge_d_gt2_info = { +static const struct intel_device_info ivb_d_gt2_info = { IVB_D_PLATFORM, .gt = 2, }; @@ -466,17 +466,17 @@ static const struct intel_device_info intel_ivybridge_d_gt2_info = { .is_mobile = 1, \ .has_l3_dpf = 1 -static const struct intel_device_info intel_ivybridge_m_gt1_info = { +static const struct intel_device_info ivb_m_gt1_info = { IVB_M_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_ivybridge_m_gt2_info = { +static const struct intel_device_info ivb_m_gt2_info = { IVB_M_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_ivybridge_q_info = { +static const struct intel_device_info ivb_q_info = { GEN7_FEATURES, PLATFORM(INTEL_IVYBRIDGE), .gt = 2, @@ -484,7 +484,7 @@ static const struct intel_device_info intel_ivybridge_q_info = { .has_l3_dpf = 1, }; -static const struct intel_device_info intel_valleyview_info = { +static const struct intel_device_info vlv_info = { PLATFORM(INTEL_VALLEYVIEW), GEN(7), .is_lp = 1, @@ -523,17 +523,17 @@ static const struct intel_device_info intel_valleyview_info = { PLATFORM(INTEL_HASWELL), \ .has_l3_dpf = 1 -static const struct intel_device_info intel_haswell_gt1_info = { +static const struct intel_device_info hsw_gt1_info = { HSW_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_haswell_gt2_info = { +static const struct intel_device_info hsw_gt2_info = { HSW_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_haswell_gt3_info = { +static const struct intel_device_info hsw_gt3_info = { HSW_PLATFORM, .gt = 3, }; @@ -551,17 +551,17 @@ static const struct intel_device_info intel_haswell_gt3_info = { GEN8_FEATURES, \ PLATFORM(INTEL_BROADWELL) -static const struct intel_device_info intel_broadwell_gt1_info = { +static const struct intel_device_info bdw_gt1_info = { BDW_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_broadwell_gt2_info = { +static const struct intel_device_info bdw_gt2_info = { BDW_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_broadwell_rsvd_info = { +static const struct intel_device_info bdw_rsvd_info = { BDW_PLATFORM, .gt = 3, /* According to the device ID those devices are GT3, they were @@ -569,14 +569,14 @@ static const struct intel_device_info intel_broadwell_rsvd_info = { */ }; -static const struct intel_device_info intel_broadwell_gt3_info = { +static const struct intel_device_info bdw_gt3_info = { BDW_PLATFORM, .gt = 3, .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; -static const struct intel_device_info intel_cherryview_info = { +static const struct intel_device_info chv_info = { PLATFORM(INTEL_CHERRYVIEW), GEN(8), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), @@ -621,12 +621,12 @@ static const struct intel_device_info intel_cherryview_info = { GEN9_FEATURES, \ PLATFORM(INTEL_SKYLAKE) -static const struct intel_device_info intel_skylake_gt1_info = { +static const struct intel_device_info skl_gt1_info = { SKL_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_skylake_gt2_info = { +static const struct intel_device_info skl_gt2_info = { SKL_PLATFORM, .gt = 2, }; @@ -637,12 +637,12 @@ static const struct intel_device_info intel_skylake_gt2_info = { BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1) -static const struct intel_device_info intel_skylake_gt3_info = { +static const struct intel_device_info skl_gt3_info = { SKL_GT3_PLUS_PLATFORM, .gt = 3, }; -static const struct intel_device_info intel_skylake_gt4_info = { +static const struct intel_device_info skl_gt4_info = { SKL_GT3_PLUS_PLATFORM, .gt = 4, }; @@ -679,13 +679,13 @@ static const struct intel_device_info intel_skylake_gt4_info = { GEN9_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_REGIONS -static const struct intel_device_info intel_broxton_info = { +static const struct intel_device_info bxt_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_BROXTON), .ddb_size = 512, }; -static const struct intel_device_info intel_geminilake_info = { +static const struct intel_device_info glk_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_GEMINILAKE), .ddb_size = 1024, @@ -696,17 +696,17 @@ static const struct intel_device_info intel_geminilake_info = { GEN9_FEATURES, \ PLATFORM(INTEL_KABYLAKE) -static const struct intel_device_info intel_kabylake_gt1_info = { +static const struct intel_device_info kbl_gt1_info = { KBL_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_kabylake_gt2_info = { +static const struct intel_device_info kbl_gt2_info = { KBL_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_kabylake_gt3_info = { +static const struct intel_device_info kbl_gt3_info = { KBL_PLATFORM, .gt = 3, .engine_mask = @@ -717,17 +717,17 @@ static const struct intel_device_info intel_kabylake_gt3_info = { GEN9_FEATURES, \ PLATFORM(INTEL_COFFEELAKE) -static const struct intel_device_info intel_coffeelake_gt1_info = { +static const struct intel_device_info cfl_gt1_info = { CFL_PLATFORM, .gt = 1, }; -static const struct intel_device_info intel_coffeelake_gt2_info = { +static const struct intel_device_info cfl_gt2_info = { CFL_PLATFORM, .gt = 2, }; -static const struct intel_device_info intel_coffeelake_gt3_info = { +static const struct intel_device_info cfl_gt3_info = { CFL_PLATFORM, .gt = 3, .engine_mask = @@ -742,7 +742,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = { .has_coherent_ggtt = false, \ GLK_COLORS -static const struct intel_device_info intel_cannonlake_info = { +static const struct intel_device_info cnl_info = { GEN10_FEATURES, PLATFORM(INTEL_CANNONLAKE), .gt = 2, @@ -777,14 +777,14 @@ static const struct intel_device_info intel_cannonlake_info = { .has_logical_ring_elsq = 1, \ .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 } -static const struct intel_device_info intel_icelake_11_info = { +static const struct intel_device_info icl_info = { GEN11_FEATURES, PLATFORM(INTEL_ICELAKE), .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), }; -static const struct intel_device_info intel_elkhartlake_info = { +static const struct intel_device_info ehl_info = { GEN11_FEATURES, PLATFORM(INTEL_ELKHARTLAKE), .require_force_probe = 1, @@ -815,7 +815,7 @@ static const struct intel_device_info intel_elkhartlake_info = { .has_global_mocs = 1, \ .display.has_dsb = 1 -static const struct intel_device_info intel_tigerlake_12_info = { +static const struct intel_device_info tgl_info = { GEN12_FEATURES, PLATFORM(INTEL_TIGERLAKE), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), @@ -840,70 +840,70 @@ static const struct intel_device_info intel_tigerlake_12_info = { * PCI ID matches, otherwise we'll use the wrong info struct above. */ static const struct pci_device_id pciidlist[] = { - INTEL_I830_IDS(&intel_i830_info), - INTEL_I845G_IDS(&intel_i845g_info), - INTEL_I85X_IDS(&intel_i85x_info), - INTEL_I865G_IDS(&intel_i865g_info), - INTEL_I915G_IDS(&intel_i915g_info), - INTEL_I915GM_IDS(&intel_i915gm_info), - INTEL_I945G_IDS(&intel_i945g_info), - INTEL_I945GM_IDS(&intel_i945gm_info), - INTEL_I965G_IDS(&intel_i965g_info), - INTEL_G33_IDS(&intel_g33_info), - INTEL_I965GM_IDS(&intel_i965gm_info), - INTEL_GM45_IDS(&intel_gm45_info), - INTEL_G45_IDS(&intel_g45_info), - INTEL_PINEVIEW_G_IDS(&intel_pineview_g_info), - INTEL_PINEVIEW_M_IDS(&intel_pineview_m_info), - INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info), - INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info), - INTEL_SNB_D_GT1_IDS(&intel_sandybridge_d_gt1_info), - INTEL_SNB_D_GT2_IDS(&intel_sandybridge_d_gt2_info), - INTEL_SNB_M_GT1_IDS(&intel_sandybridge_m_gt1_info), - INTEL_SNB_M_GT2_IDS(&intel_sandybridge_m_gt2_info), - INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ - INTEL_IVB_M_GT1_IDS(&intel_ivybridge_m_gt1_info), - INTEL_IVB_M_GT2_IDS(&intel_ivybridge_m_gt2_info), - INTEL_IVB_D_GT1_IDS(&intel_ivybridge_d_gt1_info), - INTEL_IVB_D_GT2_IDS(&intel_ivybridge_d_gt2_info), - INTEL_HSW_GT1_IDS(&intel_haswell_gt1_info), - INTEL_HSW_GT2_IDS(&intel_haswell_gt2_info), - INTEL_HSW_GT3_IDS(&intel_haswell_gt3_info), - INTEL_VLV_IDS(&intel_valleyview_info), - INTEL_BDW_GT1_IDS(&intel_broadwell_gt1_info), - INTEL_BDW_GT2_IDS(&intel_broadwell_gt2_info), - INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info), - INTEL_BDW_RSVD_IDS(&intel_broadwell_rsvd_info), - INTEL_CHV_IDS(&intel_cherryview_info), - INTEL_SKL_GT1_IDS(&intel_skylake_gt1_info), - INTEL_SKL_GT2_IDS(&intel_skylake_gt2_info), - INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info), - INTEL_SKL_GT4_IDS(&intel_skylake_gt4_info), - INTEL_BXT_IDS(&intel_broxton_info), - INTEL_GLK_IDS(&intel_geminilake_info), - INTEL_KBL_GT1_IDS(&intel_kabylake_gt1_info), - INTEL_KBL_GT2_IDS(&intel_kabylake_gt2_info), - INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), - INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), - INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info), - INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), - INTEL_WHL_U_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_WHL_U_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_AML_CFL_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_WHL_U_GT3_IDS(&intel_coffeelake_gt3_info), - INTEL_CML_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CML_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CML_U_GT1_IDS(&intel_coffeelake_gt1_info), - INTEL_CML_U_GT2_IDS(&intel_coffeelake_gt2_info), - INTEL_CNL_IDS(&intel_cannonlake_info), - INTEL_ICL_11_IDS(&intel_icelake_11_info), - INTEL_EHL_IDS(&intel_elkhartlake_info), - INTEL_TGL_12_IDS(&intel_tigerlake_12_info), + INTEL_I830_IDS(&i830_info), + INTEL_I845G_IDS(&i845g_info), + INTEL_I85X_IDS(&i85x_info), + INTEL_I865G_IDS(&i865g_info), + INTEL_I915G_IDS(&i915g_info), + INTEL_I915GM_IDS(&i915gm_info), + INTEL_I945G_IDS(&i945g_info), + INTEL_I945GM_IDS(&i945gm_info), + INTEL_I965G_IDS(&i965g_info), + INTEL_G33_IDS(&g33_info), + INTEL_I965GM_IDS(&i965gm_info), + INTEL_GM45_IDS(&gm45_info), + INTEL_G45_IDS(&g45_info), + INTEL_PINEVIEW_G_IDS(&pnv_g_info), + INTEL_PINEVIEW_M_IDS(&pnv_m_info), + INTEL_IRONLAKE_D_IDS(&ilk_d_info), + INTEL_IRONLAKE_M_IDS(&ilk_m_info), + INTEL_SNB_D_GT1_IDS(&snb_d_gt1_info), + INTEL_SNB_D_GT2_IDS(&snb_d_gt2_info), + INTEL_SNB_M_GT1_IDS(&snb_m_gt1_info), + INTEL_SNB_M_GT2_IDS(&snb_m_gt2_info), + INTEL_IVB_Q_IDS(&ivb_q_info), /* must be first IVB */ + INTEL_IVB_M_GT1_IDS(&ivb_m_gt1_info), + INTEL_IVB_M_GT2_IDS(&ivb_m_gt2_info), + INTEL_IVB_D_GT1_IDS(&ivb_d_gt1_info), + INTEL_IVB_D_GT2_IDS(&ivb_d_gt2_info), + INTEL_HSW_GT1_IDS(&hsw_gt1_info), + INTEL_HSW_GT2_IDS(&hsw_gt2_info), + INTEL_HSW_GT3_IDS(&hsw_gt3_info), + INTEL_VLV_IDS(&vlv_info), + INTEL_BDW_GT1_IDS(&bdw_gt1_info), + INTEL_BDW_GT2_IDS(&bdw_gt2_info), + INTEL_BDW_GT3_IDS(&bdw_gt3_info), + INTEL_BDW_RSVD_IDS(&bdw_rsvd_info), + INTEL_CHV_IDS(&chv_info), + INTEL_SKL_GT1_IDS(&skl_gt1_info), + INTEL_SKL_GT2_IDS(&skl_gt2_info), + INTEL_SKL_GT3_IDS(&skl_gt3_info), + INTEL_SKL_GT4_IDS(&skl_gt4_info), + INTEL_BXT_IDS(&bxt_info), + INTEL_GLK_IDS(&glk_info), + INTEL_KBL_GT1_IDS(&kbl_gt1_info), + INTEL_KBL_GT2_IDS(&kbl_gt2_info), + INTEL_KBL_GT3_IDS(&kbl_gt3_info), + INTEL_KBL_GT4_IDS(&kbl_gt3_info), + INTEL_AML_KBL_GT2_IDS(&kbl_gt2_info), + INTEL_CFL_S_GT1_IDS(&cfl_gt1_info), + INTEL_CFL_S_GT2_IDS(&cfl_gt2_info), + INTEL_CFL_H_GT1_IDS(&cfl_gt1_info), + INTEL_CFL_H_GT2_IDS(&cfl_gt2_info), + INTEL_CFL_U_GT2_IDS(&cfl_gt2_info), + INTEL_CFL_U_GT3_IDS(&cfl_gt3_info), + INTEL_WHL_U_GT1_IDS(&cfl_gt1_info), + INTEL_WHL_U_GT2_IDS(&cfl_gt2_info), + INTEL_AML_CFL_GT2_IDS(&cfl_gt2_info), + INTEL_WHL_U_GT3_IDS(&cfl_gt3_info), + INTEL_CML_GT1_IDS(&cfl_gt1_info), + INTEL_CML_GT2_IDS(&cfl_gt2_info), + INTEL_CML_U_GT1_IDS(&cfl_gt1_info), + INTEL_CML_U_GT2_IDS(&cfl_gt2_info), + INTEL_CNL_IDS(&cnl_info), + INTEL_ICL_11_IDS(&icl_info), + INTEL_EHL_IDS(&ehl_info), + INTEL_TGL_12_IDS(&tgl_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 84350c7bc711..0f556d80ba36 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2159,8 +2159,6 @@ static int gen8_modify_context(struct intel_context *ce, struct i915_request *rq; int err; - lockdep_assert_held(&ce->pin_mutex); - rq = intel_engine_create_kernel_request(ce->engine); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -2203,17 +2201,14 @@ static int gen8_configure_context(struct i915_gem_context *ctx, if (ce->engine->class != RENDER_CLASS) continue; - err = intel_context_lock_pinned(ce); - if (err) - break; + /* Otherwise OA settings will be set upon first use */ + if (!intel_context_pin_if_active(ce)) + continue; flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu); + err = gen8_modify_context(ce, flex, count); - /* Otherwise OA settings will be set upon first use */ - if (intel_context_is_pinned(ce)) - err = gen8_modify_context(ce, flex, count); - - intel_context_unlock_pinned(ce); + intel_context_unpin(ce); if (err) break; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index f3ef6700a5f2..28a82c849bac 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1117,12 +1117,17 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; - if (!is_igp(i915)) + if (!is_igp(i915)) { pmu->name = kasprintf(GFP_KERNEL, - "i915-%s", + "i915_%s", dev_name(i915->drm.dev)); - else + if (pmu->name) { + /* tools/perf reserves colons as special. */ + strreplace((char *)pmu->name, ':', '_'); + } + } else { pmu->name = "i915"; + } if (!pmu->name) goto err; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bbfedeb00b7f..6cc55c103f67 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2244,26 +2244,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) MG_DP_MODE_LN1_ACU_PORT1) #define MG_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define MG_DP_MODE_CFG_DP_X1_MODE (1 << 6) -#define MG_DP_MODE_CFG_TR2PWR_GATING (1 << 5) -#define MG_DP_MODE_CFG_TRPWR_GATING (1 << 4) -#define MG_DP_MODE_CFG_CLNPWR_GATING (1 << 3) -#define MG_DP_MODE_CFG_DIGPWR_GATING (1 << 2) -#define MG_DP_MODE_CFG_GAONPWR_GATING (1 << 1) - -#define MG_MISC_SUS0_PORT1 0x168814 -#define MG_MISC_SUS0_PORT2 0x169814 -#define MG_MISC_SUS0_PORT3 0x16A814 -#define MG_MISC_SUS0_PORT4 0x16B814 -#define MG_MISC_SUS0(tc_port) \ - _MMIO(_PORT(tc_port, MG_MISC_SUS0_PORT1, MG_MISC_SUS0_PORT2)) -#define MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK (3 << 14) -#define MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(x) ((x) << 14) -#define MG_MISC_SUS0_CFG_TR2PWR_GATING (1 << 12) -#define MG_MISC_SUS0_CFG_CL2PWR_GATING (1 << 11) -#define MG_MISC_SUS0_CFG_GAONPWR_GATING (1 << 10) -#define MG_MISC_SUS0_CFG_TRPWR_GATING (1 << 7) -#define MG_MISC_SUS0_CFG_CL1PWR_GATING (1 << 6) -#define MG_MISC_SUS0_CFG_DGPWR_GATING (1 << 5) /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. @@ -4177,7 +4157,13 @@ enum { #define CPSSUNIT_CLKGATE_DIS REG_BIT(9) #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) -#define VFUNIT_CLKGATE_DIS (1 << 20) +#define VFUNIT_CLKGATE_DIS REG_BIT(20) +#define HSUNIT_CLKGATE_DIS REG_BIT(8) +#define VSUNIT_CLKGATE_DIS REG_BIT(3) + +#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) +#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) +#define PSDUNIT_CLKGATE_DIS REG_BIT(5) #define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) #define CGPSF_CLKGATE_DIS (1 << 3) @@ -6808,6 +6794,7 @@ enum { #define PLANE_CTL_TILED_Y (4 << 10) #define PLANE_CTL_TILED_YF (5 << 10) #define PLANE_CTL_FLIP_HORIZONTAL (1 << 8) +#define PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE (1 << 4) /* TGL+ */ #define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK */ #define PLANE_CTL_ALPHA_DISABLE (0 << 4) #define PLANE_CTL_ALPHA_SW_PREMULTIPLY (2 << 4) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 44a0d1a950c5..be185886e4fc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -658,7 +658,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; - rq->flags = 0; RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 565322640378..031433691a06 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -51,7 +51,7 @@ struct i915_capture_list { #define RQ_TRACE(rq, fmt, ...) do { \ const struct i915_request *rq__ = (rq); \ - ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d" fmt, \ + ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \ rq__->fence.context, rq__->fence.seqno, \ hwsp_seqno(rq__), ##__VA_ARGS__); \ } while (0) @@ -77,6 +77,38 @@ enum { * a request is on the various signal_list. */ I915_FENCE_FLAG_SIGNAL, + + /* + * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted + * + * The execution of some requests should not be interrupted. This is + * a sensitive operation as it makes the request super important, + * blocking other higher priority work. Abuse of this flag will + * lead to quality of service issues. + */ + I915_FENCE_FLAG_NOPREEMPT, + + /* + * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue + * + * A high priority sentinel request may be submitted to clear the + * submission queue. As it will be the only request in-flight, upon + * execution all other active requests will have been preempted and + * unsubmitted. This preemptive pulse is used to re-evaluate the + * in-flight requests, particularly in cases where an active context + * is banned and those active requests need to be cancelled. + */ + I915_FENCE_FLAG_SENTINEL, + + /* + * I915_FENCE_FLAG_BOOST - upclock the gpu for this request + * + * Some requests are more important than others! In particular, a + * request that the user is waiting on is typically required for + * interactive latency, for which we want to minimise by upclocking + * the GPU. Here we track such boost requests on a per-request basis. + */ + I915_FENCE_FLAG_BOOST, }; /** @@ -225,11 +257,6 @@ struct i915_request { /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; - unsigned long flags; -#define I915_REQUEST_WAITBOOST BIT(0) -#define I915_REQUEST_NOPREEMPT BIT(1) -#define I915_REQUEST_SENTINEL BIT(2) - /** timeline->request entry for this request */ struct list_head link; @@ -442,18 +469,18 @@ static inline void i915_request_mark_complete(struct i915_request *rq) static inline bool i915_request_has_waitboost(const struct i915_request *rq) { - return rq->flags & I915_REQUEST_WAITBOOST; + return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); } static inline bool i915_request_has_nopreempt(const struct i915_request *rq) { /* Preemption should only be disabled very rarely */ - return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); + return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags)); } static inline bool i915_request_has_sentinel(const struct i915_request *rq) { - return unlikely(rq->flags & I915_REQUEST_SENTINEL); + return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } static inline struct intel_timeline * diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index ad2b1b833d7b..0cef3130db05 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -498,15 +498,15 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct i915_gpu_state *gpu; + struct i915_gpu_coredump *gpu; ssize_t ret; gpu = i915_first_error_state(i915); if (IS_ERR(gpu)) { ret = PTR_ERR(gpu); } else if (gpu) { - ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count); - i915_gpu_state_put(gpu); + ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); + i915_gpu_coredump_put(gpu); } else { const char *str = "No error state collected\n"; size_t len = strlen(str); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index cbd783c31adb..17d7c525ea5c 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -423,8 +423,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void __iomem *ptr; int err; - /* Access through the GTT requires the device to be awake. */ - assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; @@ -456,6 +454,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) goto err_unpin; i915_vma_set_ggtt_write(vma); + + /* NB Access through the GTT requires the device to be awake. */ return ptr; err_unpin: @@ -858,6 +858,7 @@ static void vma_unbind_pages(struct i915_vma *vma) int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct i915_vma_work *work = NULL; + intel_wakeref_t wakeref = 0; unsigned int bound; int err; @@ -883,6 +884,9 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } } + if (flags & PIN_GLOBAL) + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* No more allocations allowed once we hold vm->mutex */ err = mutex_lock_interruptible(&vma->vm->mutex); if (err) @@ -946,6 +950,8 @@ err_unlock: err_fence: if (work) dma_fence_work_commit(&work->base); + if (wakeref) + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); err_pages: vma_put_pages(vma); return err; @@ -1246,11 +1252,16 @@ int __i915_vma_unbind(struct i915_vma *vma) int i915_vma_unbind(struct i915_vma *vma) { struct i915_address_space *vm = vma->vm; + intel_wakeref_t wakeref = 0; int err; if (!drm_mm_node_allocated(&vma->node)) return 0; + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + /* XXX not always required: nop_clear_range */ + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + err = mutex_lock_interruptible(&vm->mutex); if (err) return err; @@ -1258,6 +1269,9 @@ int i915_vma_unbind(struct i915_vma *vma) err = __i915_vma_unbind(vma); mutex_unlock(&vm->mutex); + if (wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return err; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 5fffa3c58908..02b31a62951e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -30,148 +30,14 @@ #include <drm/drm_mm.h> +#include "gem/i915_gem_object.h" + #include "i915_gem_gtt.h" #include "i915_gem_fence_reg.h" -#include "gem/i915_gem_object.h" #include "i915_active.h" #include "i915_request.h" - -enum i915_cache_level; - -/** - * DOC: Virtual Memory Address - * - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. - */ -struct i915_vma { - struct drm_mm_node node; - - struct i915_address_space *vm; - const struct i915_vma_ops *ops; - - struct drm_i915_gem_object *obj; - struct dma_resv *resv; /** Alias of obj->resv */ - - struct sg_table *pages; - void __iomem *iomap; - void *private; /* owned by creator */ - - struct i915_fence_reg *fence; - - u64 size; - u64 display_alignment; - struct i915_page_sizes page_sizes; - - /* mmap-offset associated with fencing for this vma */ - struct i915_mmap_offset *mmo; - - u32 fence_size; - u32 fence_alignment; - - /** - * Count of the number of times this vma has been opened by different - * handles (but same file) for execbuf, i.e. the number of aliases - * that exist in the ctx->handle_vmas LUT for this vma. - */ - struct kref ref; - atomic_t open_count; - atomic_t flags; - /** - * How many users have pinned this object in GTT space. - * - * This is a tightly bound, fairly small number of users, so we - * stuff inside the flags field so that we can both check for overflow - * and detect a no-op i915_vma_pin() in a single check, while also - * pinning the vma. - * - * The worst case display setup would have the same vma pinned for - * use on each plane on each crtc, while also building the next atomic - * state and holding a pin for the length of the cleanup queue. In the - * future, the flip queue may be increased from 1. - * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 - * - * For GEM, the number of concurrent users for pwrite/pread is - * unbounded. For execbuffer, it is currently one but will in future - * be extended to allow multiple clients to pin vma concurrently. - * - * We also use suballocated pages, with each suballocation claiming - * its own pin on the shared vma. At present, this is limited to - * exclusive cachelines of a single page, so a maximum of 64 possible - * users. - */ -#define I915_VMA_PIN_MASK 0x3ff -#define I915_VMA_OVERFLOW 0x200 - - /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND_BIT 10 -#define I915_VMA_LOCAL_BIND_BIT 11 - -#define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) -#define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) - -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) - -#define I915_VMA_ALLOC_BIT 12 -#define I915_VMA_ALLOC ((int)BIT(I915_VMA_ALLOC_BIT)) - -#define I915_VMA_ERROR_BIT 13 -#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) - -#define I915_VMA_GGTT_BIT 14 -#define I915_VMA_CAN_FENCE_BIT 15 -#define I915_VMA_USERFAULT_BIT 16 -#define I915_VMA_GGTT_WRITE_BIT 17 - -#define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) -#define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) -#define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) -#define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) - - struct i915_active active; - -#define I915_VMA_PAGES_BIAS 24 -#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) - atomic_t pages_count; /* number of active binds to the pages */ - struct mutex pages_mutex; /* protect acquire/release of backing pages */ - - /** - * Support different GGTT views into the same object. - * This means there can be multiple VMA mappings per object and per VM. - * i915_ggtt_view_type is used to distinguish between those entries. - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also - * assumed in GEM functions which take no ggtt view parameter. - */ - struct i915_ggtt_view ggtt_view; - - /** This object's place on the active/inactive lists */ - struct list_head vm_link; - - struct list_head obj_link; /* Link in the object's VMA list */ - struct rb_node obj_node; - struct hlist_node obj_hash; - - /** This vma's place in the execbuf reservation list */ - struct list_head exec_link; - struct list_head reloc_link; - - /** This vma's place in the eviction list */ - struct list_head evict_link; - - struct list_head closed_link; - - /** - * Used for performing relocations during execbuffer insertion. - */ - unsigned int *exec_flags; - struct hlist_node exec_node; - u32 exec_handle; -}; +#include "i915_vma_types.h" struct i915_vma * i915_vma_instance(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h new file mode 100644 index 000000000000..e0942efd5236 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_VMA_TYPES_H__ +#define __I915_VMA_TYPES_H__ + +#include <linux/rbtree.h> + +#include <drm/drm_mm.h> + +#include "gem/i915_gem_object_types.h" + +enum i915_cache_level; + +/** + * DOC: Global GTT views + * + * Background and previous state + * + * Historically objects could exists (be bound) in global GTT space only as + * singular instances with a view representing all of the object's backing pages + * in a linear fashion. This view will be called a normal view. + * + * To support multiple views of the same object, where the number of mapped + * pages is not equal to the backing store, or where the layout of the pages + * is not linear, concept of a GGTT view was added. + * + * One example of an alternative view is a stereo display driven by a single + * image. In this case we would have a framebuffer looking like this + * (2x2 pages): + * + * 12 + * 34 + * + * Above would represent a normal GGTT view as normally mapped for GPU or CPU + * rendering. In contrast, fed to the display engine would be an alternative + * view which could look something like this: + * + * 1212 + * 3434 + * + * In this example both the size and layout of pages in the alternative view is + * different from the normal view. + * + * Implementation and usage + * + * GGTT views are implemented using VMAs and are distinguished via enum + * i915_ggtt_view_type and struct i915_ggtt_view. + * + * A new flavour of core GEM functions which work with GGTT bound objects were + * added with the _ggtt_ infix, and sometimes with _view postfix to avoid + * renaming in large amounts of code. They take the struct i915_ggtt_view + * parameter encapsulating all metadata required to implement a view. + * + * As a helper for callers which are only interested in the normal view, + * globally const i915_ggtt_view_normal singleton instance exists. All old core + * GEM API functions, the ones not taking the view parameter, are operating on, + * or with the normal GGTT view. + * + * Code wanting to add or use a new GGTT view needs to: + * + * 1. Add a new enum with a suitable name. + * 2. Extend the metadata in the i915_ggtt_view structure if required. + * 3. Add support to i915_get_vma_pages(). + * + * New views are required to build a scatter-gather table from within the + * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and + * exists for the lifetime of an VMA. + * + * Core API is designed to have copy semantics which means that passed in + * struct i915_ggtt_view does not need to be persistent (left around after + * calling the core API functions). + * + */ + +struct intel_remapped_plane_info { + /* in gtt pages */ + unsigned int width, height, stride, offset; +} __packed; + +struct intel_remapped_info { + struct intel_remapped_plane_info plane[2]; + unsigned int unused_mbz; +} __packed; + +struct intel_rotation_info { + struct intel_remapped_plane_info plane[2]; +} __packed; + +struct intel_partial_info { + u64 offset; + unsigned int size; +} __packed; + +enum i915_ggtt_view_type { + I915_GGTT_VIEW_NORMAL = 0, + I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), + I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), + I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), +}; + +static inline void assert_i915_gem_gtt_types(void) +{ + BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int)); + BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); + BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int)); + + /* Check that rotation/remapped shares offsets for simplicity */ + BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != + offsetof(struct intel_rotation_info, plane[0])); + BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != + offsetofend(struct intel_rotation_info, plane[1])); + + /* As we encode the size of each branch inside the union into its type, + * we have to be careful that each branch has a unique size. + */ + switch ((enum i915_ggtt_view_type)0) { + case I915_GGTT_VIEW_NORMAL: + case I915_GGTT_VIEW_PARTIAL: + case I915_GGTT_VIEW_ROTATED: + case I915_GGTT_VIEW_REMAPPED: + /* gcc complains if these are identical cases */ + break; + } +} + +struct i915_ggtt_view { + enum i915_ggtt_view_type type; + union { + /* Members need to contain no holes/padding */ + struct intel_partial_info partial; + struct intel_rotation_info rotated; + struct intel_remapped_info remapped; + }; +}; + +/** + * DOC: Virtual Memory Address + * + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. + * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + + struct i915_address_space *vm; + const struct i915_vma_ops *ops; + + struct drm_i915_gem_object *obj; + struct dma_resv *resv; /** Alias of obj->resv */ + + struct sg_table *pages; + void __iomem *iomap; + void *private; /* owned by creator */ + + struct i915_fence_reg *fence; + + u64 size; + u64 display_alignment; + struct i915_page_sizes page_sizes; + + /* mmap-offset associated with fencing for this vma */ + struct i915_mmap_offset *mmo; + + u32 fence_size; + u32 fence_alignment; + + /** + * Count of the number of times this vma has been opened by different + * handles (but same file) for execbuf, i.e. the number of aliases + * that exist in the ctx->handle_vmas LUT for this vma. + */ + struct kref ref; + atomic_t open_count; + atomic_t flags; + /** + * How many users have pinned this object in GTT space. + * + * This is a tightly bound, fairly small number of users, so we + * stuff inside the flags field so that we can both check for overflow + * and detect a no-op i915_vma_pin() in a single check, while also + * pinning the vma. + * + * The worst case display setup would have the same vma pinned for + * use on each plane on each crtc, while also building the next atomic + * state and holding a pin for the length of the cleanup queue. In the + * future, the flip queue may be increased from 1. + * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 + * + * For GEM, the number of concurrent users for pwrite/pread is + * unbounded. For execbuffer, it is currently one but will in future + * be extended to allow multiple clients to pin vma concurrently. + * + * We also use suballocated pages, with each suballocation claiming + * its own pin on the shared vma. At present, this is limited to + * exclusive cachelines of a single page, so a maximum of 64 possible + * users. + */ +#define I915_VMA_PIN_MASK 0x3ff +#define I915_VMA_OVERFLOW 0x200 + + /** Flags and address space this VMA is bound to */ +#define I915_VMA_GLOBAL_BIND_BIT 10 +#define I915_VMA_LOCAL_BIND_BIT 11 + +#define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) +#define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) + +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) + +#define I915_VMA_ALLOC_BIT 12 +#define I915_VMA_ALLOC ((int)BIT(I915_VMA_ALLOC_BIT)) + +#define I915_VMA_ERROR_BIT 13 +#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) + +#define I915_VMA_GGTT_BIT 14 +#define I915_VMA_CAN_FENCE_BIT 15 +#define I915_VMA_USERFAULT_BIT 16 +#define I915_VMA_GGTT_WRITE_BIT 17 + +#define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) +#define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) +#define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) +#define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) + + struct i915_active active; + +#define I915_VMA_PAGES_BIAS 24 +#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) + atomic_t pages_count; /* number of active binds to the pages */ + struct mutex pages_mutex; /* protect acquire/release of backing pages */ + + /** + * Support different GGTT views into the same object. + * This means there can be multiple VMA mappings per object and per VM. + * i915_ggtt_view_type is used to distinguish between those entries. + * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also + * assumed in GEM functions which take no ggtt view parameter. + */ + struct i915_ggtt_view ggtt_view; + + /** This object's place on the active/inactive lists */ + struct list_head vm_link; + + struct list_head obj_link; /* Link in the object's VMA list */ + struct rb_node obj_node; + struct hlist_node obj_hash; + + /** This vma's place in the execbuf reservation list */ + struct list_head exec_link; + struct list_head reloc_link; + + /** This vma's place in the eviction list */ + struct list_head evict_link; + + struct list_head closed_link; + + /** + * Used for performing relocations during execbuffer insertion. + */ + unsigned int *exec_flags; + struct hlist_node exec_node; + u32 exec_handle; +}; + +#endif + diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 1acb5db77431..6670a0763be2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -519,7 +519,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) } } -static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) +static void bdw_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; int s, ss; @@ -600,7 +600,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } -static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) +static void hsw_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse1; @@ -1021,11 +1021,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) /* Initialize slice/subslice/EU info */ if (IS_HASWELL(dev_priv)) - haswell_sseu_info_init(dev_priv); + hsw_sseu_info_init(dev_priv); else if (IS_CHERRYVIEW(dev_priv)) cherryview_sseu_info_init(dev_priv); else if (IS_BROADWELL(dev_priv)) - broadwell_sseu_info_init(dev_priv); + bdw_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 9)) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) @@ -1093,7 +1093,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) * hooked up to an SFC (Scaler & Format Converter) unit. * In TGL each VDBOX has access to an SFC. */ - if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0) + if (INTEL_GEN(dev_priv) >= 12 || logical_vdbox++ % 2 == 0) RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i); } DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n", diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e24c280e5930..d0d038b3cd79 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -16,6 +16,20 @@ const u32 intel_region_map[] = { [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0), }; +struct intel_memory_region * +intel_memory_region_by_type(struct drm_i915_private *i915, + enum intel_memory_type mem_type) +{ + struct intel_memory_region *mr; + int id; + + for_each_memory_region(mr, i915, id) + if (mr->type == mem_type) + return mr; + + return NULL; +} + static u64 intel_memory_region_free_pages(struct intel_memory_region *mem, struct list_head *blocks) @@ -37,7 +51,7 @@ __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, struct list_head *blocks) { mutex_lock(&mem->mm_lock); - intel_memory_region_free_pages(mem, blocks); + mem->avail += intel_memory_region_free_pages(mem, blocks); mutex_unlock(&mem->mm_lock); } @@ -106,6 +120,7 @@ __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, break; } while (1); + mem->avail -= size; mutex_unlock(&mem->mm_lock); return 0; @@ -164,6 +179,8 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->io_start = io_start; mem->min_page_size = min_page_size; mem->ops = ops; + mem->total = size; + mem->avail = mem->total; mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); @@ -185,6 +202,16 @@ err_free: return ERR_PTR(err); } +void intel_memory_region_set_name(struct intel_memory_region *mem, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vsnprintf(mem->name, sizeof(mem->name), fmt, ap); + va_end(ap); +} + static void __intel_memory_region_destroy(struct kref *kref) { struct intel_memory_region *mem = diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 238722009677..232490d89a83 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -47,6 +47,10 @@ enum intel_region_id { #define I915_ALLOC_MIN_PAGE_SIZE BIT(0) #define I915_ALLOC_CONTIGUOUS BIT(1) +#define for_each_memory_region(mr, i915, id) \ + for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ + for_each_if((mr) = (i915)->mm.regions[id]) + /** * Memory regions encoded as type | instance */ @@ -82,10 +86,13 @@ struct intel_memory_region { resource_size_t io_start; resource_size_t min_page_size; + resource_size_t total; + resource_size_t avail; unsigned int type; unsigned int instance; unsigned int id; + char name[8]; dma_addr_t remap_addr; @@ -125,5 +132,12 @@ void intel_memory_region_put(struct intel_memory_region *mem); int intel_memory_regions_hw_probe(struct drm_i915_private *i915); void intel_memory_regions_driver_release(struct drm_i915_private *i915); +struct intel_memory_region * +intel_memory_region_by_type(struct drm_i915_private *i915, + enum intel_memory_type mem_type); + +__printf(2, 3) void +intel_memory_region_set_name(struct intel_memory_region *mem, + const char *fmt, ...); #endif diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 43b68b5fc562..4ed60e1f01db 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -12,90 +12,91 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) { switch (id) { case INTEL_PCH_IBX_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Ibex Peak PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found Ibex Peak PCH\n"); WARN_ON(!IS_GEN(dev_priv, 5)); return PCH_IBX; case INTEL_PCH_CPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found CougarPoint PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found CougarPoint PCH\n"); WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv)); return PCH_CPT; case INTEL_PCH_PPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found PantherPoint PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found PantherPoint PCH\n"); WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv)); /* PantherPoint is CPT compatible */ return PCH_CPT; case INTEL_PCH_LPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found LynxPoint PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found LynxPoint PCH\n"); WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); return PCH_LPT; case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found LynxPoint LP PCH\n"); WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); return PCH_LPT; case INTEL_PCH_WPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found WildcatPoint PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint PCH\n"); WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); /* WildcatPoint is LPT compatible */ return PCH_LPT; case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found WildcatPoint LP PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint LP PCH\n"); WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); /* WildcatPoint is LPT compatible */ return PCH_LPT; case INTEL_PCH_SPT_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found SunrisePoint PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); return PCH_SPT; case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_SPT; case INTEL_PCH_KBP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); + drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); + drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Cannon Lake LP PCH (CNP-LP)\n"); + drm_dbg_kms(&dev_priv->drm, + "Found Cannon Lake LP PCH (CNP-LP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CMP_DEVICE_ID_TYPE: case INTEL_PCH_CMP2_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n"); + drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n"); WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ return PCH_CNP; case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n"); + drm_dbg_kms(&dev_priv->drm, "Found Comet Lake V PCH (CMP-V)\n"); WARN_ON(!IS_COFFEELAKE(dev_priv)); /* Comet Lake V PCH is based on KBP, which is SPT compatible */ return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Ice Lake PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n"); WARN_ON(!IS_ICELAKE(dev_priv)); return PCH_ICP; case INTEL_PCH_MCC_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found Mule Creek Canyon PCH\n"); WARN_ON(!IS_ELKHARTLAKE(dev_priv)); return PCH_MCC; case INTEL_PCH_TGP_DEVICE_ID_TYPE: case INTEL_PCH_TGP2_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Tiger Lake LP PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv)); return PCH_TGP; case INTEL_PCH_JSP_DEVICE_ID_TYPE: case INTEL_PCH_JSP2_DEVICE_ID_TYPE: - DRM_DEBUG_KMS("Found Jasper Lake PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n"); WARN_ON(!IS_ELKHARTLAKE(dev_priv)); return PCH_JSP; default: @@ -145,9 +146,9 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv) id = INTEL_PCH_IBX_DEVICE_ID_TYPE; if (id) - DRM_DEBUG_KMS("Assuming PCH ID %04x\n", id); + drm_dbg_kms(&dev_priv->drm, "Assuming PCH ID %04x\n", id); else - DRM_DEBUG_KMS("Assuming no PCH\n"); + drm_dbg_kms(&dev_priv->drm, "Assuming no PCH\n"); return id; } @@ -201,13 +202,14 @@ void intel_detect_pch(struct drm_i915_private *dev_priv) * display. */ if (pch && !HAS_DISPLAY(dev_priv)) { - DRM_DEBUG_KMS("Display disabled, reverting to NOP PCH\n"); + drm_dbg_kms(&dev_priv->drm, + "Display disabled, reverting to NOP PCH\n"); dev_priv->pch_type = PCH_NOP; dev_priv->pch_id = 0; } if (!pch) - DRM_DEBUG_KMS("No PCH found.\n"); + drm_dbg_kms(&dev_priv->drm, "No PCH found.\n"); pci_dev_put(pch); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 31ec82337e4f..bd2d30ecc030 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -140,7 +140,7 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv) } -static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) +static void pnv_get_mem_freq(struct drm_i915_private *dev_priv) { u32 tmp; @@ -178,7 +178,7 @@ static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0; } -static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) +static void ilk_get_mem_freq(struct drm_i915_private *dev_priv) { u16 ddrpll, csipll; @@ -199,8 +199,8 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->mem_freq = 1600; break; default: - DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n", - ddrpll & 0xff); + drm_dbg(&dev_priv->drm, "unknown memory frequency 0x%02x\n", + ddrpll & 0xff); dev_priv->mem_freq = 0; break; } @@ -228,8 +228,8 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->fsb_freq = 6400; break; default: - DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n", - csipll & 0x3ff); + drm_dbg(&dev_priv->drm, "unknown fsb frequency 0x%04x\n", + csipll & 0x3ff); dev_priv->fsb_freq = 0; break; } @@ -314,7 +314,8 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) - DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); + drm_err(&dev_priv->drm, + "timed out waiting for Punit DDR DVFS request\n"); vlv_punit_put(dev_priv); } @@ -383,9 +384,9 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl trace_intel_memory_cxsr(dev_priv, was_enabled, enable); - DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n", - enableddisabled(enable), - enableddisabled(was_enabled)); + drm_dbg_kms(&dev_priv->drm, "memory self-refresh is %s (was %s)\n", + enableddisabled(enable), + enableddisabled(was_enabled)); return was_enabled; } @@ -510,8 +511,8 @@ static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, if (i9xx_plane == PLANE_B) size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size; - DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", - dsparb, plane_name(i9xx_plane), size); + drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } @@ -527,8 +528,8 @@ static int i830_get_fifo_size(struct drm_i915_private *dev_priv, size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size; size >>= 1; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", - dsparb, plane_name(i9xx_plane), size); + drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } @@ -542,41 +543,45 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv, size = dsparb & 0x7f; size >>= 2; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", - dsparb, plane_name(i9xx_plane), size); + drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } /* Pineview has different values for various configs */ -static const struct intel_watermark_params pineview_display_wm = { +static const struct intel_watermark_params pnv_display_wm = { .fifo_size = PINEVIEW_DISPLAY_FIFO, .max_wm = PINEVIEW_MAX_WM, .default_wm = PINEVIEW_DFT_WM, .guard_size = PINEVIEW_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params pineview_display_hplloff_wm = { + +static const struct intel_watermark_params pnv_display_hplloff_wm = { .fifo_size = PINEVIEW_DISPLAY_FIFO, .max_wm = PINEVIEW_MAX_WM, .default_wm = PINEVIEW_DFT_HPLLOFF_WM, .guard_size = PINEVIEW_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params pineview_cursor_wm = { + +static const struct intel_watermark_params pnv_cursor_wm = { .fifo_size = PINEVIEW_CURSOR_FIFO, .max_wm = PINEVIEW_CURSOR_MAX_WM, .default_wm = PINEVIEW_CURSOR_DFT_WM, .guard_size = PINEVIEW_CURSOR_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params pineview_cursor_hplloff_wm = { + +static const struct intel_watermark_params pnv_cursor_hplloff_wm = { .fifo_size = PINEVIEW_CURSOR_FIFO, .max_wm = PINEVIEW_CURSOR_MAX_WM, .default_wm = PINEVIEW_CURSOR_DFT_WM, .guard_size = PINEVIEW_CURSOR_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i965_cursor_wm_info = { .fifo_size = I965_CURSOR_FIFO, .max_wm = I965_CURSOR_MAX_WM, @@ -584,6 +589,7 @@ static const struct intel_watermark_params i965_cursor_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i945_wm_info = { .fifo_size = I945_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -591,6 +597,7 @@ static const struct intel_watermark_params i945_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i915_wm_info = { .fifo_size = I915_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -598,6 +605,7 @@ static const struct intel_watermark_params i915_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i830_a_wm_info = { .fifo_size = I855GM_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -605,6 +613,7 @@ static const struct intel_watermark_params i830_a_wm_info = { .guard_size = 2, .cacheline_size = I830_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i830_bc_wm_info = { .fifo_size = I855GM_FIFO_SIZE, .max_wm = I915_MAX_WM/2, @@ -612,6 +621,7 @@ static const struct intel_watermark_params i830_bc_wm_info = { .guard_size = 2, .cacheline_size = I830_FIFO_LINE_SIZE, }; + static const struct intel_watermark_params i845_wm_info = { .fifo_size = I830_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -848,7 +858,7 @@ static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) return enabled; } -static void pineview_update_wm(struct intel_crtc *unused_crtc) +static void pnv_update_wm(struct intel_crtc *unused_crtc) { struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; @@ -861,7 +871,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) dev_priv->fsb_freq, dev_priv->mem_freq); if (!latency) { - DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n"); + drm_dbg_kms(&dev_priv->drm, + "Unknown FSB/MEM found, disable CxSR\n"); intel_set_memory_cxsr(dev_priv, false); return; } @@ -876,18 +887,18 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) int clock = adjusted_mode->crtc_clock; /* Display SR */ - wm = intel_calculate_wm(clock, &pineview_display_wm, - pineview_display_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_display_wm, + pnv_display_wm.fifo_size, cpp, latency->display_sr); reg = I915_READ(DSPFW1); reg &= ~DSPFW_SR_MASK; reg |= FW_WM(wm, SR); I915_WRITE(DSPFW1, reg); - DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg); + drm_dbg_kms(&dev_priv->drm, "DSPFW1 register is %x\n", reg); /* cursor SR */ - wm = intel_calculate_wm(clock, &pineview_cursor_wm, - pineview_display_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_cursor_wm, + pnv_display_wm.fifo_size, 4, latency->cursor_sr); reg = I915_READ(DSPFW3); reg &= ~DSPFW_CURSOR_SR_MASK; @@ -895,8 +906,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(DSPFW3, reg); /* Display HPLL off SR */ - wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm, - pineview_display_hplloff_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_display_hplloff_wm, + pnv_display_hplloff_wm.fifo_size, cpp, latency->display_hpll_disable); reg = I915_READ(DSPFW3); reg &= ~DSPFW_HPLL_SR_MASK; @@ -904,14 +915,14 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(DSPFW3, reg); /* cursor HPLL off SR */ - wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm, - pineview_display_hplloff_wm.fifo_size, + wm = intel_calculate_wm(clock, &pnv_cursor_hplloff_wm, + pnv_display_hplloff_wm.fifo_size, 4, latency->cursor_hpll_disable); reg = I915_READ(DSPFW3); reg &= ~DSPFW_HPLL_CURSOR_MASK; reg |= FW_WM(wm, HPLL_CURSOR); I915_WRITE(DSPFW3, reg); - DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg); + drm_dbg_kms(&dev_priv->drm, "DSPFW3 register is %x\n", reg); intel_set_memory_cxsr(dev_priv, true); } else { @@ -1202,6 +1213,7 @@ static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); enum plane_id plane_id = plane->id; bool dirty = false; @@ -1254,16 +1266,18 @@ static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, out: if (dirty) { - DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n", - plane->base.name, - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id], - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id], - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]); + drm_dbg_kms(&dev_priv->drm, + "%s watermarks: normal=%d, SR=%d, HPLL=%d\n", + plane->base.name, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]); if (plane_id == PLANE_PRIMARY) - DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n", - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc, - crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc); + drm_dbg_kms(&dev_priv->drm, + "FBC watermarks: SR=%d, HPLL=%d\n", + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc); } return dirty; @@ -1781,6 +1795,7 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum plane_id plane_id = plane->id; int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); int level; @@ -1808,11 +1823,12 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, out: if (dirty) - DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", - plane->base.name, - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); + drm_dbg_kms(&dev_priv->drm, + "%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); return dirty; } @@ -2227,8 +2243,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) if (srwm < 0) srwm = 1; srwm &= 0x1ff; - DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", - entries, srwm); + drm_dbg_kms(&dev_priv->drm, + "self-refresh entries: %d, wm: %d\n", + entries, srwm); entries = intel_wm_method2(clock, htotal, crtc->base.cursor->state->crtc_w, 4, @@ -2241,8 +2258,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) if (cursor_sr > i965_cursor_wm_info.max_wm) cursor_sr = i965_cursor_wm_info.max_wm; - DRM_DEBUG_KMS("self-refresh watermark: display plane %d " - "cursor %d\n", srwm, cursor_sr); + drm_dbg_kms(&dev_priv->drm, + "self-refresh watermark: display plane %d " + "cursor %d\n", srwm, cursor_sr); cxsr_enabled = true; } else { @@ -2251,8 +2269,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) intel_set_memory_cxsr(dev_priv, false); } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", - srwm); + drm_dbg_kms(&dev_priv->drm, + "Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", + srwm); /* 965 has limitations... */ I915_WRITE(DSPFW1, FW_WM(srwm, SR) | @@ -2342,7 +2361,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) planeb_wm = wm_info->max_wm; } - DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); + drm_dbg_kms(&dev_priv->drm, + "FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); if (IS_I915GM(dev_priv) && enabled) { struct drm_i915_gem_object *obj; @@ -2384,7 +2404,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) entries = intel_wm_method2(clock, htotal, hdisplay, cpp, sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); - DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); + drm_dbg_kms(&dev_priv->drm, + "self-refresh entries: %d\n", entries); srwm = wm_info->fifo_size - entries; if (srwm < 0) srwm = 1; @@ -2396,8 +2417,9 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", - planea_wm, planeb_wm, cwm, srwm); + drm_dbg_kms(&dev_priv->drm, + "Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", + planea_wm, planeb_wm, cwm, srwm); fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); fwater_hi = (cwm & 0x1f); @@ -2433,7 +2455,8 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) fwater_lo = I915_READ(FW_BLC) & ~0xfff; fwater_lo |= (3<<8) | planea_wm; - DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm); + drm_dbg_kms(&dev_priv->drm, + "Setting FIFO watermarks - A: %d\n", planea_wm); I915_WRITE(FW_BLC, fwater_lo); } @@ -2832,7 +2855,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, &val, NULL); if (ret) { - DRM_ERROR("SKL Mailbox read error = %d\n", ret); + drm_err(&dev_priv->drm, + "SKL Mailbox read error = %d\n", ret); return; } @@ -2850,7 +2874,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val, NULL); if (ret) { - DRM_ERROR("SKL Mailbox read error = %d\n", ret); + drm_err(&dev_priv->drm, + "SKL Mailbox read error = %d\n", ret); return; } @@ -2968,8 +2993,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, unsigned int latency = wm[level]; if (latency == 0) { - DRM_DEBUG_KMS("%s WM%d latency not provided\n", - name, level); + drm_dbg_kms(&dev_priv->drm, + "%s WM%d latency not provided\n", + name, level); continue; } @@ -2982,9 +3008,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, else if (level > 0) latency *= 5; - DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", - name, level, wm[level], - latency / 10, latency % 10); + drm_dbg_kms(&dev_priv->drm, + "%s WM%d latency %u (%u.%u usec)\n", name, level, + wm[level], latency / 10, latency % 10); } } @@ -3018,7 +3044,8 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) if (!changed) return; - DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); + drm_dbg_kms(&dev_priv->drm, + "WM latency values increased to avoid potential underruns\n"); intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); @@ -3046,7 +3073,8 @@ static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv) dev_priv->wm.spr_latency[3] = 0; dev_priv->wm.cur_latency[3] = 0; - DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n"); + drm_dbg_kms(&dev_priv->drm, + "LP3 watermarks disabled due to potential for lost interrupts\n"); intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); @@ -3096,7 +3124,7 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv, /* At least LP0 must be valid */ if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { - DRM_DEBUG_KMS("LP0 watermark invalid\n"); + drm_dbg_kms(&dev_priv->drm, "LP0 watermark invalid\n"); return false; } @@ -3673,7 +3701,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) return; } - DRM_DEBUG_DRIVER("Couldn't read SAGV block time!\n"); + drm_dbg(&dev_priv->drm, "Couldn't read SAGV block time!\n"); } else if (IS_GEN(dev_priv, 11)) { dev_priv->sagv_block_time_us = 10; return; @@ -3713,7 +3741,7 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; - DRM_DEBUG_KMS("Enabling SAGV\n"); + drm_dbg_kms(&dev_priv->drm, "Enabling SAGV\n"); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); @@ -3724,11 +3752,11 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { - DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to enable SAGV\n"); + drm_err(&dev_priv->drm, "Failed to enable SAGV\n"); return ret; } @@ -3747,7 +3775,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; - DRM_DEBUG_KMS("Disabling SAGV\n"); + drm_dbg_kms(&dev_priv->drm, "Disabling SAGV\n"); /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, @@ -3758,11 +3786,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { - DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to disable SAGV (%d)\n", ret); + drm_err(&dev_priv->drm, "Failed to disable SAGV (%d)\n", ret); return ret; } @@ -4331,9 +4359,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state, } if (level < 0) { - DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); - DRM_DEBUG_KMS("minimum required %d/%d\n", blocks, - alloc_size); + drm_dbg_kms(&dev_priv->drm, + "Requested display configuration exceeds system DDB limitations"); + drm_dbg_kms(&dev_priv->drm, "minimum required %d/%d\n", + blocks, alloc_size); return -EINVAL; } @@ -4561,7 +4590,8 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, /* only planar format has two planes */ if (color_plane == 1 && !intel_format_info_is_yuv_semiplanar(format, modifier)) { - DRM_DEBUG_KMS("Non planar format have single plane\n"); + drm_dbg_kms(&dev_priv->drm, + "Non planar format have single plane\n"); return -EINVAL; } @@ -5260,10 +5290,11 @@ skl_print_wm_changes(struct intel_atomic_state *state) if (skl_ddb_entry_equal(old, new)) continue; - DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n", - plane->base.base.id, plane->base.name, - old->start, old->end, new->start, new->end, - skl_ddb_entry_size(old), skl_ddb_entry_size(new)); + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n", + plane->base.base.id, plane->base.name, + old->start, old->end, new->start, new->end, + skl_ddb_entry_size(old), skl_ddb_entry_size(new)); } for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { @@ -5276,70 +5307,74 @@ skl_print_wm_changes(struct intel_atomic_state *state) if (skl_plane_wm_equals(dev_priv, old_wm, new_wm)) continue; - DRM_DEBUG_KMS("[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", - plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), - enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), - enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), - enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), - enast(old_wm->trans_wm.plane_en), - enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), - enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), - enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), - enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), - enast(new_wm->trans_wm.plane_en)); - - DRM_DEBUG_KMS("[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", + plane->base.base.id, plane->base.name, + enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), + enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), + enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), + enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), + enast(old_wm->trans_wm.plane_en), + enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), + enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), + enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), + enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), + enast(new_wm->trans_wm.plane_en)); + + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", - plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, - enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, - enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l, - enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l, - enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l, - enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l, - enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, - enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, - enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, - - enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, - enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, - enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l, - enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l, - enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l, - enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, - enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, - enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); - - DRM_DEBUG_KMS("[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", - plane->base.base.id, plane->base.name, - old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, - old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, - old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, - old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, - old_wm->trans_wm.plane_res_b, - new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, - new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, - new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, - new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, - new_wm->trans_wm.plane_res_b); - - DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", - plane->base.base.id, plane->base.name, - old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, - old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, - old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, - old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, - old_wm->trans_wm.min_ddb_alloc, - new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, - new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, - new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, - new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, - new_wm->trans_wm.min_ddb_alloc); + plane->base.base.id, plane->base.name, + enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, + enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, + enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l, + enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l, + enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l, + enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l, + enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, + enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, + enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, + + enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, + enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, + enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l, + enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l, + enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l, + enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, + enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, + enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); + + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + plane->base.base.id, plane->base.name, + old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, + old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, + old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, + old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, + old_wm->trans_wm.plane_res_b, + new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, + new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, + new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, + new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, + new_wm->trans_wm.plane_res_b); + + drm_dbg_kms(&dev_priv->drm, + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + plane->base.base.id, plane->base.name, + old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, + old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, + old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, + old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, + old_wm->trans_wm.min_ddb_alloc, + new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, + new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, + new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, + new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, + new_wm->trans_wm.min_ddb_alloc); } } } @@ -5931,19 +5966,22 @@ void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv) crtc_state->wm.g4x.optimal = *active; crtc_state->wm.g4x.intermediate = *active; - DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", - pipe_name(pipe), - wm->pipe[pipe].plane[PLANE_PRIMARY], - wm->pipe[pipe].plane[PLANE_CURSOR], - wm->pipe[pipe].plane[PLANE_SPRITE0]); + drm_dbg_kms(&dev_priv->drm, + "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", + pipe_name(pipe), + wm->pipe[pipe].plane[PLANE_PRIMARY], + wm->pipe[pipe].plane[PLANE_CURSOR], + wm->pipe[pipe].plane[PLANE_SPRITE0]); } - DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", - wm->sr.plane, wm->sr.cursor, wm->sr.fbc); - DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", - wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); - DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n", - yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); + drm_dbg_kms(&dev_priv->drm, + "Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", + wm->sr.plane, wm->sr.cursor, wm->sr.fbc); + drm_dbg_kms(&dev_priv->drm, + "Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", + wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); + drm_dbg_kms(&dev_priv->drm, "Initial SR=%s HPLL=%s FBC=%s\n", + yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); } void g4x_wm_sanitize(struct drm_i915_private *dev_priv) @@ -6035,8 +6073,9 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { - DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " - "assuming DDR DVFS is disabled\n"); + drm_dbg_kms(&dev_priv->drm, + "Punit not acking DDR DVFS request, " + "assuming DDR DVFS is disabled\n"); dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; } else { val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); @@ -6087,16 +6126,18 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) crtc_state->wm.vlv.optimal = *active; crtc_state->wm.vlv.intermediate = *active; - DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", - pipe_name(pipe), - wm->pipe[pipe].plane[PLANE_PRIMARY], - wm->pipe[pipe].plane[PLANE_CURSOR], - wm->pipe[pipe].plane[PLANE_SPRITE0], - wm->pipe[pipe].plane[PLANE_SPRITE1]); + drm_dbg_kms(&dev_priv->drm, + "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", + pipe_name(pipe), + wm->pipe[pipe].plane[PLANE_PRIMARY], + wm->pipe[pipe].plane[PLANE_CURSOR], + wm->pipe[pipe].plane[PLANE_SPRITE0], + wm->pipe[pipe].plane[PLANE_SPRITE1]); } - DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", - wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); + drm_dbg_kms(&dev_priv->drm, + "Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", + wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); } void vlv_wm_sanitize(struct drm_i915_private *dev_priv) @@ -6412,8 +6453,9 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) tmp = I915_READ(MCH_SSKPD); if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) - DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", - tmp); + drm_dbg_kms(&dev_priv->drm, + "Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", + tmp); } static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) @@ -6590,6 +6632,17 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) /* WaEnable32PlaneMode:icl */ I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE)); + + /* + * Wa_1408615072:icl,ehl (vsunit) + * Wa_1407596294:icl,ehl (hsunit) + */ + intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE, + 0, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); + + /* Wa_1407352427:icl,ehl */ + intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2, + 0, PSDUNIT_CLKGATE_DIS); } static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) @@ -6597,6 +6650,10 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) u32 vd_pg_enable = 0; unsigned int i; + /* Wa_1408615072:tgl */ + intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2, + 0, VSUNIT_CLKGATE_DIS_TGL); + /* This is not a WA. Enable VD HCP & MFX_ENC powergate */ for (i = 0; i < I915_MAX_VCS; i++) { if (HAS_ENGINE(dev_priv, _VCS(i))) @@ -7113,7 +7170,8 @@ void intel_suspend_hw(struct drm_i915_private *dev_priv) static void nop_init_clock_gating(struct drm_i915_private *dev_priv) { - DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n"); + drm_dbg_kms(&dev_priv->drm, + "No clock gating settings or workarounds applied.\n"); } /** @@ -7180,9 +7238,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) { /* For cxsr */ if (IS_PINEVIEW(dev_priv)) - i915_pineview_get_mem_freq(dev_priv); + pnv_get_mem_freq(dev_priv); else if (IS_GEN(dev_priv, 5)) - i915_ironlake_get_mem_freq(dev_priv); + ilk_get_mem_freq(dev_priv); if (intel_has_sagv(dev_priv)) skl_setup_sagv_block_time(dev_priv); @@ -7208,8 +7266,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.optimize_watermarks = ilk_optimize_watermarks; } else { - DRM_DEBUG_KMS("Failed to read display plane latency. " - "Disable CxSR\n"); + drm_dbg_kms(&dev_priv->drm, + "Failed to read display plane latency. " + "Disable CxSR\n"); } } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); @@ -7229,7 +7288,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->is_ddr3, dev_priv->fsb_freq, dev_priv->mem_freq)) { - DRM_INFO("failed to find known CxSR latency " + drm_info(&dev_priv->drm, + "failed to find known CxSR latency " "(found ddr%s fsb freq %d, mem freq %d), " "disabling CxSR\n", (dev_priv->is_ddr3 == 1) ? "3" : "2", @@ -7238,7 +7298,7 @@ void intel_init_pm(struct drm_i915_private *dev_priv) intel_set_memory_cxsr(dev_priv, false); dev_priv->display.update_wm = NULL; } else - dev_priv->display.update_wm = pineview_update_wm; + dev_priv->display.update_wm = pnv_update_wm; } else if (IS_GEN(dev_priv, 4)) { dev_priv->display.update_wm = i965_update_wm; } else if (IS_GEN(dev_priv, 3)) { @@ -7253,7 +7313,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.get_fifo_size = i830_get_fifo_size; } } else { - DRM_ERROR("unexpected fall-through in intel_init_pm\n"); + drm_err(&dev_priv->drm, + "unexpected fall-through in %s\n", __func__); } } diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c index eddb392917aa..14b59b899c9b 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/intel_region_lmem.c @@ -90,6 +90,8 @@ region_lmem_init(struct intel_memory_region *mem) if (ret) io_mapping_fini(&mem->iomap); + intel_memory_region_set_name(mem, "local"); + return ret; } @@ -123,10 +125,12 @@ intel_setup_fake_lmem(struct drm_i915_private *i915) io_start, &intel_region_lmem_ops); if (!IS_ERR(mem)) { - DRM_INFO("Intel graphics fake LMEM: %pR\n", &mem->region); - DRM_INFO("Intel graphics fake LMEM IO start: %llx\n", - (u64)mem->io_start); - DRM_INFO("Intel graphics fake LMEM size: %llx\n", + drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n", + &mem->region); + drm_info(&i915->drm, + "Intel graphics fake LMEM IO start: %llx\n", + (u64)mem->io_start); + drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n", (u64)resource_size(&mem->region)); } diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index e06b35b844a0..cbfb7171d62d 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -105,8 +105,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, if (intel_wait_for_register(uncore, VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, 5)) { - DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n", - is_read ? "read" : "write"); + drm_dbg(&i915->drm, "IOSF sideband idle wait (%s) timed out\n", + is_read ? "read" : "write"); return -EAGAIN; } @@ -129,8 +129,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, *val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA); err = 0; } else { - DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n", - is_read ? "read" : "write"); + drm_dbg(&i915->drm, "IOSF sideband finish wait (%s) timed out\n", + is_read ? "read" : "write"); err = -ETIMEDOUT; } @@ -283,7 +283,8 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg, if (intel_wait_for_register_fw(uncore, SBI_CTL_STAT, SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to become ready\n"); + drm_err(&i915->drm, + "timeout waiting for SBI to become ready\n"); return -EBUSY; } @@ -301,12 +302,13 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg, if (__intel_wait_for_register_fw(uncore, SBI_CTL_STAT, SBI_BUSY, 0, 100, 100, &cmd)) { - DRM_ERROR("timeout waiting for SBI to complete read\n"); + drm_err(&i915->drm, + "timeout waiting for SBI to complete read\n"); return -ETIMEDOUT; } if (cmd & SBI_RESPONSE_FAIL) { - DRM_ERROR("error during SBI read of reg %x\n", reg); + drm_err(&i915->drm, "error during SBI read of reg %x\n", reg); return -ENXIO; } @@ -426,8 +428,9 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, mutex_unlock(&i915->sb_lock); if (err) { - DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", - mbox, __builtin_return_address(0), err); + drm_dbg(&i915->drm, + "warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", + mbox, __builtin_return_address(0), err); } return err; @@ -447,8 +450,9 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, mutex_unlock(&i915->sb_lock); if (err) { - DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", - val, mbox, __builtin_return_address(0), err); + drm_dbg(&i915->drm, + "warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", + val, mbox, __builtin_return_address(0), err); } return err; @@ -519,7 +523,8 @@ int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request, * requests, and for any quirks of the PCODE firmware that delays * the request completion. */ - DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); + drm_dbg_kms(&i915->drm, + "PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); ret = wait_for_atomic(COND, 50); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 94a97bf8c021..5f2cf6f43b8b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -359,7 +359,8 @@ static void __gen6_gt_wait_for_fifo(struct intel_uncore *uncore) if (wait_for_atomic((n = fifo_free_entries(uncore)) > GT_FIFO_NUM_RESERVED_ENTRIES, GT_FIFO_TIMEOUT_MS)) { - DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n); + drm_dbg(&uncore->i915->drm, + "GT_FIFO timeout, entries: %u\n", n); return; } } @@ -432,7 +433,7 @@ intel_uncore_forcewake_reset(struct intel_uncore *uncore) break; if (--retry_count == 0) { - DRM_ERROR("Timed out waiting for forcewake timers to finish\n"); + drm_err(&uncore->i915->drm, "Timed out waiting for forcewake timers to finish\n"); break; } @@ -490,7 +491,7 @@ gen6_check_for_fifo_debug(struct intel_uncore *uncore) fifodbg = __raw_uncore_read32(uncore, GTFIFODBG); if (unlikely(fifodbg)) { - DRM_DEBUG_DRIVER("GTFIFODBG = 0x08%x\n", fifodbg); + drm_dbg(&uncore->i915->drm, "GTFIFODBG = 0x08%x\n", fifodbg); __raw_uncore_write32(uncore, GTFIFODBG, fifodbg); } @@ -562,7 +563,7 @@ void intel_uncore_resume_early(struct intel_uncore *uncore) unsigned int restore_forcewake; if (intel_uncore_unclaimed_mmio(uncore)) - DRM_DEBUG("unclaimed mmio detected on resume, clearing\n"); + drm_dbg(&uncore->i915->drm, "unclaimed mmio detected on resume, clearing\n"); if (!intel_uncore_has_forcewake(uncore)) return; @@ -1595,8 +1596,8 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore) spin_unlock_irq(&uncore->lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { - DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n"); - DRM_INFO("when using vblank-synced partial screen updates.\n"); + drm_info(&i915->drm, "No MT forcewake available on Ivybridge, this can result in issues\n"); + drm_info(&i915->drm, "when using vblank-synced partial screen updates.\n"); fw_domain_fini(uncore, FW_DOMAIN_ID_RENDER); fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); @@ -1683,8 +1684,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore) mmio_size = 2 * 1024 * 1024; uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size); if (uncore->regs == NULL) { - DRM_ERROR("failed to map registers\n"); - + drm_err(&i915->drm, "failed to map registers\n"); return -EIO; } @@ -1807,7 +1807,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) /* clear out unclaimed reg detection bit */ if (intel_uncore_unclaimed_mmio(uncore)) - DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); + drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n"); return 0; @@ -2072,9 +2072,10 @@ intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore) if (unlikely(check_for_unclaimed_mmio(uncore))) { if (!i915_modparams.mmio_debug) { - DRM_DEBUG("Unclaimed register detected, " - "enabling oneshot unclaimed register reporting. " - "Please use i915.mmio_debug=N for more information.\n"); + drm_dbg(&uncore->i915->drm, + "Unclaimed register detected, " + "enabling oneshot unclaimed register reporting. " + "Please use i915.mmio_debug=N for more information.\n"); i915_modparams.mmio_debug++; } uncore->debug->unclaimed_mmio_check--; diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 59aa1b6f1827..8fbf6f4d3f26 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -95,16 +95,17 @@ static void __intel_wakeref_put_work(struct work_struct *wrk) void __intel_wakeref_init(struct intel_wakeref *wf, struct intel_runtime_pm *rpm, const struct intel_wakeref_ops *ops, - struct lock_class_key *key) + struct intel_wakeref_lockclass *key) { wf->rpm = rpm; wf->ops = ops; - __mutex_init(&wf->mutex, "wakeref", key); + __mutex_init(&wf->mutex, "wakeref.mutex", &key->mutex); atomic_set(&wf->count, 0); wf->wakeref = 0; INIT_WORK(&wf->work, __intel_wakeref_put_work); + lockdep_init_map(&wf->work.lockdep_map, "wakeref.work", &key->work, 0); } int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 8d945db94b7a..7d1e676b71ef 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -44,12 +44,17 @@ struct intel_wakeref { struct work_struct work; }; +struct intel_wakeref_lockclass { + struct lock_class_key mutex; + struct lock_class_key work; +}; + void __intel_wakeref_init(struct intel_wakeref *wf, struct intel_runtime_pm *rpm, const struct intel_wakeref_ops *ops, - struct lock_class_key *key); + struct intel_wakeref_lockclass *key); #define intel_wakeref_init(wf, rpm, ops) do { \ - static struct lock_class_key __key; \ + static struct intel_wakeref_lockclass __key; \ \ __intel_wakeref_init((wf), (rpm), (ops), &__key); \ } while (0) diff --git a/drivers/gpu/drm/i915/oa/Makefile b/drivers/gpu/drm/i915/oa/Makefile deleted file mode 100644 index df028e2b0d64..000000000000 --- a/drivers/gpu/drm/i915/oa/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: MIT - -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index b37fc53973cc..78f36faf2bbe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -124,8 +124,6 @@ static void pm_resume(struct drm_i915_private *i915) * that runtime-pm just works. */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - intel_gt_sanitize(&i915->gt, false); - i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(&i915->ggtt); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 80cde5bda922..b342bef5e7c9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -34,6 +34,7 @@ #include "mock_drm.h" #include "mock_gem_device.h" +#include "mock_gtt.h" #include "igt_flush_test.h" static void cleanup_freed_objects(struct drm_i915_private *i915) @@ -151,7 +152,7 @@ static int igt_ppgtt_alloc(void *arg) if (!HAS_PPGTT(dev_priv)) return 0; - ppgtt = __ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -206,8 +207,7 @@ err_ppgtt_cleanup: return err; } -static int lowlevel_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int lowlevel_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -256,7 +256,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, * memory. We expect to hit -ENOMEM. */ - obj = fake_dma_object(i915, BIT_ULL(size)); + obj = fake_dma_object(vm->i915, BIT_ULL(size)); if (IS_ERR(obj)) { kfree(order); break; @@ -291,7 +291,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma->node.size = BIT_ULL(size); mock_vma->node.start = addr; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) vm->insert_entries(vm, mock_vma, I915_CACHE_NONE, 0); } @@ -303,7 +303,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) vm->clear_range(vm, addr, BIT_ULL(size)); } @@ -312,7 +312,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, kfree(order); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } kfree(mock_vma); @@ -340,8 +340,7 @@ static void close_object_list(struct list_head *objects, } } -static int fill_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int fill_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -374,7 +373,7 @@ static int fill_hole(struct drm_i915_private *i915, { } }, *p; - obj = fake_dma_object(i915, full_size); + obj = fake_dma_object(vm->i915, full_size); if (IS_ERR(obj)) break; @@ -542,7 +541,7 @@ static int fill_hole(struct drm_i915_private *i915, } close_object_list(&objects, vm); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; @@ -552,8 +551,7 @@ err: return err; } -static int walk_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int walk_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -575,7 +573,7 @@ static int walk_hole(struct drm_i915_private *i915, u64 addr; int err = 0; - obj = fake_dma_object(i915, size << PAGE_SHIFT); + obj = fake_dma_object(vm->i915, size << PAGE_SHIFT); if (IS_ERR(obj)) break; @@ -630,14 +628,13 @@ err_put: if (err) return err; - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; } -static int pot_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int pot_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -651,7 +648,7 @@ static int pot_hole(struct drm_i915_private *i915, if (i915_is_ggtt(vm)) flags |= PIN_GLOBAL; - obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -712,8 +709,7 @@ err_obj: return err; } -static int drunk_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int drunk_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -758,7 +754,7 @@ static int drunk_hole(struct drm_i915_private *i915, * memory. We expect to hit -ENOMEM. */ - obj = fake_dma_object(i915, BIT_ULL(size)); + obj = fake_dma_object(vm->i915, BIT_ULL(size)); if (IS_ERR(obj)) { kfree(order); break; @@ -816,14 +812,13 @@ err_obj: if (err) return err; - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; } -static int __shrink_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int __shrink_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -840,7 +835,7 @@ static int __shrink_hole(struct drm_i915_private *i915, u64 size = BIT_ULL(order++); size = min(size, hole_end - addr); - obj = fake_dma_object(i915, size); + obj = fake_dma_object(vm->i915, size); if (IS_ERR(obj)) { err = PTR_ERR(obj); break; @@ -894,12 +889,11 @@ static int __shrink_hole(struct drm_i915_private *i915, } close_object_list(&objects, vm); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); return err; } -static int shrink_hole(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int shrink_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -911,7 +905,7 @@ static int shrink_hole(struct drm_i915_private *i915, for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { vm->fault_attr.interval = prime; - err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + err = __shrink_hole(vm, hole_start, hole_end, end_time); if (err) break; } @@ -921,8 +915,7 @@ static int shrink_hole(struct drm_i915_private *i915, return err; } -static int shrink_boom(struct drm_i915_private *i915, - struct i915_address_space *vm, +static int shrink_boom(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { @@ -944,7 +937,7 @@ static int shrink_boom(struct drm_i915_private *i915, unsigned int size = sizes[i]; struct i915_vma *vma; - purge = fake_dma_object(i915, size); + purge = fake_dma_object(vm->i915, size); if (IS_ERR(purge)) return PTR_ERR(purge); @@ -961,7 +954,7 @@ static int shrink_boom(struct drm_i915_private *i915, /* Should now be ripe for purging */ i915_vma_unpin(vma); - explode = fake_dma_object(i915, size); + explode = fake_dma_object(vm->i915, size); if (IS_ERR(explode)) { err = PTR_ERR(explode); goto err_purge; @@ -987,7 +980,7 @@ static int shrink_boom(struct drm_i915_private *i915, i915_gem_object_put(explode); memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); - cleanup_freed_objects(i915); + cleanup_freed_objects(vm->i915); } return 0; @@ -1001,8 +994,7 @@ err_purge: } static int exercise_ppgtt(struct drm_i915_private *dev_priv, - int (*func)(struct drm_i915_private *i915, - struct i915_address_space *vm, + int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time)) { @@ -1018,7 +1010,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_free; @@ -1026,7 +1018,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); GEM_BUG_ON(!atomic_read(&ppgtt->vm.open)); - err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time); + err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time); i915_vm_put(&ppgtt->vm); @@ -1082,8 +1074,7 @@ static int sort_holes(void *priv, struct list_head *A, struct list_head *B) } static int exercise_ggtt(struct drm_i915_private *i915, - int (*func)(struct drm_i915_private *i915, - struct i915_address_space *vm, + int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time)) { @@ -1105,7 +1096,7 @@ restart: if (hole_start >= hole_end) continue; - err = func(i915, &ggtt->vm, hole_start, hole_end, end_time); + err = func(&ggtt->vm, hole_start, hole_end, end_time); if (err) break; @@ -1252,8 +1243,7 @@ static void track_vma_bind(struct i915_vma *vma) } static int exercise_mock(struct drm_i915_private *i915, - int (*func)(struct drm_i915_private *i915, - struct i915_address_space *vm, + int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time)) { @@ -1268,7 +1258,7 @@ static int exercise_mock(struct drm_i915_private *i915, return -ENOMEM; vm = i915_gem_context_get_vm_rcu(ctx); - err = func(i915, vm, 0, min(vm->total, limit), end_time); + err = func(vm, 0, min(vm->total, limit), end_time); i915_vm_put(vm); mock_context_close(ctx); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 476fba2ed8bb..34138c7bdd15 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -1,5 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* List each unit test as selftest(name, function) + +#ifndef selftest +#define selftest(x, y) +#endif + +/* + * List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create * a module parameter. It must be unique and legal for a C identifier. diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index aa5a0e7f5d9e..5b39bab4da1d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -1,5 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* List each unit test as selftest(name, function) + +#ifndef selftest +#define selftest(x, y) +#endif + +/* + * List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create * a module parameter. It must be unique and legal for a C identifier. diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index f7129a243daa..5a577a1332f5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -1,5 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* List each unit test as selftest(name, function) + +#ifndef selftest +#define selftest(x, y) +#endif + +/* + * List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create * a module parameter. It must be unique and legal for a C identifier. diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 35cc69a3a1b9..05364eca20f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -25,6 +25,7 @@ #ifndef __I915_SELFTESTS_RANDOM_H__ #define __I915_SELFTESTS_RANDOM_H__ +#include <linux/math64.h> #include <linux/random.h> #include "../i915_selftest.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.c b/drivers/gpu/drm/i915/selftests/igt_atomic.c new file mode 100644 index 000000000000..fb506b699095 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_atomic.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + */ + +#include <linux/preempt.h> +#include <linux/bottom_half.h> +#include <linux/irqflags.h> + +#include "igt_atomic.h" + +static void __preempt_begin(void) +{ + preempt_disable(); +} + +static void __preempt_end(void) +{ + preempt_enable(); +} + +static void __softirq_begin(void) +{ + local_bh_disable(); +} + +static void __softirq_end(void) +{ + local_bh_enable(); +} + +static void __hardirq_begin(void) +{ + local_irq_disable(); +} + +static void __hardirq_end(void) +{ + local_irq_enable(); +} + +const struct igt_atomic_section igt_atomic_phases[] = { + { "preempt", __preempt_begin, __preempt_end }, + { "softirq", __softirq_begin, __softirq_end }, + { "hardirq", __hardirq_begin, __hardirq_end }, + { } +}; diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.h b/drivers/gpu/drm/i915/selftests/igt_atomic.h index 93ec89f487ec..1991798abf4b 100644 --- a/drivers/gpu/drm/i915/selftests/igt_atomic.h +++ b/drivers/gpu/drm/i915/selftests/igt_atomic.h @@ -6,51 +6,12 @@ #ifndef IGT_ATOMIC_H #define IGT_ATOMIC_H -#include <linux/preempt.h> -#include <linux/bottom_half.h> -#include <linux/irqflags.h> - -static void __preempt_begin(void) -{ - preempt_disable(); -} - -static void __preempt_end(void) -{ - preempt_enable(); -} - -static void __softirq_begin(void) -{ - local_bh_disable(); -} - -static void __softirq_end(void) -{ - local_bh_enable(); -} - -static void __hardirq_begin(void) -{ - local_irq_disable(); -} - -static void __hardirq_end(void) -{ - local_irq_enable(); -} - struct igt_atomic_section { const char *name; void (*critical_section_begin)(void); void (*critical_section_end)(void); }; -static const struct igt_atomic_section igt_atomic_phases[] = { - { "preempt", __preempt_begin, __preempt_end }, - { "softirq", __softirq_begin, __softirq_end }, - { "hardirq", __hardirq_begin, __hardirq_end }, - { } -}; +extern const struct igt_atomic_section igt_atomic_phases[]; #endif /* IGT_ATOMIC_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index c0e9f99d50de..36ed42736c52 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,7 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H -#include "../i915_gem.h" +#include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 04d0aa7b349e..3ef3620e0da5 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -270,36 +270,31 @@ static int igt_gpu_write_dw(struct intel_context *ce, static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) { - unsigned long n; + unsigned long n = obj->base.size >> PAGE_SHIFT; + u32 *ptr; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); if (err) return err; - for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 __iomem *base; - u32 read_val; - - base = i915_gem_object_lmem_io_map_page_atomic(obj, n); + ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); - read_val = ioread32(base + dword); - io_mapping_unmap_atomic(base); - if (read_val != val) { - pr_err("n=%lu base[%u]=%u, val=%u\n", - n, dword, read_val, val); + ptr += dword; + while (n--) { + if (*ptr != val) { + pr_err("base[%u]=%08x, val=%08x\n", + dword, *ptr, val); err = -EINVAL; break; } + + ptr += PAGE_SIZE / sizeof(*ptr); } - i915_gem_object_unpin_pages(obj); + i915_gem_object_unpin_map(obj); return err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index ac641f5360e1..3b8986983afc 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,6 +58,8 @@ static void mock_device_release(struct drm_device *dev) mock_device_flush(i915); intel_gt_driver_remove(&i915->gt); + i915_gem_driver_release__contexts(i915); + i915_gem_drain_workqueue(i915); i915_gem_drain_freed_objects(i915); @@ -184,6 +186,7 @@ struct drm_i915_private *mock_gem_device(void) if (mock_engine_init(i915->engine[RCS0])) goto err_context; + __clear_bit(I915_WEDGED, &i915->gt.reset.flags); intel_engines_driver_register(i915); return i915; diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 20ac3844edec..edc5e3dda8ca 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -55,6 +55,11 @@ static void mock_cleanup(struct i915_address_space *vm) { } +static void mock_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) { struct i915_ppgtt *ppgtt; @@ -70,7 +75,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); - ppgtt->vm.clear_range = nop_clear_range; + ppgtt->vm.clear_range = mock_clear_range; ppgtt->vm.insert_page = mock_insert_page; ppgtt->vm.insert_entries = mock_insert_entries; ppgtt->vm.cleanup = mock_cleanup; @@ -107,7 +112,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->vm.total = 4096 * PAGE_SIZE; - ggtt->vm.clear_range = nop_clear_range; + ggtt->vm.clear_range = mock_clear_range; ggtt->vm.insert_page = mock_insert_page; ggtt->vm.insert_entries = mock_insert_entries; ggtt->vm.cleanup = mock_cleanup; diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index 5044dfb8e3d6..b7a82ed5788f 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o mediatek-drm-hdmi-objs := mtk_cec.o \ mtk_hdmi.o \ mtk_hdmi_ddc.o \ - mtk_mt2701_hdmi_phy.o \ + mtk_mt2701_hdmi_phy.o \ mtk_mt8173_hdmi_phy.o \ mtk_hdmi_phy.o diff --git a/drivers/gpu/drm/mediatek/mtk_disp_color.c b/drivers/gpu/drm/mediatek/mtk_disp_color.c index 59de2a46aa49..6fb0d6983a4a 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_color.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_color.c @@ -9,6 +9,7 @@ #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/platform_device.h> +#include <linux/soc/mediatek/mtk-cmdq.h> #include "mtk_drm_crtc.h" #include "mtk_drm_ddp_comp.h" @@ -45,12 +46,12 @@ static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp) static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { struct mtk_disp_color *color = comp_to_color(comp); - writel(w, comp->regs + DISP_COLOR_WIDTH(color)); - writel(h, comp->regs + DISP_COLOR_HEIGHT(color)); + mtk_ddp_write(cmdq_pkt, w, comp, DISP_COLOR_WIDTH(color)); + mtk_ddp_write(cmdq_pkt, h, comp, DISP_COLOR_HEIGHT(color)); } static void mtk_color_start(struct mtk_ddp_comp *comp) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 4a55bb6e2213..891d80c73e04 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -11,6 +11,7 @@ #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/platform_device.h> +#include <linux/soc/mediatek/mtk-cmdq.h> #include "mtk_drm_crtc.h" #include "mtk_drm_ddp_comp.h" @@ -124,14 +125,15 @@ static void mtk_ovl_stop(struct mtk_ddp_comp *comp) static void mtk_ovl_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { if (w != 0 && h != 0) - writel_relaxed(h << 16 | w, comp->regs + DISP_REG_OVL_ROI_SIZE); - writel_relaxed(0x0, comp->regs + DISP_REG_OVL_ROI_BGCLR); + mtk_ddp_write_relaxed(cmdq_pkt, h << 16 | w, comp, + DISP_REG_OVL_ROI_SIZE); + mtk_ddp_write_relaxed(cmdq_pkt, 0x0, comp, DISP_REG_OVL_ROI_BGCLR); - writel(0x1, comp->regs + DISP_REG_OVL_RST); - writel(0x0, comp->regs + DISP_REG_OVL_RST); + mtk_ddp_write(cmdq_pkt, 0x1, comp, DISP_REG_OVL_RST); + mtk_ddp_write(cmdq_pkt, 0x0, comp, DISP_REG_OVL_RST); } static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp) @@ -175,16 +177,16 @@ static int mtk_ovl_layer_check(struct mtk_ddp_comp *comp, unsigned int idx, return 0; } -static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) +static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx, + struct cmdq_pkt *cmdq_pkt) { - unsigned int reg; unsigned int gmc_thrshd_l; unsigned int gmc_thrshd_h; unsigned int gmc_value; struct mtk_disp_ovl *ovl = comp_to_ovl(comp); - writel(0x1, comp->regs + DISP_REG_OVL_RDMA_CTRL(idx)); - + mtk_ddp_write(cmdq_pkt, 0x1, comp, + DISP_REG_OVL_RDMA_CTRL(idx)); gmc_thrshd_l = GMC_THRESHOLD_LOW >> (GMC_THRESHOLD_BITS - ovl->data->gmc_bits); gmc_thrshd_h = GMC_THRESHOLD_HIGH >> @@ -194,22 +196,19 @@ static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) else gmc_value = gmc_thrshd_l | gmc_thrshd_l << 8 | gmc_thrshd_h << 16 | gmc_thrshd_h << 24; - writel(gmc_value, comp->regs + DISP_REG_OVL_RDMA_GMC(idx)); - - reg = readl(comp->regs + DISP_REG_OVL_SRC_CON); - reg = reg | BIT(idx); - writel(reg, comp->regs + DISP_REG_OVL_SRC_CON); + mtk_ddp_write(cmdq_pkt, gmc_value, + comp, DISP_REG_OVL_RDMA_GMC(idx)); + mtk_ddp_write_mask(cmdq_pkt, BIT(idx), comp, + DISP_REG_OVL_SRC_CON, BIT(idx)); } -static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx) +static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx, + struct cmdq_pkt *cmdq_pkt) { - unsigned int reg; - - reg = readl(comp->regs + DISP_REG_OVL_SRC_CON); - reg = reg & ~BIT(idx); - writel(reg, comp->regs + DISP_REG_OVL_SRC_CON); - - writel(0x0, comp->regs + DISP_REG_OVL_RDMA_CTRL(idx)); + mtk_ddp_write_mask(cmdq_pkt, 0, comp, + DISP_REG_OVL_SRC_CON, BIT(idx)); + mtk_ddp_write(cmdq_pkt, 0, comp, + DISP_REG_OVL_RDMA_CTRL(idx)); } static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt) @@ -249,7 +248,8 @@ static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt) } static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, - struct mtk_plane_state *state) + struct mtk_plane_state *state, + struct cmdq_pkt *cmdq_pkt) { struct mtk_disp_ovl *ovl = comp_to_ovl(comp); struct mtk_plane_pending_state *pending = &state->pending; @@ -260,11 +260,13 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, unsigned int src_size = (pending->height << 16) | pending->width; unsigned int con; - if (!pending->enable) - mtk_ovl_layer_off(comp, idx); + if (!pending->enable) { + mtk_ovl_layer_off(comp, idx, cmdq_pkt); + return; + } con = ovl_fmt_convert(ovl, fmt); - if (idx != 0) + if (state->base.fb->format->has_alpha) con |= OVL_CON_AEN | OVL_CON_ALPHA; if (pending->rotation & DRM_MODE_REFLECT_Y) { @@ -277,14 +279,18 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, addr += pending->pitch - 1; } - writel_relaxed(con, comp->regs + DISP_REG_OVL_CON(idx)); - writel_relaxed(pitch, comp->regs + DISP_REG_OVL_PITCH(idx)); - writel_relaxed(src_size, comp->regs + DISP_REG_OVL_SRC_SIZE(idx)); - writel_relaxed(offset, comp->regs + DISP_REG_OVL_OFFSET(idx)); - writel_relaxed(addr, comp->regs + DISP_REG_OVL_ADDR(ovl, idx)); - - if (pending->enable) - mtk_ovl_layer_on(comp, idx); + mtk_ddp_write_relaxed(cmdq_pkt, con, comp, + DISP_REG_OVL_CON(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, pitch, comp, + DISP_REG_OVL_PITCH(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, src_size, comp, + DISP_REG_OVL_SRC_SIZE(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, offset, comp, + DISP_REG_OVL_OFFSET(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, addr, comp, + DISP_REG_OVL_ADDR(ovl, idx)); + + mtk_ovl_layer_on(comp, idx, cmdq_pkt); } static void mtk_ovl_bgclr_in_on(struct mtk_ddp_comp *comp) @@ -313,8 +319,6 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .disable_vblank = mtk_ovl_disable_vblank, .supported_rotations = mtk_ovl_supported_rotations, .layer_nr = mtk_ovl_layer_nr, - .layer_on = mtk_ovl_layer_on, - .layer_off = mtk_ovl_layer_off, .layer_check = mtk_ovl_layer_check, .layer_config = mtk_ovl_layer_config, .bgclr_in_on = mtk_ovl_bgclr_in_on, diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 405afef31407..0cb848d64206 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -9,6 +9,7 @@ #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/platform_device.h> +#include <linux/soc/mediatek/mtk-cmdq.h> #include "mtk_drm_crtc.h" #include "mtk_drm_ddp_comp.h" @@ -125,14 +126,16 @@ static void mtk_rdma_stop(struct mtk_ddp_comp *comp) static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width, unsigned int height, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { unsigned int threshold; unsigned int reg; struct mtk_disp_rdma *rdma = comp_to_rdma(comp); - rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, 0xfff, width); - rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_1, 0xfffff, height); + mtk_ddp_write_mask(cmdq_pkt, width, comp, + DISP_REG_RDMA_SIZE_CON_0, 0xfff); + mtk_ddp_write_mask(cmdq_pkt, height, comp, + DISP_REG_RDMA_SIZE_CON_1, 0xfffff); /* * Enable FIFO underflow since DSI and DPI can't be blocked. @@ -144,7 +147,7 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width, reg = RDMA_FIFO_UNDERFLOW_EN | RDMA_FIFO_PSEUDO_SIZE(RDMA_FIFO_SIZE(rdma)) | RDMA_OUTPUT_VALID_FIFO_THRESHOLD(threshold); - writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON); + mtk_ddp_write(cmdq_pkt, reg, comp, DISP_REG_RDMA_FIFO_CON); } static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma, @@ -190,7 +193,8 @@ static unsigned int mtk_rdma_layer_nr(struct mtk_ddp_comp *comp) } static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, - struct mtk_plane_state *state) + struct mtk_plane_state *state, + struct cmdq_pkt *cmdq_pkt) { struct mtk_disp_rdma *rdma = comp_to_rdma(comp); struct mtk_plane_pending_state *pending = &state->pending; @@ -200,24 +204,27 @@ static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, unsigned int con; con = rdma_fmt_convert(rdma, fmt); - writel_relaxed(con, comp->regs + DISP_RDMA_MEM_CON); + mtk_ddp_write_relaxed(cmdq_pkt, con, comp, DISP_RDMA_MEM_CON); if (fmt == DRM_FORMAT_UYVY || fmt == DRM_FORMAT_YUYV) { - rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, - RDMA_MATRIX_ENABLE, RDMA_MATRIX_ENABLE); - rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, - RDMA_MATRIX_INT_MTX_SEL, - RDMA_MATRIX_INT_MTX_BT601_to_RGB); + mtk_ddp_write_mask(cmdq_pkt, RDMA_MATRIX_ENABLE, comp, + DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_ENABLE); + mtk_ddp_write_mask(cmdq_pkt, RDMA_MATRIX_INT_MTX_BT601_to_RGB, + comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_INT_MTX_SEL); } else { - rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, - RDMA_MATRIX_ENABLE, 0); + mtk_ddp_write_mask(cmdq_pkt, 0, comp, + DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_ENABLE); } + mtk_ddp_write_relaxed(cmdq_pkt, addr, comp, DISP_RDMA_MEM_START_ADDR); + mtk_ddp_write_relaxed(cmdq_pkt, pitch, comp, DISP_RDMA_MEM_SRC_PITCH); + mtk_ddp_write(cmdq_pkt, RDMA_MEM_GMC, comp, + DISP_RDMA_MEM_GMC_SETTING_0); + mtk_ddp_write_mask(cmdq_pkt, RDMA_MODE_MEMORY, comp, + DISP_REG_RDMA_GLOBAL_CON, RDMA_MODE_MEMORY); - writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR); - writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH); - writel(RDMA_MEM_GMC, comp->regs + DISP_RDMA_MEM_GMC_SETTING_0); - rdma_update_bits(comp, DISP_REG_RDMA_GLOBAL_CON, - RDMA_MODE_MEMORY, RDMA_MODE_MEMORY); } static const struct mtk_ddp_comp_funcs mtk_disp_rdma_funcs = { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index f80a8ba75977..0dfcd1787e65 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -5,6 +5,7 @@ #include <linux/clk.h> #include <linux/pm_runtime.h> +#include <linux/soc/mediatek/mtk-cmdq.h> #include <asm/barrier.h> #include <soc/mediatek/smi.h> @@ -42,11 +43,20 @@ struct mtk_drm_crtc { struct drm_plane *planes; unsigned int layer_nr; bool pending_planes; + bool pending_async_planes; + +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + struct cmdq_client *cmdq_client; + u32 cmdq_event; +#endif void __iomem *config_regs; struct mtk_disp_mutex *mutex; unsigned int ddp_comp_nr; struct mtk_ddp_comp **ddp_comp; + + /* lock for display hardware access */ + struct mutex hw_lock; }; struct mtk_crtc_state { @@ -215,11 +225,12 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_ddp_comp *comp; int i, count = 0; + unsigned int local_index = plane - mtk_crtc->planes; for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { comp = mtk_crtc->ddp_comp[i]; - if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) { - *local_layer = plane->index - count; + if (local_index < (count + mtk_ddp_comp_layer_nr(comp))) { + *local_layer = local_index - count; return comp; } count += mtk_ddp_comp_layer_nr(comp); @@ -229,6 +240,13 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, return NULL; } +#if IS_REACHABLE(CONFIG_MTK_CMDQ) +static void ddp_cmdq_cb(struct cmdq_cb_data data) +{ + cmdq_pkt_destroy(data.data); +} +#endif + static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) { struct drm_crtc *crtc = &mtk_crtc->base; @@ -297,7 +315,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) if (i == 1) mtk_ddp_comp_bgclr_in_on(comp); - mtk_ddp_comp_config(comp, width, height, vrefresh, bpc); + mtk_ddp_comp_config(comp, width, height, vrefresh, bpc, NULL); mtk_ddp_comp_start(comp); } @@ -310,7 +328,9 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) plane_state = to_mtk_plane_state(plane->state); comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); - mtk_ddp_comp_layer_config(comp, local_layer, plane_state); + if (comp) + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state, NULL); } return 0; @@ -325,6 +345,7 @@ err_pm_runtime_put: static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) { struct drm_device *drm = mtk_crtc->base.dev; + struct drm_crtc *crtc = &mtk_crtc->base; int i; DRM_DEBUG_DRIVER("%s\n", __func__); @@ -350,9 +371,17 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) mtk_disp_mutex_unprepare(mtk_crtc->mutex); pm_runtime_put(drm->dev); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irq(&crtc->dev->event_lock); + } } -static void mtk_crtc_ddp_config(struct drm_crtc *crtc) +static void mtk_crtc_ddp_config(struct drm_crtc *crtc, + struct cmdq_pkt *cmdq_handle) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state); @@ -368,7 +397,8 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) if (state->pending_config) { mtk_ddp_comp_config(comp, state->pending_width, state->pending_height, - state->pending_vrefresh, 0); + state->pending_vrefresh, 0, + cmdq_handle); state->pending_config = false; } @@ -386,12 +416,84 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); - mtk_ddp_comp_layer_config(comp, local_layer, - plane_state); + if (comp) + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state, + cmdq_handle); plane_state->pending.config = false; } mtk_crtc->pending_planes = false; } + + if (mtk_crtc->pending_async_planes) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *plane_state; + + plane_state = to_mtk_plane_state(plane->state); + + if (!plane_state->pending.async_config) + continue; + + comp = mtk_drm_ddp_comp_for_plane(crtc, plane, + &local_layer); + + if (comp) + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state, + cmdq_handle); + plane_state->pending.async_config = false; + } + mtk_crtc->pending_async_planes = false; + } +} + +static void mtk_drm_crtc_hw_config(struct mtk_drm_crtc *mtk_crtc) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + struct cmdq_pkt *cmdq_handle; +#endif + struct drm_crtc *crtc = &mtk_crtc->base; + struct mtk_drm_private *priv = crtc->dev->dev_private; + unsigned int pending_planes = 0, pending_async_planes = 0; + int i; + + mutex_lock(&mtk_crtc->hw_lock); + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *plane_state; + + plane_state = to_mtk_plane_state(plane->state); + if (plane_state->pending.dirty) { + plane_state->pending.config = true; + plane_state->pending.dirty = false; + pending_planes |= BIT(i); + } else if (plane_state->pending.async_dirty) { + plane_state->pending.async_config = true; + plane_state->pending.async_dirty = false; + pending_async_planes |= BIT(i); + } + } + if (pending_planes) + mtk_crtc->pending_planes = true; + if (pending_async_planes) + mtk_crtc->pending_async_planes = true; + + if (priv->data->shadow_register) { + mtk_disp_mutex_acquire(mtk_crtc->mutex); + mtk_crtc_ddp_config(crtc, NULL); + mtk_disp_mutex_release(mtk_crtc->mutex); + } +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (mtk_crtc->cmdq_client) { + cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE); + cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); + cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event); + mtk_crtc_ddp_config(crtc, cmdq_handle); + cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle); + } +#endif + mutex_unlock(&mtk_crtc->hw_lock); } int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, @@ -401,7 +503,23 @@ int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, struct mtk_ddp_comp *comp; comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); - return mtk_ddp_comp_layer_check(comp, local_layer, state); + if (comp) + return mtk_ddp_comp_layer_check(comp, local_layer, state); + return 0; +} + +void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + const struct drm_plane_helper_funcs *plane_helper_funcs = + plane->helper_private; + + if (!mtk_crtc->enabled) + return; + + plane_helper_funcs->atomic_update(plane, new_state); + mtk_drm_crtc_hw_config(mtk_crtc); } static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, @@ -451,6 +569,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, } mtk_crtc->pending_planes = true; + mtk_drm_crtc_hw_config(mtk_crtc); /* Wait for planes to be disabled */ drm_crtc_wait_one_vblank(crtc); @@ -482,34 +601,16 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_drm_private *priv = crtc->dev->dev_private; - unsigned int pending_planes = 0; int i; if (mtk_crtc->event) mtk_crtc->pending_needs_vblank = true; - for (i = 0; i < mtk_crtc->layer_nr; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i]; - struct mtk_plane_state *plane_state; - - plane_state = to_mtk_plane_state(plane->state); - if (plane_state->pending.dirty) { - plane_state->pending.config = true; - plane_state->pending.dirty = false; - pending_planes |= BIT(i); - } - } - if (pending_planes) - mtk_crtc->pending_planes = true; if (crtc->state->color_mgmt_changed) - for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { mtk_ddp_gamma_set(mtk_crtc->ddp_comp[i], crtc->state); - - if (priv->data->shadow_register) { - mtk_disp_mutex_acquire(mtk_crtc->mutex); - mtk_crtc_ddp_config(crtc); - mtk_disp_mutex_release(mtk_crtc->mutex); - } + mtk_ddp_ctm_set(mtk_crtc->ddp_comp[i], crtc->state); + } + mtk_drm_crtc_hw_config(mtk_crtc); } static const struct drm_crtc_funcs mtk_crtc_funcs = { @@ -559,8 +660,12 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp) struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_drm_private *priv = crtc->dev->dev_private; +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (!priv->data->shadow_register && !mtk_crtc->cmdq_client) +#else if (!priv->data->shadow_register) - mtk_crtc_ddp_config(crtc); +#endif + mtk_crtc_ddp_config(crtc, NULL); mtk_drm_finish_page_flip(mtk_crtc); } @@ -627,6 +732,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, int pipe = priv->num_pipes; int ret; int i; + bool has_ctm = false; + uint gamma_lut_size = 0; if (!path) return 0; @@ -677,6 +784,14 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } mtk_crtc->ddp_comp[i] = comp; + + if (comp->funcs) { + if (comp->funcs->gamma_set) + gamma_lut_size = MTK_LUT_SIZE; + + if (comp->funcs->ctm_set) + has_ctm = true; + } } for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) @@ -697,9 +812,28 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, NULL, pipe); if (ret < 0) return ret; - drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE); - drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, false, MTK_LUT_SIZE); - priv->num_pipes++; + if (gamma_lut_size) + drm_mode_crtc_set_gamma_size(&mtk_crtc->base, gamma_lut_size); + drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, has_ctm, gamma_lut_size); + priv->num_pipes++; + mutex_init(&mtk_crtc->hw_lock); + +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + mtk_crtc->cmdq_client = + cmdq_mbox_create(dev, drm_crtc_index(&mtk_crtc->base), + 2000); + if (IS_ERR(mtk_crtc->cmdq_client)) { + dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n", + drm_crtc_index(&mtk_crtc->base)); + mtk_crtc->cmdq_client = NULL; + } + ret = of_property_read_u32_index(dev->of_node, "mediatek,gce-events", + drm_crtc_index(&mtk_crtc->base), + &mtk_crtc->cmdq_event); + if (ret) + dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", + drm_crtc_index(&mtk_crtc->base)); +#endif return 0; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index 6afe1c19557a..a2b4677a451c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -21,5 +21,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, unsigned int path_len); int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, struct mtk_plane_state *state); +void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, + struct drm_plane_state *plane_state); #endif /* MTK_DRM_CRTC_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 7f21307cda75..1f5a112bb034 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -12,7 +12,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> - +#include <linux/soc/mediatek/mtk-cmdq.h> #include "mtk_drm_drv.h" #include "mtk_drm_plane.h" #include "mtk_drm_ddp_comp.h" @@ -37,7 +37,15 @@ #define CCORR_EN BIT(0) #define DISP_CCORR_CFG 0x0020 #define CCORR_RELAY_MODE BIT(0) +#define CCORR_ENGINE_EN BIT(1) +#define CCORR_GAMMA_OFF BIT(2) +#define CCORR_WGAMUT_SRC_CLIP BIT(3) #define DISP_CCORR_SIZE 0x0030 +#define DISP_CCORR_COEF_0 0x0080 +#define DISP_CCORR_COEF_1 0x0084 +#define DISP_CCORR_COEF_2 0x0088 +#define DISP_CCORR_COEF_3 0x008C +#define DISP_CCORR_COEF_4 0x0090 #define DISP_DITHER_EN 0x0000 #define DITHER_EN BIT(0) @@ -76,36 +84,84 @@ #define DITHER_ADD_LSHIFT_G(x) (((x) & 0x7) << 4) #define DITHER_ADD_RSHIFT_G(x) (((x) & 0x7) << 0) +void mtk_ddp_write(struct cmdq_pkt *cmdq_pkt, unsigned int value, + struct mtk_ddp_comp *comp, unsigned int offset) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (cmdq_pkt) + cmdq_pkt_write(cmdq_pkt, comp->subsys, + comp->regs_pa + offset, value); + else +#endif + writel(value, comp->regs + offset); +} + +void mtk_ddp_write_relaxed(struct cmdq_pkt *cmdq_pkt, unsigned int value, + struct mtk_ddp_comp *comp, + unsigned int offset) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (cmdq_pkt) + cmdq_pkt_write(cmdq_pkt, comp->subsys, + comp->regs_pa + offset, value); + else +#endif + writel_relaxed(value, comp->regs + offset); +} + +void mtk_ddp_write_mask(struct cmdq_pkt *cmdq_pkt, + unsigned int value, + struct mtk_ddp_comp *comp, + unsigned int offset, + unsigned int mask) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (cmdq_pkt) { + cmdq_pkt_write_mask(cmdq_pkt, comp->subsys, + comp->regs_pa + offset, value, mask); + } else { +#endif + u32 tmp = readl(comp->regs + offset); + + tmp = (tmp & ~mask) | (value & mask); + writel(tmp, comp->regs + offset); +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + } +#endif +} + void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc, - unsigned int CFG) + unsigned int CFG, struct cmdq_pkt *cmdq_pkt) { /* If bpc equal to 0, the dithering function didn't be enabled */ if (bpc == 0) return; if (bpc >= MTK_MIN_BPC) { - writel(0, comp->regs + DISP_DITHER_5); - writel(0, comp->regs + DISP_DITHER_7); - writel(DITHER_LSB_ERR_SHIFT_R(MTK_MAX_BPC - bpc) | - DITHER_ADD_LSHIFT_R(MTK_MAX_BPC - bpc) | - DITHER_NEW_BIT_MODE, - comp->regs + DISP_DITHER_15); - writel(DITHER_LSB_ERR_SHIFT_B(MTK_MAX_BPC - bpc) | - DITHER_ADD_LSHIFT_B(MTK_MAX_BPC - bpc) | - DITHER_LSB_ERR_SHIFT_G(MTK_MAX_BPC - bpc) | - DITHER_ADD_LSHIFT_G(MTK_MAX_BPC - bpc), - comp->regs + DISP_DITHER_16); - writel(DISP_DITHERING, comp->regs + CFG); + mtk_ddp_write(cmdq_pkt, 0, comp, DISP_DITHER_5); + mtk_ddp_write(cmdq_pkt, 0, comp, DISP_DITHER_7); + mtk_ddp_write(cmdq_pkt, + DITHER_LSB_ERR_SHIFT_R(MTK_MAX_BPC - bpc) | + DITHER_ADD_LSHIFT_R(MTK_MAX_BPC - bpc) | + DITHER_NEW_BIT_MODE, + comp, DISP_DITHER_15); + mtk_ddp_write(cmdq_pkt, + DITHER_LSB_ERR_SHIFT_B(MTK_MAX_BPC - bpc) | + DITHER_ADD_LSHIFT_B(MTK_MAX_BPC - bpc) | + DITHER_LSB_ERR_SHIFT_G(MTK_MAX_BPC - bpc) | + DITHER_ADD_LSHIFT_G(MTK_MAX_BPC - bpc), + comp, DISP_DITHER_16); + mtk_ddp_write(cmdq_pkt, DISP_DITHERING, comp, CFG); } } static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { - writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + DISP_OD_CFG); - mtk_dither_set(comp, bpc, DISP_OD_CFG); + mtk_ddp_write(cmdq_pkt, w << 16 | h, comp, DISP_OD_SIZE); + mtk_ddp_write(cmdq_pkt, OD_RELAYMODE, comp, DISP_OD_CFG); + mtk_dither_set(comp, bpc, DISP_OD_CFG, cmdq_pkt); } static void mtk_od_start(struct mtk_ddp_comp *comp) @@ -120,9 +176,9 @@ static void mtk_ufoe_start(struct mtk_ddp_comp *comp) static void mtk_aal_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { - writel(h << 16 | w, comp->regs + DISP_AAL_SIZE); + mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_AAL_SIZE); } static void mtk_aal_start(struct mtk_ddp_comp *comp) @@ -137,10 +193,10 @@ static void mtk_aal_stop(struct mtk_ddp_comp *comp) static void mtk_ccorr_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { - writel(h << 16 | w, comp->regs + DISP_CCORR_SIZE); - writel(CCORR_RELAY_MODE, comp->regs + DISP_CCORR_CFG); + mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_CCORR_SIZE); + mtk_ddp_write(cmdq_pkt, CCORR_ENGINE_EN, comp, DISP_CCORR_CFG); } static void mtk_ccorr_start(struct mtk_ddp_comp *comp) @@ -153,12 +209,63 @@ static void mtk_ccorr_stop(struct mtk_ddp_comp *comp) writel_relaxed(0x0, comp->regs + DISP_CCORR_EN); } +/* Converts a DRM S31.32 value to the HW S1.10 format. */ +static u16 mtk_ctm_s31_32_to_s1_10(u64 in) +{ + u16 r; + + /* Sign bit. */ + r = in & BIT_ULL(63) ? BIT(11) : 0; + + if ((in & GENMASK_ULL(62, 33)) > 0) { + /* identity value 0x100000000 -> 0x400, */ + /* if bigger this, set it to max 0x7ff. */ + r |= GENMASK(10, 0); + } else { + /* take the 11 most important bits. */ + r |= (in >> 22) & GENMASK(10, 0); + } + + return r; +} + +static void mtk_ccorr_ctm_set(struct mtk_ddp_comp *comp, + struct drm_crtc_state *state) +{ + struct drm_property_blob *blob = state->ctm; + struct drm_color_ctm *ctm; + const u64 *input; + uint16_t coeffs[9] = { 0 }; + int i; + struct cmdq_pkt *cmdq_pkt = NULL; + + if (!blob) + return; + + ctm = (struct drm_color_ctm *)blob->data; + input = ctm->matrix; + + for (i = 0; i < ARRAY_SIZE(coeffs); i++) + coeffs[i] = mtk_ctm_s31_32_to_s1_10(input[i]); + + mtk_ddp_write(cmdq_pkt, coeffs[0] << 16 | coeffs[1], + comp, DISP_CCORR_COEF_0); + mtk_ddp_write(cmdq_pkt, coeffs[2] << 16 | coeffs[3], + comp, DISP_CCORR_COEF_1); + mtk_ddp_write(cmdq_pkt, coeffs[4] << 16 | coeffs[5], + comp, DISP_CCORR_COEF_2); + mtk_ddp_write(cmdq_pkt, coeffs[6] << 16 | coeffs[7], + comp, DISP_CCORR_COEF_3); + mtk_ddp_write(cmdq_pkt, coeffs[8] << 16, + comp, DISP_CCORR_COEF_4); +} + static void mtk_dither_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { - writel(h << 16 | w, comp->regs + DISP_DITHER_SIZE); - writel(DITHER_RELAY_MODE, comp->regs + DISP_DITHER_CFG); + mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_DITHER_SIZE); + mtk_ddp_write(cmdq_pkt, DITHER_RELAY_MODE, comp, DISP_DITHER_CFG); } static void mtk_dither_start(struct mtk_ddp_comp *comp) @@ -173,10 +280,10 @@ static void mtk_dither_stop(struct mtk_ddp_comp *comp) static void mtk_gamma_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, - unsigned int bpc) + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { - writel(h << 16 | w, comp->regs + DISP_GAMMA_SIZE); - mtk_dither_set(comp, bpc, DISP_GAMMA_CFG); + mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_GAMMA_SIZE); + mtk_dither_set(comp, bpc, DISP_GAMMA_CFG, cmdq_pkt); } static void mtk_gamma_start(struct mtk_ddp_comp *comp) @@ -223,6 +330,7 @@ static const struct mtk_ddp_comp_funcs ddp_ccorr = { .config = mtk_ccorr_config, .start = mtk_ccorr_start, .stop = mtk_ccorr_stop, + .ctm_set = mtk_ccorr_ctm_set, }; static const struct mtk_ddp_comp_funcs ddp_dither = { @@ -326,6 +434,11 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node, enum mtk_ddp_comp_type type; struct device_node *larb_node; struct platform_device *larb_pdev; +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + struct resource res; + struct cmdq_client_reg cmdq_reg; + int ret; +#endif if (comp_id < 0 || comp_id >= DDP_COMPONENT_ID_MAX) return -EINVAL; @@ -379,6 +492,19 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node, comp->larb_dev = &larb_pdev->dev; +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (of_address_to_resource(node, 0, &res) != 0) { + dev_err(dev, "Missing reg in %s node\n", node->full_name); + return -EINVAL; + } + comp->regs_pa = res.start; + + ret = cmdq_dev_get_client_reg(dev, &cmdq_reg, 0); + if (ret) + dev_dbg(dev, "get mediatek,gce-client-reg fail!\n"); + else + comp->subsys = cmdq_reg.subsys; +#endif return 0; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 2f1e9e75b8da..debe36395fe7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -69,27 +69,29 @@ enum mtk_ddp_comp_id { }; struct mtk_ddp_comp; - +struct cmdq_pkt; struct mtk_ddp_comp_funcs { void (*config)(struct mtk_ddp_comp *comp, unsigned int w, - unsigned int h, unsigned int vrefresh, unsigned int bpc); + unsigned int h, unsigned int vrefresh, + unsigned int bpc, struct cmdq_pkt *cmdq_pkt); void (*start)(struct mtk_ddp_comp *comp); void (*stop)(struct mtk_ddp_comp *comp); void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc); void (*disable_vblank)(struct mtk_ddp_comp *comp); unsigned int (*supported_rotations)(struct mtk_ddp_comp *comp); unsigned int (*layer_nr)(struct mtk_ddp_comp *comp); - void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx); - void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx); int (*layer_check)(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state); void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx, - struct mtk_plane_state *state); + struct mtk_plane_state *state, + struct cmdq_pkt *cmdq_pkt); void (*gamma_set)(struct mtk_ddp_comp *comp, struct drm_crtc_state *state); void (*bgclr_in_on)(struct mtk_ddp_comp *comp); void (*bgclr_in_off)(struct mtk_ddp_comp *comp); + void (*ctm_set)(struct mtk_ddp_comp *comp, + struct drm_crtc_state *state); }; struct mtk_ddp_comp { @@ -99,14 +101,17 @@ struct mtk_ddp_comp { struct device *larb_dev; enum mtk_ddp_comp_id id; const struct mtk_ddp_comp_funcs *funcs; + resource_size_t regs_pa; + u8 subsys; }; static inline void mtk_ddp_comp_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, - unsigned int vrefresh, unsigned int bpc) + unsigned int vrefresh, unsigned int bpc, + struct cmdq_pkt *cmdq_pkt) { if (comp->funcs && comp->funcs->config) - comp->funcs->config(comp, w, h, vrefresh, bpc); + comp->funcs->config(comp, w, h, vrefresh, bpc, cmdq_pkt); } static inline void mtk_ddp_comp_start(struct mtk_ddp_comp *comp) @@ -151,20 +156,6 @@ static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp) return 0; } -static inline void mtk_ddp_comp_layer_on(struct mtk_ddp_comp *comp, - unsigned int idx) -{ - if (comp->funcs && comp->funcs->layer_on) - comp->funcs->layer_on(comp, idx); -} - -static inline void mtk_ddp_comp_layer_off(struct mtk_ddp_comp *comp, - unsigned int idx) -{ - if (comp->funcs && comp->funcs->layer_off) - comp->funcs->layer_off(comp, idx); -} - static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state) @@ -176,10 +167,11 @@ static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp, static inline void mtk_ddp_comp_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, - struct mtk_plane_state *state) + struct mtk_plane_state *state, + struct cmdq_pkt *cmdq_pkt) { if (comp->funcs && comp->funcs->layer_config) - comp->funcs->layer_config(comp, idx, state); + comp->funcs->layer_config(comp, idx, state, cmdq_pkt); } static inline void mtk_ddp_gamma_set(struct mtk_ddp_comp *comp, @@ -201,6 +193,13 @@ static inline void mtk_ddp_comp_bgclr_in_off(struct mtk_ddp_comp *comp) comp->funcs->bgclr_in_off(comp); } +static inline void mtk_ddp_ctm_set(struct mtk_ddp_comp *comp, + struct drm_crtc_state *state) +{ + if (comp->funcs && comp->funcs->ctm_set) + comp->funcs->ctm_set(comp, state); +} + int mtk_ddp_comp_get_id(struct device_node *node, enum mtk_ddp_comp_type comp_type); int mtk_ddp_comp_init(struct device *dev, struct device_node *comp_node, @@ -209,6 +208,13 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *comp_node, int mtk_ddp_comp_register(struct drm_device *drm, struct mtk_ddp_comp *comp); void mtk_ddp_comp_unregister(struct drm_device *drm, struct mtk_ddp_comp *comp); void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc, - unsigned int CFG); - + unsigned int CFG, struct cmdq_pkt *cmdq_pkt); +enum mtk_ddp_comp_type mtk_ddp_comp_get_type(enum mtk_ddp_comp_id comp_id); +void mtk_ddp_write(struct cmdq_pkt *cmdq_pkt, unsigned int value, + struct mtk_ddp_comp *comp, unsigned int offset); +void mtk_ddp_write_relaxed(struct cmdq_pkt *cmdq_pkt, unsigned int value, + struct mtk_ddp_comp *comp, unsigned int offset); +void mtk_ddp_write_mask(struct cmdq_pkt *cmdq_pkt, unsigned int value, + struct mtk_ddp_comp *comp, unsigned int offset, + unsigned int mask); #endif /* MTK_DRM_DDP_COMP_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2b1c122066ea..0563c6813333 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -37,84 +37,9 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 -static void mtk_atomic_schedule(struct mtk_drm_private *private, - struct drm_atomic_state *state) -{ - private->commit.state = state; - schedule_work(&private->commit.work); -} - -static void mtk_atomic_complete(struct mtk_drm_private *private, - struct drm_atomic_state *state) -{ - struct drm_device *drm = private->drm; - - drm_atomic_helper_wait_for_fences(drm, state, false); - - /* - * Mediatek drm supports runtime PM, so plane registers cannot be - * written when their crtc is disabled. - * - * The comment for drm_atomic_helper_commit states: - * For drivers supporting runtime PM the recommended sequence is - * - * drm_atomic_helper_commit_modeset_disables(dev, state); - * drm_atomic_helper_commit_modeset_enables(dev, state); - * drm_atomic_helper_commit_planes(dev, state, - * DRM_PLANE_COMMIT_ACTIVE_ONLY); - * - * See the kerneldoc entries for these three functions for more details. - */ - drm_atomic_helper_commit_modeset_disables(drm, state); - drm_atomic_helper_commit_modeset_enables(drm, state); - drm_atomic_helper_commit_planes(drm, state, - DRM_PLANE_COMMIT_ACTIVE_ONLY); - - drm_atomic_helper_wait_for_vblanks(drm, state); - - drm_atomic_helper_cleanup_planes(drm, state); - drm_atomic_state_put(state); -} - -static void mtk_atomic_work(struct work_struct *work) -{ - struct mtk_drm_private *private = container_of(work, - struct mtk_drm_private, commit.work); - - mtk_atomic_complete(private, private->commit.state); -} - -static int mtk_atomic_commit(struct drm_device *drm, - struct drm_atomic_state *state, - bool async) -{ - struct mtk_drm_private *private = drm->dev_private; - int ret; - - ret = drm_atomic_helper_prepare_planes(drm, state); - if (ret) - return ret; - - mutex_lock(&private->commit.lock); - flush_work(&private->commit.work); - - ret = drm_atomic_helper_swap_state(state, true); - if (ret) { - mutex_unlock(&private->commit.lock); - drm_atomic_helper_cleanup_planes(drm, state); - return ret; - } - - drm_atomic_state_get(state); - if (async) - mtk_atomic_schedule(private, state); - else - mtk_atomic_complete(private, state); - - mutex_unlock(&private->commit.lock); - - return 0; -} +static const struct drm_mode_config_helper_funcs mtk_drm_mode_config_helpers = { + .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, +}; static struct drm_framebuffer * mtk_drm_mode_fb_create(struct drm_device *dev, @@ -132,7 +57,7 @@ mtk_drm_mode_fb_create(struct drm_device *dev, static const struct drm_mode_config_funcs mtk_drm_mode_config_funcs = { .fb_create = mtk_drm_mode_fb_create, .atomic_check = drm_atomic_helper_check, - .atomic_commit = mtk_atomic_commit, + .atomic_commit = drm_atomic_helper_commit, }; static const enum mtk_ddp_comp_id mt2701_mtk_ddp_main[] = { @@ -250,6 +175,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) drm->mode_config.max_width = 4096; drm->mode_config.max_height = 4096; drm->mode_config.funcs = &mtk_drm_mode_config_funcs; + drm->mode_config.helper_private = &mtk_drm_mode_config_helpers; ret = component_bind_all(drm->dev, drm); if (ret) @@ -509,8 +435,6 @@ static int mtk_drm_probe(struct platform_device *pdev) if (!private) return -ENOMEM; - mutex_init(&private->commit.lock); - INIT_WORK(&private->commit.work, mtk_atomic_work); private->data = of_device_get_match_data(dev); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h index e03fea12ff59..17bc99b9f5d4 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h @@ -43,13 +43,6 @@ struct mtk_drm_private { struct device_node *comp_node[DDP_COMPONENT_ID_MAX]; struct mtk_ddp_comp *ddp_comp[DDP_COMPONENT_ID_MAX]; const struct mtk_mmsys_driver_data *data; - - struct { - struct drm_atomic_state *state; - struct work_struct work; - struct mutex lock; - } commit; - struct drm_atomic_state *suspend_state; bool dma_parms_allocated; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index f0b0325381e0..914cc7619cd7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -7,6 +7,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_atomic_uapi.h> #include <drm/drm_plane_helper.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -75,6 +76,50 @@ static void mtk_drm_plane_destroy_state(struct drm_plane *plane, kfree(to_mtk_plane_state(state)); } +static int mtk_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_crtc_state *crtc_state; + + if (plane != state->crtc->cursor) + return -EINVAL; + + if (!plane->state) + return -EINVAL; + + if (!plane->state->fb) + return -EINVAL; + + if (state->state) + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + else /* Special case for asynchronous cursor updates. */ + crtc_state = state->crtc->state; + + return drm_atomic_helper_check_plane_state(plane->state, crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); +} + +static void mtk_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct mtk_plane_state *state = to_mtk_plane_state(plane->state); + + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + plane->state->crtc_h = new_state->crtc_h; + plane->state->crtc_w = new_state->crtc_w; + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->src_h = new_state->src_h; + plane->state->src_w = new_state->src_w; + state->pending.async_dirty = true; + + mtk_drm_crtc_async_update(new_state->crtc, plane, new_state); +} + static const struct drm_plane_funcs mtk_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -163,6 +208,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { .atomic_check = mtk_plane_atomic_check, .atomic_update = mtk_plane_atomic_update, .atomic_disable = mtk_plane_atomic_disable, + .atomic_async_update = mtk_plane_atomic_async_update, + .atomic_async_check = mtk_plane_atomic_async_check, }; int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h index 760885e35b27..d454bece9535 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h @@ -22,6 +22,8 @@ struct mtk_plane_pending_state { unsigned int height; unsigned int rotation; bool dirty; + bool async_dirty; + bool async_config; }; struct mtk_plane_state { diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 3b5e016d16c4..5fa1073cf26b 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -230,28 +230,25 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data) static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi) { u32 timcon0, timcon1, timcon2, timcon3; - u32 ui, cycle_time; + u32 data_rate_mhz = DIV_ROUND_UP(dsi->data_rate, 1000000); struct mtk_phy_timing *timing = &dsi->phy_timing; - ui = DIV_ROUND_UP(1000000000, dsi->data_rate); - cycle_time = div_u64(8000000000ULL, dsi->data_rate); + timing->lpx = (60 * data_rate_mhz / (8 * 1000)) + 1; + timing->da_hs_prepare = (80 * data_rate_mhz + 4 * 1000) / 8000; + timing->da_hs_zero = (170 * data_rate_mhz + 10 * 1000) / 8000 + 1 - + timing->da_hs_prepare; + timing->da_hs_trail = timing->da_hs_prepare + 1; - timing->lpx = NS_TO_CYCLE(60, cycle_time); - timing->da_hs_prepare = NS_TO_CYCLE(50 + 5 * ui, cycle_time); - timing->da_hs_zero = NS_TO_CYCLE(110 + 6 * ui, cycle_time); - timing->da_hs_trail = NS_TO_CYCLE(77 + 4 * ui, cycle_time); + timing->ta_go = 4 * timing->lpx - 2; + timing->ta_sure = timing->lpx + 2; + timing->ta_get = 4 * timing->lpx; + timing->da_hs_exit = 2 * timing->lpx + 1; - timing->ta_go = 4 * timing->lpx; - timing->ta_sure = 3 * timing->lpx / 2; - timing->ta_get = 5 * timing->lpx; - timing->da_hs_exit = 2 * timing->lpx; - - timing->clk_hs_zero = NS_TO_CYCLE(336, cycle_time); - timing->clk_hs_trail = NS_TO_CYCLE(100, cycle_time) + 10; - - timing->clk_hs_prepare = NS_TO_CYCLE(64, cycle_time); - timing->clk_hs_post = NS_TO_CYCLE(80 + 52 * ui, cycle_time); - timing->clk_hs_exit = 2 * timing->lpx; + timing->clk_hs_prepare = 70 * data_rate_mhz / (8 * 1000); + timing->clk_hs_post = timing->clk_hs_prepare + 8; + timing->clk_hs_trail = timing->clk_hs_prepare; + timing->clk_hs_zero = timing->clk_hs_trail * 4; + timing->clk_hs_exit = 2 * timing->clk_hs_trail; timcon0 = timing->lpx | timing->da_hs_prepare << 8 | timing->da_hs_zero << 16 | timing->da_hs_trail << 24; @@ -482,27 +479,39 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) dsi_tmp_buf_bpp - 10); data_phy_cycles = timing->lpx + timing->da_hs_prepare + - timing->da_hs_zero + timing->da_hs_exit + 2; + timing->da_hs_zero + timing->da_hs_exit + 3; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) { - if (vm->hfront_porch * dsi_tmp_buf_bpp > + if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp > data_phy_cycles * dsi->lanes + 18) { - horizontal_frontporch_byte = vm->hfront_porch * - dsi_tmp_buf_bpp - - data_phy_cycles * - dsi->lanes - 18; + horizontal_frontporch_byte = + vm->hfront_porch * dsi_tmp_buf_bpp - + (data_phy_cycles * dsi->lanes + 18) * + vm->hfront_porch / + (vm->hfront_porch + vm->hback_porch); + + horizontal_backporch_byte = + horizontal_backporch_byte - + (data_phy_cycles * dsi->lanes + 18) * + vm->hback_porch / + (vm->hfront_porch + vm->hback_porch); } else { DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n"); horizontal_frontporch_byte = vm->hfront_porch * dsi_tmp_buf_bpp; } } else { - if (vm->hfront_porch * dsi_tmp_buf_bpp > + if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp > data_phy_cycles * dsi->lanes + 12) { - horizontal_frontporch_byte = vm->hfront_porch * - dsi_tmp_buf_bpp - - data_phy_cycles * - dsi->lanes - 12; + horizontal_frontporch_byte = + vm->hfront_porch * dsi_tmp_buf_bpp - + (data_phy_cycles * dsi->lanes + 12) * + vm->hfront_porch / + (vm->hfront_porch + vm->hback_porch); + horizontal_backporch_byte = horizontal_backporch_byte - + (data_phy_cycles * dsi->lanes + 12) * + vm->hback_porch / + (vm->hfront_porch + vm->hback_porch); } else { DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n"); horizontal_frontporch_byte = vm->hfront_porch * diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7ad14937fcdf..b67f88872726 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -506,6 +506,14 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) goto fail; } + /* + * Set the ICC path to maximum speed for now by multiplying the fastest + * frequency by the bus width (8). We'll want to scale this later on to + * improve battery life. + */ + icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); + icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); + return gpu; fail: diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index b01388a9e89e..253d8d85daad 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -591,6 +591,14 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) goto fail; } + /* + * Set the ICC path to maximum speed for now by multiplying the fastest + * frequency by the bus width (8). We'll want to scale this later on to + * improve battery life. + */ + icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); + icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); + return gpu; fail: diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index b02e2042547f..7d9e63e20ded 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -753,11 +753,18 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu->funcs->flush(gpu, gpu->rb[0]); if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; - } else { - /* Print a warning so if we die, we know why */ + } else if (ret == -ENODEV) { + /* + * This device does not use zap shader (but print a warning + * just in case someone got their dt wrong.. hopefully they + * have a debug UART to realize the error of their ways... + * if you mess this up you are about to crash horribly) + */ dev_warn_once(gpu->dev->dev, "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + } else { + return ret; } /* Last step - yield the ringbuffer */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h b/drivers/gpu/drm/msm/adreno/a6xx.xml.h index f44553ec3193..ed78fee2a262 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h @@ -16,11 +16,11 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) - /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) - /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-02 17:29:54) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140790 bytes, from 2018-12-02 17:29:54) +- /home/smasetty/playarea/envytools/rnndb/adreno/a6xx.xml ( 161969 bytes, from 2019-11-29 07:18:16) - /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) - /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) -Copyright (C) 2013-2018 by the following authors: +Copyright (C) 2013-2019 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -2519,6 +2519,54 @@ static inline uint32_t A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val) #define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a +#define REG_A6XX_GBIF_SCACHE_CNTL1 0x00003c02 + +#define REG_A6XX_GBIF_QSB_SIDE0 0x00003c03 + +#define REG_A6XX_GBIF_QSB_SIDE1 0x00003c04 + +#define REG_A6XX_GBIF_QSB_SIDE2 0x00003c05 + +#define REG_A6XX_GBIF_QSB_SIDE3 0x00003c06 + +#define REG_A6XX_GBIF_HALT 0x00003c45 + +#define REG_A6XX_GBIF_HALT_ACK 0x00003c46 + +#define REG_A6XX_GBIF_PERF_PWR_CNT_EN 0x00003cc0 + +#define REG_A6XX_GBIF_PERF_CNT_SEL 0x00003cc2 + +#define REG_A6XX_GBIF_PERF_PWR_CNT_SEL 0x00003cc3 + +#define REG_A6XX_GBIF_PERF_CNT_LOW0 0x00003cc4 + +#define REG_A6XX_GBIF_PERF_CNT_LOW1 0x00003cc5 + +#define REG_A6XX_GBIF_PERF_CNT_LOW2 0x00003cc6 + +#define REG_A6XX_GBIF_PERF_CNT_LOW3 0x00003cc7 + +#define REG_A6XX_GBIF_PERF_CNT_HIGH0 0x00003cc8 + +#define REG_A6XX_GBIF_PERF_CNT_HIGH1 0x00003cc9 + +#define REG_A6XX_GBIF_PERF_CNT_HIGH2 0x00003cca + +#define REG_A6XX_GBIF_PERF_CNT_HIGH3 0x00003ccb + +#define REG_A6XX_GBIF_PWR_CNT_LOW0 0x00003ccc + +#define REG_A6XX_GBIF_PWR_CNT_LOW1 0x00003ccd + +#define REG_A6XX_GBIF_PWR_CNT_LOW2 0x00003cce + +#define REG_A6XX_GBIF_PWR_CNT_HIGH0 0x00003ccf + +#define REG_A6XX_GBIF_PWR_CNT_HIGH1 0x00003cd0 + +#define REG_A6XX_GBIF_PWR_CNT_HIGH2 0x00003cd1 + #define REG_A6XX_RB_WINDOW_OFFSET2 0x000088d4 #define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE 0x80000000 #define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x00007fff diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 85f14feafdec..983afeaee737 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */ #include <linux/clk.h> #include <linux/interconnect.h> @@ -149,6 +149,8 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq) if (freq == gmu->gpu_freqs[perf_index]) break; + gmu->current_perf_index = perf_index; + __a6xx_gmu_set_freq(gmu, perf_index); } @@ -433,6 +435,8 @@ static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev, static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) { + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct platform_device *pdev = to_platform_device(gmu->dev); void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc"); void __iomem *seqptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc_seq"); @@ -480,20 +484,34 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 4, 0x10108); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 4, 0x30000); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 4, 0x0); + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 8, 0x10108); - pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30080); + if (adreno_is_a618(adreno_gpu)) + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30090); + else + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30080); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 8, 0x0); + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK, 7); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK, 0); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CONTROL, 0); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID, 0x10108); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR, 0x30010); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA, 2); + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 4, 0x10108); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 4, 0x30000); - pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3); + if (adreno_is_a618(adreno_gpu)) + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x2); + else + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3); + + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 8, 0x10108); - pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30080); + if (adreno_is_a618(adreno_gpu)) + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30090); + else + pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30080); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 8, 0x3); /* Setup GPU PDC */ @@ -741,8 +759,8 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK); enable_irq(gmu->hfi_irq); - /* Set the GPU to the highest power frequency */ - __a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1); + /* Set the GPU to the current freq */ + __a6xx_gmu_set_freq(gmu, gmu->current_perf_index); /* * "enable" the GX power domain which won't actually do anything but it @@ -1166,6 +1184,8 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) gmu->nr_gpu_freqs = a6xx_gmu_build_freq_table(&gpu->pdev->dev, gmu->gpu_freqs, ARRAY_SIZE(gmu->gpu_freqs)); + gmu->current_perf_index = gmu->nr_gpu_freqs - 1; + /* Build the list of RPMh votes that we'll send to the GMU */ return a6xx_gmu_rpmh_votes_init(gmu); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index 39a26dd63674..2af91ed7ed0c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -63,6 +63,9 @@ struct a6xx_gmu { struct clk_bulk_data *clocks; struct clk *core_clk; + /* current performance index set externally */ + int current_perf_index; + int nr_gpu_freqs; unsigned long gpu_freqs[16]; u32 gx_arc_votes[16]; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index dc8ec2c94301..daf07800cde0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */ #include "msm_gem.h" @@ -378,6 +378,18 @@ static int a6xx_hw_init(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); int ret; + /* + * During a previous slumber, GBIF halt is asserted to ensure + * no further transaction can go through GPU before GPU + * headswitch is turned off. + * + * This halt is deasserted once headswitch goes off but + * incase headswitch doesn't goes off clear GBIF halt + * here to ensure GPU wake-up doesn't fail because of + * halted GPU transactions. + */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); + /* Make sure the GMU keeps the GPU on while we set it up */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); @@ -406,12 +418,17 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1); gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); - /* enable hardware clockgating */ - a6xx_set_hwcg(gpu, true); + /* + * enable hardware clockgating + * For now enable clock gating only for a630 + */ + if (adreno_is_a630(adreno_gpu)) + a6xx_set_hwcg(gpu, true); - /* VBIF start */ - gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); + /* VBIF/GBIF start*/ gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); + if (adreno_is_a630(adreno_gpu)) + gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); /* Make all blocks contribute to the GPU BUSY perf counter */ gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); @@ -537,12 +554,19 @@ static int a6xx_hw_init(struct msm_gpu *gpu) a6xx_flush(gpu, gpu->rb[0]); if (!a6xx_idle(gpu, gpu->rb[0])) return -EINVAL; - } else { - /* Print a warning so if we die, we know why */ + } else if (ret == -ENODEV) { + /* + * This device does not use zap shader (but print a warning + * just in case someone got their dt wrong.. hopefully they + * have a debug UART to realize the error of their ways... + * if you mess this up you are about to crash horribly) + */ dev_warn_once(gpu->dev->dev, "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); ret = 0; + } else { + return ret; } out: @@ -724,6 +748,39 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), }; +#define GBIF_CLIENT_HALT_MASK BIT(0) +#define GBIF_ARB_HALT_MASK BIT(1) + +static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + + if(!a6xx_has_gbif(adreno_gpu)){ + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); + spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & + 0xf) == 0xf); + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); + + return; + } + + /* Halt new client requests on GBIF */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & + (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); + + /* Halt all AXI requests on GBIF */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & + (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); + + /* + * GMU needs DDR access in slumber path. Deassert GBIF halt now + * to allow for GMU to access system memory. + */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); +} + static int a6xx_pm_resume(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -748,6 +805,16 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) devfreq_suspend_device(gpu->devfreq.devfreq); + /* + * Make sure the GMU is idle before continuing (because some transitions + * may use VBIF + */ + a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu); + + /* Clear the VBIF pipe before shutting down */ + /* FIXME: This accesses the GPU - do we need to make sure it is on? */ + a6xx_bus_clear_pending_transactions(adreno_gpu); + return a6xx_gmu_stop(a6xx_gpu); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 64399554f2dd..7239b8b60939 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2017 The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2017, 2019 The Linux Foundation. All rights reserved. */ #ifndef __A6XX_GPU_H__ #define __A6XX_GPU_H__ @@ -42,6 +42,13 @@ struct a6xx_gpu { #define A6XX_PROTECT_RDONLY(_reg, _len) \ ((((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF)) +static inline bool a6xx_has_gbif(struct adreno_gpu *gpu) +{ + if(adreno_is_a630(gpu)) + return false; + + return true; +} int a6xx_gmu_resume(struct a6xx_gpu *gpu); int a6xx_gmu_stop(struct a6xx_gpu *gpu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 691c1a277d91..d6023ba8033c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ #include <linux/ascii85.h> #include "msm_gem.h" @@ -320,6 +320,7 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, { struct resource *res; void __iomem *cxdbg = NULL; + int nr_debugbus_blocks; /* Set up the GX debug bus */ @@ -374,9 +375,11 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); } - a6xx_state->debugbus = state_kcalloc(a6xx_state, - ARRAY_SIZE(a6xx_debugbus_blocks), - sizeof(*a6xx_state->debugbus)); + nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) + + (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0); + + a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks, + sizeof(*a6xx_state->debugbus)); if (a6xx_state->debugbus) { int i; @@ -388,15 +391,31 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, &a6xx_state->debugbus[i]); a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); + + /* + * GBIF has same debugbus as of other GPU blocks, fall back to + * default path if GPU uses GBIF, also GBIF uses exactly same + * ID as of VBIF. + */ + if (a6xx_has_gbif(to_adreno_gpu(gpu))) { + a6xx_get_debugbus_block(gpu, a6xx_state, + &a6xx_gbif_debugbus_block, + &a6xx_state->debugbus[i]); + + a6xx_state->nr_debugbus += 1; + } } - a6xx_state->vbif_debugbus = - state_kcalloc(a6xx_state, 1, - sizeof(*a6xx_state->vbif_debugbus)); + /* Dump the VBIF debugbus on applicable targets */ + if (!a6xx_has_gbif(to_adreno_gpu(gpu))) { + a6xx_state->vbif_debugbus = + state_kcalloc(a6xx_state, 1, + sizeof(*a6xx_state->vbif_debugbus)); - if (a6xx_state->vbif_debugbus) - a6xx_get_vbif_debugbus_block(gpu, a6xx_state, - a6xx_state->vbif_debugbus); + if (a6xx_state->vbif_debugbus) + a6xx_get_vbif_debugbus_block(gpu, a6xx_state, + a6xx_state->vbif_debugbus); + } if (cxdbg) { a6xx_state->cx_debugbus = @@ -770,14 +789,16 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu, &a6xx_state->gmu_registers[1]); } +#define A6XX_GBIF_REGLIST_SIZE 1 static void a6xx_get_registers(struct msm_gpu *gpu, struct a6xx_gpu_state *a6xx_state, struct a6xx_crashdumper *dumper) { int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + ARRAY_SIZE(a6xx_reglist) + - ARRAY_SIZE(a6xx_hlsq_reglist); + ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; int index = 0; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); a6xx_state->registers = state_kcalloc(a6xx_state, count, sizeof(*a6xx_state->registers)); @@ -792,6 +813,15 @@ static void a6xx_get_registers(struct msm_gpu *gpu, a6xx_state, &a6xx_ahb_reglist[i], &a6xx_state->registers[index++]); + if (a6xx_has_gbif(adreno_gpu)) + a6xx_get_ahb_gpu_registers(gpu, + a6xx_state, &a6xx_gbif_reglist, + &a6xx_state->registers[index++]); + else + a6xx_get_ahb_gpu_registers(gpu, + a6xx_state, &a6xx_vbif_reglist, + &a6xx_state->registers[index++]); + for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) a6xx_get_crashdumper_registers(gpu, a6xx_state, &a6xx_reglist[i], diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index 68cccfa2870a..e67c20c415af 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ #ifndef _A6XX_CRASH_DUMP_H_ #define _A6XX_CRASH_DUMP_H_ @@ -307,11 +307,20 @@ static const u32 a6xx_vbif_registers[] = { 0x3410, 0x3410, 0x3800, 0x3801, }; +static const u32 a6xx_gbif_registers[] = { + 0x3C00, 0X3C0B, 0X3C40, 0X3C47, 0X3CC0, 0X3CD1, 0xE3A, 0xE3A, +}; + static const struct a6xx_registers a6xx_ahb_reglist[] = { REGS(a6xx_ahb_registers, 0, 0), - REGS(a6xx_vbif_registers, 0, 0), }; +static const struct a6xx_registers a6xx_vbif_reglist = + REGS(a6xx_vbif_registers, 0, 0); + +static const struct a6xx_registers a6xx_gbif_reglist = + REGS(a6xx_gbif_registers, 0, 0); + static const u32 a6xx_gmu_gx_registers[] = { /* GMU GX */ 0x0000, 0x0000, 0x0010, 0x0013, 0x0016, 0x0016, 0x0018, 0x001b, @@ -422,6 +431,9 @@ static const struct a6xx_debugbus_block { DEBUGBUS(A6XX_DBGBUS_TPL1_3, 0x100), }; +static const struct a6xx_debugbus_block a6xx_gbif_debugbus_block = + DEBUGBUS(A6XX_DBGBUS_VBIF, 0x100); + static const struct a6xx_debugbus_block a6xx_cx_debugbus_blocks[] = { DEBUGBUS(A6XX_DBGBUS_GMU_CX, 0x100), DEBUGBUS(A6XX_DBGBUS_CX, 0x100), diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index fbbdf86504f5..cb3a6e597d76 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -167,6 +167,17 @@ static const struct adreno_info gpulist[] = { .init = a5xx_gpu_init, .zapfw = "a540_zap.mdt", }, { + .rev = ADRENO_REV(6, 1, 8, ANY_ID), + .revn = 618, + .name = "A618", + .fw = { + [ADRENO_FW_SQE] = "a630_sqe.fw", + [ADRENO_FW_GMU] = "a630_gmu.bin", + }, + .gmem = SZ_512K, + .inactive_period = DRM_MSM_INACTIVE_PERIOD, + .init = a6xx_gpu_init, + }, { .rev = ADRENO_REV(6, 3, 0, ANY_ID), .revn = 630, .name = "A630", diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 0783e4b5486a..7fd29829b2fa 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -26,6 +26,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, { struct device *dev = &gpu->pdev->dev; const struct firmware *fw; + const char *signed_fwname = NULL; struct device_node *np, *mem_np; struct resource r; phys_addr_t mem_phys; @@ -58,8 +59,43 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, mem_phys = r.start; - /* Request the MDT file for the firmware */ - fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); + /* + * Check for a firmware-name property. This is the new scheme + * to handle firmware that may be signed with device specific + * keys, allowing us to have a different zap fw path for different + * devices. + * + * If the firmware-name property is found, we bypass the + * adreno_request_fw() mechanism, because we don't need to handle + * the /lib/firmware/qcom/... vs /lib/firmware/... case. + * + * If the firmware-name property is not found, for backwards + * compatibility we fall back to the fwname from the gpulist + * table. + */ + of_property_read_string_index(np, "firmware-name", 0, &signed_fwname); + if (signed_fwname) { + fwname = signed_fwname; + ret = request_firmware_direct(&fw, fwname, gpu->dev->dev); + if (ret) + fw = ERR_PTR(ret); + } else if (fwname) { + /* Request the MDT file from the default location: */ + fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); + } else { + /* + * For new targets, we require the firmware-name property, + * if a zap-shader is required, rather than falling back + * to a firmware name specified in gpulist. + * + * Because the firmware is signed with a (potentially) + * device specific key, having the name come from gpulist + * was a bad idea, and is only provided for backwards + * compatibility for older targets. + */ + return -ENODEV; + } + if (IS_ERR(fw)) { DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname); return PTR_ERR(fw); @@ -95,7 +131,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, * not. But since we've already gotten through adreno_request_fw() * we know which of the two cases it is: */ - if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { + if (signed_fwname || (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY)) { ret = qcom_mdt_load(dev, fw, fwname, pasid, mem_region, mem_phys, mem_size, NULL); } else { @@ -146,14 +182,6 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid) return -EPROBE_DEFER; } - /* Each GPU has a target specific zap shader firmware name to use */ - if (!adreno_gpu->info->zapfw) { - zap_available = false; - DRM_DEV_ERROR(&pdev->dev, - "Zap shader firmware file not specified for this target\n"); - return -ENODEV; - } - return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid); } @@ -826,7 +854,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev) node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels"); if (!node) { - DRM_DEV_ERROR(dev, "Could not find the GPU powerlevels\n"); + DRM_DEV_DEBUG(dev, "Could not find the GPU powerlevels\n"); return -ENXIO; } @@ -887,10 +915,21 @@ static int adreno_get_pwrlevels(struct device *dev, DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate); /* Check for an interconnect path for the bus */ - gpu->icc_path = of_icc_get(dev, NULL); + gpu->icc_path = of_icc_get(dev, "gfx-mem"); + if (!gpu->icc_path) { + /* + * Keep compatbility with device trees that don't have an + * interconnect-names property. + */ + gpu->icc_path = of_icc_get(dev, NULL); + } if (IS_ERR(gpu->icc_path)) gpu->icc_path = NULL; + gpu->ocmem_icc_path = of_icc_get(dev, "ocmem"); + if (IS_ERR(gpu->ocmem_icc_path)) + gpu->ocmem_icc_path = NULL; + return 0; } @@ -977,6 +1016,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) release_firmware(adreno_gpu->fw[i]); icc_put(gpu->icc_path); + icc_put(gpu->ocmem_icc_path); msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e71a7570ef72..9ff4e550e7bd 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -3,7 +3,7 @@ * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> * - * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved. + * Copyright (c) 2014,2017, 2019 The Linux Foundation. All rights reserved. */ #ifndef __ADRENO_GPU_H__ @@ -227,6 +227,16 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu) return gpu->revn == 540; } +static inline int adreno_is_a618(struct adreno_gpu *gpu) +{ + return gpu->revn == 618; +} + +static inline int adreno_is_a630(struct adreno_gpu *gpu) +{ + return gpu->revn == 630; +} + int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname); @@ -330,10 +340,7 @@ OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) static inline bool adreno_reg_check(struct adreno_gpu *gpu, enum adreno_regs offset_name) { - if (offset_name >= REG_ADRENO_REGISTER_MAX || - !gpu->reg_offsets[offset_name]) { - BUG(); - } + BUG_ON(offset_name >= REG_ADRENO_REGISTER_MAX || !gpu->reg_offsets[offset_name]); /* * REG_SKIP is a special value that tell us that the register in diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index f197dce54576..bf513411b243 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -197,10 +197,6 @@ static void _dpu_crtc_blend_setup(struct drm_crtc *crtc) DPU_DEBUG("%s\n", dpu_crtc->name); for (i = 0; i < cstate->num_mixers; i++) { - if (!mixer[i].hw_lm || !mixer[i].lm_ctl) { - DPU_ERROR("invalid lm or ctl assigned to mixer\n"); - return; - } mixer[i].mixer_op_mode = 0; mixer[i].flush_mask = 0; if (mixer[i].lm_ctl->ops.clear_all_blendstages) @@ -1113,14 +1109,9 @@ static int _dpu_debugfs_status_show(struct seq_file *s, void *data) for (i = 0; i < cstate->num_mixers; ++i) { m = &cstate->mixers[i]; - if (!m->hw_lm) - seq_printf(s, "\tmixer[%d] has no lm\n", i); - else if (!m->lm_ctl) - seq_printf(s, "\tmixer[%d] has no ctl\n", i); - else - seq_printf(s, "\tmixer:%d ctl:%d width:%d height:%d\n", - m->hw_lm->idx - LM_0, m->lm_ctl->idx - CTL_0, - out_width, mode->vdisplay); + seq_printf(s, "\tmixer:%d ctl:%d width:%d height:%d\n", + m->hw_lm->idx - LM_0, m->lm_ctl->idx - CTL_0, + out_width, mode->vdisplay); } seq_puts(s, "\n"); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index f96e142c4361..f8ac3bf60fd6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -58,7 +58,7 @@ #define IDLE_SHORT_TIMEOUT 1 -#define MAX_VDISPLAY_SPLIT 1080 +#define MAX_HDISPLAY_SPLIT 1080 /* timeout in frames waiting for frame done */ #define DPU_ENCODER_FRAME_DONE_TIMEOUT_FRAMES 5 @@ -233,7 +233,7 @@ int dpu_encoder_helper_wait_for_irq(struct dpu_encoder_phys *phys_enc, u32 irq_status; int ret; - if (!phys_enc || !wait_info || intr_idx >= INTR_IDX_MAX) { + if (!wait_info || intr_idx >= INTR_IDX_MAX) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -308,7 +308,7 @@ int dpu_encoder_helper_register_irq(struct dpu_encoder_phys *phys_enc, struct dpu_encoder_irq *irq; int ret = 0; - if (!phys_enc || intr_idx >= INTR_IDX_MAX) { + if (intr_idx >= INTR_IDX_MAX) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -363,10 +363,6 @@ int dpu_encoder_helper_unregister_irq(struct dpu_encoder_phys *phys_enc, struct dpu_encoder_irq *irq; int ret; - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return -EINVAL; - } irq = &phys_enc->irq[intr_idx]; /* silently skip irqs that weren't registered */ @@ -415,7 +411,7 @@ void dpu_encoder_get_hw_resources(struct drm_encoder *drm_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.get_hw_resources) + if (phys->ops.get_hw_resources) phys->ops.get_hw_resources(phys, hw_res); } } @@ -438,7 +434,7 @@ static void dpu_encoder_destroy(struct drm_encoder *drm_enc) for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.destroy) { + if (phys->ops.destroy) { phys->ops.destroy(phys); --dpu_enc->num_phys_encs; dpu_enc->phys_encs[i] = NULL; @@ -464,7 +460,7 @@ void dpu_encoder_helper_split_config( struct dpu_hw_mdp *hw_mdptop; struct msm_display_info *disp_info; - if (!phys_enc || !phys_enc->hw_mdptop || !phys_enc->parent) { + if (!phys_enc->hw_mdptop || !phys_enc->parent) { DPU_ERROR("invalid arg(s), encoder %d\n", phys_enc != 0); return; } @@ -534,8 +530,23 @@ static struct msm_display_topology dpu_encoder_get_topology( if (dpu_enc->phys_encs[i]) intf_count++; - /* User split topology for width > 1080 */ - topology.num_lm = (mode->vdisplay > MAX_VDISPLAY_SPLIT) ? 2 : 1; + /* Datapath topology selection + * + * Dual display + * 2 LM, 2 INTF ( Split display using 2 interfaces) + * + * Single display + * 1 LM, 1 INTF + * 2 LM, 1 INTF (stream merge to support high resolution interfaces) + * + */ + if (intf_count == 2) + topology.num_lm = 2; + else if (!dpu_kms->catalog->caps->has_3d_merge) + topology.num_lm = 1; + else + topology.num_lm = (mode->hdisplay > MAX_HDISPLAY_SPLIT) ? 2 : 1; + topology.num_enc = 0; topology.num_intf = intf_count; @@ -583,10 +594,10 @@ static int dpu_encoder_virt_atomic_check( for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.atomic_check) + if (phys->ops.atomic_check) ret = phys->ops.atomic_check(phys, crtc_state, conn_state); - else if (phys && phys->ops.mode_fixup) + else if (phys->ops.mode_fixup) if (!phys->ops.mode_fixup(phys, mode, adj_mode)) ret = -EINVAL; @@ -682,7 +693,7 @@ static void _dpu_encoder_irq_control(struct drm_encoder *drm_enc, bool enable) for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.irq_control) + if (phys->ops.irq_control) phys->ops.irq_control(phys, enable); } @@ -1032,46 +1043,43 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys) { - if (!dpu_enc->hw_pp[i]) { - DPU_ERROR_ENC(dpu_enc, "no pp block assigned" - "at idx: %d\n", i); - goto error; - } + if (!dpu_enc->hw_pp[i]) { + DPU_ERROR_ENC(dpu_enc, + "no pp block assigned at idx: %d\n", i); + goto error; + } - if (!hw_ctl[i]) { - DPU_ERROR_ENC(dpu_enc, "no ctl block assigned" - "at idx: %d\n", i); - goto error; - } + if (!hw_ctl[i]) { + DPU_ERROR_ENC(dpu_enc, + "no ctl block assigned at idx: %d\n", i); + goto error; + } - phys->hw_pp = dpu_enc->hw_pp[i]; - phys->hw_ctl = hw_ctl[i]; + phys->hw_pp = dpu_enc->hw_pp[i]; + phys->hw_ctl = hw_ctl[i]; - dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id, - DPU_HW_BLK_INTF); - for (j = 0; j < MAX_CHANNELS_PER_ENC; j++) { - struct dpu_hw_intf *hw_intf; + dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id, + DPU_HW_BLK_INTF); + for (j = 0; j < MAX_CHANNELS_PER_ENC; j++) { + struct dpu_hw_intf *hw_intf; - if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter)) - break; + if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter)) + break; - hw_intf = (struct dpu_hw_intf *)hw_iter.hw; - if (hw_intf->idx == phys->intf_idx) - phys->hw_intf = hw_intf; - } + hw_intf = (struct dpu_hw_intf *)hw_iter.hw; + if (hw_intf->idx == phys->intf_idx) + phys->hw_intf = hw_intf; + } - if (!phys->hw_intf) { - DPU_ERROR_ENC(dpu_enc, - "no intf block assigned at idx: %d\n", - i); + if (!phys->hw_intf) { + DPU_ERROR_ENC(dpu_enc, + "no intf block assigned at idx: %d\n", i); goto error; - } - - phys->connector = conn->state->connector; - if (phys->ops.mode_set) - phys->ops.mode_set(phys, mode, adj_mode); } + + phys->connector = conn->state->connector; + if (phys->ops.mode_set) + phys->ops.mode_set(phys, mode, adj_mode); } dpu_enc->mode_set_complete = true; @@ -1203,7 +1211,7 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.disable) + if (phys->ops.disable) phys->ops.disable(phys); } @@ -1216,8 +1224,7 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_STOP); for (i = 0; i < dpu_enc->num_phys_encs; i++) { - if (dpu_enc->phys_encs[i]) - dpu_enc->phys_encs[i]->connector = NULL; + dpu_enc->phys_encs[i]->connector = NULL; } DPU_DEBUG_ENC(dpu_enc, "encoder disabled\n"); @@ -1307,7 +1314,7 @@ void dpu_encoder_toggle_vblank_for_crtc(struct drm_encoder *drm_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.control_vblank_irq) + if (phys->ops.control_vblank_irq) phys->ops.control_vblank_irq(phys, enable); } } @@ -1419,7 +1426,7 @@ static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc, } ctl = phys->hw_ctl; - if (!ctl || !ctl->ops.trigger_flush) { + if (!ctl->ops.trigger_flush) { DPU_ERROR("missing trigger cb\n"); return; } @@ -1463,13 +1470,8 @@ void dpu_encoder_helper_trigger_start(struct dpu_encoder_phys *phys_enc) { struct dpu_hw_ctl *ctl; - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } - ctl = phys_enc->hw_ctl; - if (ctl && ctl->ops.trigger_start) { + if (ctl->ops.trigger_start) { ctl->ops.trigger_start(ctl); trace_dpu_enc_trigger_start(DRMID(phys_enc->parent), ctl->idx); } @@ -1506,14 +1508,10 @@ static void dpu_encoder_helper_hw_reset(struct dpu_encoder_phys *phys_enc) struct dpu_hw_ctl *ctl; int rc; - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } dpu_enc = to_dpu_encoder_virt(phys_enc->parent); ctl = phys_enc->hw_ctl; - if (!ctl || !ctl->ops.reset) + if (!ctl->ops.reset) return; DRM_DEBUG_KMS("id:%u ctl %d reset\n", DRMID(phys_enc->parent), @@ -1550,12 +1548,10 @@ static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc) for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (!phys || phys->enable_state == DPU_ENC_DISABLED) + if (phys->enable_state == DPU_ENC_DISABLED) continue; ctl = phys->hw_ctl; - if (!ctl) - continue; /* * This is cleared in frame_done worker, which isn't invoked @@ -1603,17 +1599,15 @@ void dpu_encoder_trigger_kickoff_pending(struct drm_encoder *drm_enc) for (i = 0; i < dpu_enc->num_phys_encs; i++) { phys = dpu_enc->phys_encs[i]; - if (phys && phys->hw_ctl) { - ctl = phys->hw_ctl; - if (ctl->ops.clear_pending_flush) - ctl->ops.clear_pending_flush(ctl); + ctl = phys->hw_ctl; + if (ctl->ops.clear_pending_flush) + ctl->ops.clear_pending_flush(ctl); - /* update only for command mode primary ctl */ - if ((phys == dpu_enc->cur_master) && - (disp_info->capabilities & MSM_DISPLAY_CAP_CMD_MODE) - && ctl->ops.trigger_pending) - ctl->ops.trigger_pending(ctl); - } + /* update only for command mode primary ctl */ + if ((phys == dpu_enc->cur_master) && + (disp_info->capabilities & MSM_DISPLAY_CAP_CMD_MODE) + && ctl->ops.trigger_pending) + ctl->ops.trigger_pending(ctl); } } @@ -1773,12 +1767,10 @@ void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc) DPU_ATRACE_BEGIN("enc_prepare_for_kickoff"); for (i = 0; i < dpu_enc->num_phys_encs; i++) { phys = dpu_enc->phys_encs[i]; - if (phys) { - if (phys->ops.prepare_for_kickoff) - phys->ops.prepare_for_kickoff(phys); - if (phys->enable_state == DPU_ENC_ERR_NEEDS_HW_RESET) - needs_hw_reset = true; - } + if (phys->ops.prepare_for_kickoff) + phys->ops.prepare_for_kickoff(phys); + if (phys->enable_state == DPU_ENC_ERR_NEEDS_HW_RESET) + needs_hw_reset = true; } DPU_ATRACE_END("enc_prepare_for_kickoff"); @@ -1819,7 +1811,7 @@ void dpu_encoder_kickoff(struct drm_encoder *drm_enc) /* allow phys encs to handle any post-kickoff business */ for (i = 0; i < dpu_enc->num_phys_encs; i++) { phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.handle_post_kickoff) + if (phys->ops.handle_post_kickoff) phys->ops.handle_post_kickoff(phys); } @@ -1848,7 +1840,7 @@ void dpu_encoder_prepare_commit(struct drm_encoder *drm_enc) for (i = 0; i < dpu_enc->num_phys_encs; i++) { phys = dpu_enc->phys_encs[i]; - if (phys && phys->ops.prepare_commit) + if (phys->ops.prepare_commit) phys->ops.prepare_commit(phys); } } @@ -1863,9 +1855,6 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data) for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (!phys) - continue; - seq_printf(s, "intf:%d vsync:%8d underrun:%8d ", phys->intf_idx - INTF_0, atomic_read(&phys->vsync_cnt), @@ -1924,8 +1913,7 @@ static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) dpu_enc->debugfs_root, dpu_enc, &debugfs_status_fops); for (i = 0; i < dpu_enc->num_phys_encs; i++) - if (dpu_enc->phys_encs[i] && - dpu_enc->phys_encs[i]->ops.late_register) + if (dpu_enc->phys_encs[i]->ops.late_register) dpu_enc->phys_encs[i]->ops.late_register( dpu_enc->phys_encs[i], dpu_enc->debugfs_root); @@ -2094,11 +2082,8 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - - if (phys) { - atomic_set(&phys->vsync_cnt, 0); - atomic_set(&phys->underrun_cnt, 0); - } + atomic_set(&phys->vsync_cnt, 0); + atomic_set(&phys->underrun_cnt, 0); } mutex_unlock(&dpu_enc->enc_lock); @@ -2240,8 +2225,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (!phys) - continue; switch (event) { case MSM_ENC_COMMIT_DONE: @@ -2257,7 +2240,7 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n", event); return -EINVAL; - }; + } if (fn_wait) { DPU_ATRACE_BEGIN("wait_for_completion_event"); @@ -2274,7 +2257,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, enum dpu_intf_mode dpu_encoder_get_intf_mode(struct drm_encoder *encoder) { struct dpu_encoder_virt *dpu_enc = NULL; - int i; if (!encoder) { DPU_ERROR("invalid encoder\n"); @@ -2285,12 +2267,8 @@ enum dpu_intf_mode dpu_encoder_get_intf_mode(struct drm_encoder *encoder) if (dpu_enc->cur_master) return dpu_enc->cur_master->intf_mode; - for (i = 0; i < dpu_enc->num_phys_encs; i++) { - struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - - if (phys) - return phys->intf_mode; - } + if (dpu_enc->num_phys_encs) + return dpu_enc->phys_encs[0]->intf_mode; return INTF_MODE_NONE; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index 047960949fbb..39e1e280ba44 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -45,8 +45,7 @@ static bool dpu_encoder_phys_cmd_mode_fixup( const struct drm_display_mode *mode, struct drm_display_mode *adj_mode) { - if (phys_enc) - DPU_DEBUG_CMDENC(to_dpu_encoder_phys_cmd(phys_enc), "\n"); + DPU_DEBUG_CMDENC(to_dpu_encoder_phys_cmd(phys_enc), "\n"); return true; } @@ -58,11 +57,8 @@ static void _dpu_encoder_phys_cmd_update_intf_cfg( struct dpu_hw_ctl *ctl; struct dpu_hw_intf_cfg intf_cfg = { 0 }; - if (!phys_enc) - return; - ctl = phys_enc->hw_ctl; - if (!ctl || !ctl->ops.setup_intf_cfg) + if (!ctl->ops.setup_intf_cfg) return; intf_cfg.intf = phys_enc->intf_idx; @@ -79,7 +75,7 @@ static void dpu_encoder_phys_cmd_pp_tx_done_irq(void *arg, int irq_idx) int new_cnt; u32 event = DPU_ENCODER_FRAME_EVENT_DONE; - if (!phys_enc || !phys_enc->hw_pp) + if (!phys_enc->hw_pp) return; DPU_ATRACE_BEGIN("pp_done_irq"); @@ -106,7 +102,7 @@ static void dpu_encoder_phys_cmd_pp_rd_ptr_irq(void *arg, int irq_idx) struct dpu_encoder_phys *phys_enc = arg; struct dpu_encoder_phys_cmd *cmd_enc; - if (!phys_enc || !phys_enc->hw_pp) + if (!phys_enc->hw_pp) return; DPU_ATRACE_BEGIN("rd_ptr_irq"); @@ -125,9 +121,6 @@ static void dpu_encoder_phys_cmd_ctl_start_irq(void *arg, int irq_idx) { struct dpu_encoder_phys *phys_enc = arg; - if (!phys_enc || !phys_enc->hw_ctl) - return; - DPU_ATRACE_BEGIN("ctl_start_irq"); atomic_add_unless(&phys_enc->pending_ctlstart_cnt, -1, 0); @@ -141,9 +134,6 @@ static void dpu_encoder_phys_cmd_underrun_irq(void *arg, int irq_idx) { struct dpu_encoder_phys *phys_enc = arg; - if (!phys_enc) - return; - if (phys_enc->parent_ops->handle_underrun_virt) phys_enc->parent_ops->handle_underrun_virt(phys_enc->parent, phys_enc); @@ -179,7 +169,7 @@ static void dpu_encoder_phys_cmd_mode_set( struct dpu_encoder_phys_cmd *cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - if (!phys_enc || !mode || !adj_mode) { + if (!mode || !adj_mode) { DPU_ERROR("invalid args\n"); return; } @@ -198,7 +188,7 @@ static int _dpu_encoder_phys_cmd_handle_ppdone_timeout( u32 frame_event = DPU_ENCODER_FRAME_EVENT_ERROR; bool do_log = false; - if (!phys_enc || !phys_enc->hw_pp || !phys_enc->hw_ctl) + if (!phys_enc->hw_pp) return -EINVAL; cmd_enc->pp_timeout_report_cnt++; @@ -247,11 +237,6 @@ static int _dpu_encoder_phys_cmd_wait_for_idle( struct dpu_encoder_wait_info wait_info; int ret; - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return -EINVAL; - } - wait_info.wq = &phys_enc->pending_kickoff_wq; wait_info.atomic_cnt = &phys_enc->pending_kickoff_cnt; wait_info.timeout_ms = KICKOFF_TIMEOUT_MS; @@ -273,7 +258,7 @@ static int dpu_encoder_phys_cmd_control_vblank_irq( int ret = 0; int refcount; - if (!phys_enc || !phys_enc->hw_pp) { + if (!phys_enc->hw_pp) { DPU_ERROR("invalid encoder\n"); return -EINVAL; } @@ -314,9 +299,6 @@ end: static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc, bool enable) { - if (!phys_enc) - return; - trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_pp->idx - PINGPONG_0, enable, atomic_read(&phys_enc->vblank_refcount)); @@ -351,7 +333,7 @@ static void dpu_encoder_phys_cmd_tearcheck_config( u32 vsync_hz; struct dpu_kms *dpu_kms; - if (!phys_enc || !phys_enc->hw_pp) { + if (!phys_enc->hw_pp) { DPU_ERROR("invalid encoder\n"); return; } @@ -428,8 +410,7 @@ static void _dpu_encoder_phys_cmd_pingpong_config( struct dpu_encoder_phys_cmd *cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - if (!phys_enc || !phys_enc->hw_ctl || !phys_enc->hw_pp - || !phys_enc->hw_ctl->ops.setup_intf_cfg) { + if (!phys_enc->hw_pp || !phys_enc->hw_ctl->ops.setup_intf_cfg) { DPU_ERROR("invalid arg(s), enc %d\n", phys_enc != 0); return; } @@ -458,7 +439,7 @@ static void dpu_encoder_phys_cmd_enable_helper( struct dpu_hw_ctl *ctl; u32 flush_mask = 0; - if (!phys_enc || !phys_enc->hw_ctl || !phys_enc->hw_pp) { + if (!phys_enc->hw_pp) { DPU_ERROR("invalid arg(s), encoder %d\n", phys_enc != 0); return; } @@ -480,7 +461,7 @@ static void dpu_encoder_phys_cmd_enable(struct dpu_encoder_phys *phys_enc) struct dpu_encoder_phys_cmd *cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - if (!phys_enc || !phys_enc->hw_pp) { + if (!phys_enc->hw_pp) { DPU_ERROR("invalid phys encoder\n"); return; } @@ -499,8 +480,7 @@ static void dpu_encoder_phys_cmd_enable(struct dpu_encoder_phys *phys_enc) static void _dpu_encoder_phys_cmd_connect_te( struct dpu_encoder_phys *phys_enc, bool enable) { - if (!phys_enc || !phys_enc->hw_pp || - !phys_enc->hw_pp->ops.connect_external_te) + if (!phys_enc->hw_pp || !phys_enc->hw_pp->ops.connect_external_te) return; trace_dpu_enc_phys_cmd_connect_te(DRMID(phys_enc->parent), enable); @@ -518,7 +498,7 @@ static int dpu_encoder_phys_cmd_get_line_count( { struct dpu_hw_pingpong *hw_pp; - if (!phys_enc || !phys_enc->hw_pp) + if (!phys_enc->hw_pp) return -EINVAL; if (!dpu_encoder_phys_cmd_is_master(phys_enc)) @@ -536,7 +516,7 @@ static void dpu_encoder_phys_cmd_disable(struct dpu_encoder_phys *phys_enc) struct dpu_encoder_phys_cmd *cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - if (!phys_enc || !phys_enc->hw_pp) { + if (!phys_enc->hw_pp) { DPU_ERROR("invalid encoder\n"); return; } @@ -559,10 +539,6 @@ static void dpu_encoder_phys_cmd_destroy(struct dpu_encoder_phys *phys_enc) struct dpu_encoder_phys_cmd *cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } kfree(cmd_enc); } @@ -580,7 +556,7 @@ static void dpu_encoder_phys_cmd_prepare_for_kickoff( to_dpu_encoder_phys_cmd(phys_enc); int ret; - if (!phys_enc || !phys_enc->hw_pp) { + if (!phys_enc->hw_pp) { DPU_ERROR("invalid encoder\n"); return; } @@ -614,11 +590,6 @@ static int _dpu_encoder_phys_cmd_wait_for_ctl_start( struct dpu_encoder_wait_info wait_info; int ret; - if (!phys_enc || !phys_enc->hw_ctl) { - DPU_ERROR("invalid argument(s)\n"); - return -EINVAL; - } - wait_info.wq = &phys_enc->pending_kickoff_wq; wait_info.atomic_cnt = &phys_enc->pending_ctlstart_cnt; wait_info.timeout_ms = KICKOFF_TIMEOUT_MS; @@ -639,9 +610,6 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete( { int rc; - if (!phys_enc) - return -EINVAL; - rc = _dpu_encoder_phys_cmd_wait_for_idle(phys_enc); if (rc) { DRM_ERROR("failed wait_for_idle: id:%u ret:%d intf:%d\n", @@ -658,9 +626,6 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done( int rc = 0; struct dpu_encoder_phys_cmd *cmd_enc; - if (!phys_enc) - return -EINVAL; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); /* only required for master controller */ @@ -681,9 +646,6 @@ static int dpu_encoder_phys_cmd_wait_for_vblank( struct dpu_encoder_phys_cmd *cmd_enc; struct dpu_encoder_wait_info wait_info; - if (!phys_enc) - return -EINVAL; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); /* only required for master controller */ @@ -715,9 +677,6 @@ static void dpu_encoder_phys_cmd_handle_post_kickoff( static void dpu_encoder_phys_cmd_trigger_start( struct dpu_encoder_phys *phys_enc) { - if (!phys_enc) - return; - dpu_encoder_helper_trigger_start(phys_enc); } @@ -816,6 +775,4 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( DPU_DEBUG_CMDENC(cmd_enc, "created\n"); return phys_enc; - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 3123ef873cdf..c71c18de5966 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -220,8 +220,7 @@ static bool dpu_encoder_phys_vid_mode_fixup( const struct drm_display_mode *mode, struct drm_display_mode *adj_mode) { - if (phys_enc) - DPU_DEBUG_VIDENC(phys_enc, "\n"); + DPU_DEBUG_VIDENC(phys_enc, "\n"); /* * Modifying mode has consequences when the mode comes back to us @@ -239,7 +238,7 @@ static void dpu_encoder_phys_vid_setup_timing_engine( unsigned long lock_flags; struct dpu_hw_intf_cfg intf_cfg = { 0 }; - if (!phys_enc || !phys_enc->hw_ctl->ops.setup_intf_cfg) { + if (!phys_enc->hw_ctl->ops.setup_intf_cfg) { DPU_ERROR("invalid encoder %d\n", phys_enc != 0); return; } @@ -280,6 +279,14 @@ static void dpu_encoder_phys_vid_setup_timing_engine( phys_enc->hw_intf->ops.setup_timing_gen(phys_enc->hw_intf, &timing_params, fmt); phys_enc->hw_ctl->ops.setup_intf_cfg(phys_enc->hw_ctl, &intf_cfg); + + /* setup which pp blk will connect to this intf */ + if (phys_enc->hw_intf->ops.bind_pingpong_blk) + phys_enc->hw_intf->ops.bind_pingpong_blk( + phys_enc->hw_intf, + true, + phys_enc->hw_pp->idx); + spin_unlock_irqrestore(phys_enc->enc_spinlock, lock_flags); programmable_fetch_config(phys_enc, &timing_params); @@ -293,12 +300,7 @@ static void dpu_encoder_phys_vid_vblank_irq(void *arg, int irq_idx) u32 flush_register = 0; int new_cnt = -1, old_cnt = -1; - if (!phys_enc) - return; - hw_ctl = phys_enc->hw_ctl; - if (!hw_ctl) - return; DPU_ATRACE_BEGIN("vblank_irq"); @@ -314,7 +316,7 @@ static void dpu_encoder_phys_vid_vblank_irq(void *arg, int irq_idx) * so we need to double-check with hw that it accepted the flush bits */ spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags); - if (hw_ctl && hw_ctl->ops.get_flush_register) + if (hw_ctl->ops.get_flush_register) flush_register = hw_ctl->ops.get_flush_register(hw_ctl); if (!(flush_register & hw_ctl->ops.get_pending_flush(hw_ctl))) @@ -335,9 +337,6 @@ static void dpu_encoder_phys_vid_underrun_irq(void *arg, int irq_idx) { struct dpu_encoder_phys *phys_enc = arg; - if (!phys_enc) - return; - if (phys_enc->parent_ops->handle_underrun_virt) phys_enc->parent_ops->handle_underrun_virt(phys_enc->parent, phys_enc); @@ -374,11 +373,6 @@ static void dpu_encoder_phys_vid_mode_set( struct drm_display_mode *mode, struct drm_display_mode *adj_mode) { - if (!phys_enc) { - DPU_ERROR("invalid encoder/kms\n"); - return; - } - if (adj_mode) { phys_enc->cached_mode = *adj_mode; drm_mode_debug_printmodeline(adj_mode); @@ -395,11 +389,6 @@ static int dpu_encoder_phys_vid_control_vblank_irq( int ret = 0; int refcount; - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return -EINVAL; - } - refcount = atomic_read(&phys_enc->vblank_refcount); /* Slave encoders don't report vblank */ @@ -435,6 +424,7 @@ static void dpu_encoder_phys_vid_enable(struct dpu_encoder_phys *phys_enc) { struct dpu_hw_ctl *ctl; u32 flush_mask = 0; + u32 intf_flush_mask = 0; ctl = phys_enc->hw_ctl; @@ -459,10 +449,18 @@ static void dpu_encoder_phys_vid_enable(struct dpu_encoder_phys *phys_enc) ctl->ops.get_bitmask_intf(ctl, &flush_mask, phys_enc->hw_intf->idx); ctl->ops.update_pending_flush(ctl, flush_mask); + if (ctl->ops.get_bitmask_active_intf) + ctl->ops.get_bitmask_active_intf(ctl, &intf_flush_mask, + phys_enc->hw_intf->idx); + + if (ctl->ops.update_pending_intf_flush) + ctl->ops.update_pending_intf_flush(ctl, intf_flush_mask); + skip_flush: DPU_DEBUG_VIDENC(phys_enc, - "update pending flush ctl %d flush_mask %x\n", - ctl->idx - CTL_0, flush_mask); + "update pending flush ctl %d flush_mask 0%x intf_mask 0x%x\n", + ctl->idx - CTL_0, flush_mask, intf_flush_mask); + /* ctl_flush & timing engine enable will be triggered by framework */ if (phys_enc->enable_state == DPU_ENC_DISABLED) @@ -471,11 +469,6 @@ skip_flush: static void dpu_encoder_phys_vid_destroy(struct dpu_encoder_phys *phys_enc) { - if (!phys_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } - DPU_DEBUG_VIDENC(phys_enc, "\n"); kfree(phys_enc); } @@ -493,11 +486,6 @@ static int dpu_encoder_phys_vid_wait_for_vblank( struct dpu_encoder_wait_info wait_info; int ret; - if (!phys_enc) { - pr_err("invalid encoder\n"); - return -EINVAL; - } - wait_info.wq = &phys_enc->pending_kickoff_wq; wait_info.atomic_cnt = &phys_enc->pending_kickoff_cnt; wait_info.timeout_ms = KICKOFF_TIMEOUT_MS; @@ -543,13 +531,8 @@ static void dpu_encoder_phys_vid_prepare_for_kickoff( struct dpu_hw_ctl *ctl; int rc; - if (!phys_enc) { - DPU_ERROR("invalid encoder/parameters\n"); - return; - } - ctl = phys_enc->hw_ctl; - if (!ctl || !ctl->ops.wait_reset_status) + if (!ctl->ops.wait_reset_status) return; /* @@ -569,12 +552,12 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) unsigned long lock_flags; int ret; - if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev) { + if (!phys_enc->parent || !phys_enc->parent->dev) { DPU_ERROR("invalid encoder/device\n"); return; } - if (!phys_enc->hw_intf || !phys_enc->hw_ctl) { + if (!phys_enc->hw_intf) { DPU_ERROR("invalid hw_intf %d hw_ctl %d\n", phys_enc->hw_intf != 0, phys_enc->hw_ctl != 0); return; @@ -639,9 +622,6 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc, { int ret; - if (!phys_enc) - return; - trace_dpu_enc_phys_vid_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_intf->idx - INTF_0, enable, @@ -662,9 +642,6 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc, static int dpu_encoder_phys_vid_get_line_count( struct dpu_encoder_phys *phys_enc) { - if (!phys_enc) - return -EINVAL; - if (!dpu_encoder_phys_vid_is_master(phys_enc)) return -EINVAL; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index 24ab6249083a..528632690f1e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -489,12 +489,28 @@ static const struct dpu_format dpu_format_map_ubwc[] = { true, 4, DPU_FORMAT_FLAG_COMPRESSED, DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC), + /* ARGB8888 and ABGR8888 purposely have the same color + * ordering. The hardware only supports ABGR8888 UBWC + * natively. + */ + INTERLEAVED_RGB_FMT_TILED(ARGB8888, + COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, + C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4, + true, 4, DPU_FORMAT_FLAG_COMPRESSED, + DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC), + INTERLEAVED_RGB_FMT_TILED(XBGR8888, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4, false, 4, DPU_FORMAT_FLAG_COMPRESSED, DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC), + INTERLEAVED_RGB_FMT_TILED(XRGB8888, + COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, + C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4, + false, 4, DPU_FORMAT_FLAG_COMPRESSED, + DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC), + INTERLEAVED_RGB_FMT_TILED(ABGR2101010, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4, @@ -550,7 +566,9 @@ static int _dpu_format_get_media_color_ubwc(const struct dpu_format *fmt) { static const struct dpu_media_color_map dpu_media_ubwc_map[] = { {DRM_FORMAT_ABGR8888, COLOR_FMT_RGBA8888_UBWC}, + {DRM_FORMAT_ARGB8888, COLOR_FMT_RGBA8888_UBWC}, {DRM_FORMAT_XBGR8888, COLOR_FMT_RGBA8888_UBWC}, + {DRM_FORMAT_XRGB8888, COLOR_FMT_RGBA8888_UBWC}, {DRM_FORMAT_ABGR2101010, COLOR_FMT_RGBA1010102_UBWC}, {DRM_FORMAT_XBGR2101010, COLOR_FMT_RGBA1010102_UBWC}, {DRM_FORMAT_BGR565, COLOR_FMT_RGB565_UBWC}, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 04c8c44f5b9c..c567917541e8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -11,11 +11,17 @@ #include "dpu_hw_catalog_format.h" #include "dpu_kms.h" -#define VIG_SDM845_MASK \ - (BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_SCALER_QSEED3) | BIT(DPU_SSPP_QOS) |\ +#define VIG_MASK \ + (BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_QOS) |\ BIT(DPU_SSPP_CSC_10BIT) | BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_QOS_8LVL) |\ BIT(DPU_SSPP_TS_PREFILL) | BIT(DPU_SSPP_EXCL_RECT)) +#define VIG_SDM845_MASK \ + (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3)) + +#define VIG_SC7180_MASK \ + (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED4)) + #define DMA_SDM845_MASK \ (BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_QOS) | BIT(DPU_SSPP_QOS_8LVL) |\ BIT(DPU_SSPP_TS_PREFILL) | BIT(DPU_SSPP_TS_PREFILL_REC1) |\ @@ -27,6 +33,9 @@ #define MIXER_SDM845_MASK \ (BIT(DPU_MIXER_SOURCESPLIT) | BIT(DPU_DIM_LAYER)) +#define MIXER_SC7180_MASK \ + (BIT(DPU_DIM_LAYER)) + #define PINGPONG_SDM845_MASK BIT(DPU_PINGPONG_DITHER) #define PINGPONG_SDM845_SPLIT_MASK \ @@ -58,9 +67,20 @@ static const struct dpu_caps sdm845_dpu_caps = { .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, + .has_3d_merge = true, +}; + +static const struct dpu_caps sc7180_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0x9, + .qseed_type = DPU_SSPP_SCALER_QSEED4, + .smart_dma_rev = DPU_SSPP_SMART_DMA_V2, + .ubwc_version = DPU_HW_UBWC_VER_20, + .has_dim_layer = true, + .has_idle_pc = true, }; -static struct dpu_mdp_cfg sdm845_mdp[] = { +static const struct dpu_mdp_cfg sdm845_mdp[] = { { .name = "top_0", .id = MDP_TOP, .base = 0x0, .len = 0x45C, @@ -85,10 +105,27 @@ static struct dpu_mdp_cfg sdm845_mdp[] = { }, }; +static const struct dpu_mdp_cfg sc7180_mdp[] = { + { + .name = "top_0", .id = MDP_TOP, + .base = 0x0, .len = 0x494, + .features = 0, + .highest_bank_bit = 0x3, + .clk_ctrls[DPU_CLK_CTRL_VIG0] = { + .reg_off = 0x2AC, .bit_off = 0}, + .clk_ctrls[DPU_CLK_CTRL_DMA0] = { + .reg_off = 0x2AC, .bit_off = 8}, + .clk_ctrls[DPU_CLK_CTRL_DMA1] = { + .reg_off = 0x2B4, .bit_off = 8}, + .clk_ctrls[DPU_CLK_CTRL_CURSOR0] = { + .reg_off = 0x2BC, .bit_off = 8}, + }, +}; + /************************************************************* * CTL sub blocks config *************************************************************/ -static struct dpu_ctl_cfg sdm845_ctl[] = { +static const struct dpu_ctl_cfg sdm845_ctl[] = { { .name = "ctl_0", .id = CTL_0, .base = 0x1000, .len = 0xE4, @@ -116,6 +153,24 @@ static struct dpu_ctl_cfg sdm845_ctl[] = { }, }; +static const struct dpu_ctl_cfg sc7180_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, + .base = 0x1000, .len = 0xE4, + .features = BIT(DPU_CTL_ACTIVE_CFG) + }, + { + .name = "ctl_1", .id = CTL_1, + .base = 0x1200, .len = 0xE4, + .features = BIT(DPU_CTL_ACTIVE_CFG) + }, + { + .name = "ctl_2", .id = CTL_2, + .base = 0x1400, .len = 0xE4, + .features = BIT(DPU_CTL_ACTIVE_CFG) + }, +}; + /************************************************************* * SSPP sub blocks config *************************************************************/ @@ -128,7 +183,7 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = { .maxvdeciexp = MAX_VERT_DECIMATION, }; -#define _VIG_SBLK(num, sdma_pri) \ +#define _VIG_SBLK(num, sdma_pri, qseed_ver) \ { \ .common = &sdm845_sspp_common, \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ @@ -137,7 +192,7 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = { .src_blk = {.name = STRCAT("sspp_src_", num), \ .id = DPU_SSPP_SRC, .base = 0x00, .len = 0x150,}, \ .scaler_blk = {.name = STRCAT("sspp_scaler", num), \ - .id = DPU_SSPP_SCALER_QSEED3, \ + .id = qseed_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = STRCAT("sspp_csc", num), \ .id = DPU_SSPP_CSC_10BIT, \ @@ -162,10 +217,14 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = { .virt_num_formats = ARRAY_SIZE(plane_formats), \ } -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = _VIG_SBLK("0", 5); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = _VIG_SBLK("1", 6); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = _VIG_SBLK("2", 7); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = _VIG_SBLK("3", 8); +static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = + _VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED3); +static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = + _VIG_SBLK("1", 6, DPU_SSPP_SCALER_QSEED3); +static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = + _VIG_SBLK("2", 7, DPU_SSPP_SCALER_QSEED3); +static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = + _VIG_SBLK("3", 8, DPU_SSPP_SCALER_QSEED3); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK("8", 1); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK("9", 2); @@ -184,7 +243,7 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK("11", 4); .clk_ctrl = _clkctrl \ } -static struct dpu_sspp_cfg sdm845_sspp[] = { +static const struct dpu_sspp_cfg sdm845_sspp[] = { SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SDM845_MASK, sdm845_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0), SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SDM845_MASK, @@ -203,9 +262,26 @@ static struct dpu_sspp_cfg sdm845_sspp[] = { sdm845_dma_sblk_3, 13, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR1), }; +static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 = + _VIG_SBLK("0", 4, DPU_SSPP_SCALER_QSEED4); + +static const struct dpu_sspp_cfg sc7180_sspp[] = { + SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SC7180_MASK, + sc7180_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0), + SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, DMA_SDM845_MASK, + sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0), + SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, DMA_SDM845_MASK, + sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1), + SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, DMA_CURSOR_SDM845_MASK, + sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR0), +}; + /************************************************************* * MIXER sub blocks config *************************************************************/ + +/* SDM845 */ + static const struct dpu_lm_sub_blks sdm845_lm_sblk = { .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ @@ -215,23 +291,46 @@ static const struct dpu_lm_sub_blks sdm845_lm_sblk = { }, }; -#define LM_BLK(_name, _id, _base, _pp, _lmpair) \ +#define LM_BLK(_name, _id, _base, _fmask, _sblk, _pp, _lmpair) \ { \ .name = _name, .id = _id, \ .base = _base, .len = 0x320, \ - .features = MIXER_SDM845_MASK, \ - .sblk = &sdm845_lm_sblk, \ + .features = _fmask, \ + .sblk = _sblk, \ .pingpong = _pp, \ .lm_pair_mask = (1 << _lmpair) \ } -static struct dpu_lm_cfg sdm845_lm[] = { - LM_BLK("lm_0", LM_0, 0x44000, PINGPONG_0, LM_1), - LM_BLK("lm_1", LM_1, 0x45000, PINGPONG_1, LM_0), - LM_BLK("lm_2", LM_2, 0x46000, PINGPONG_2, LM_5), - LM_BLK("lm_3", LM_3, 0x0, PINGPONG_MAX, 0), - LM_BLK("lm_4", LM_4, 0x0, PINGPONG_MAX, 0), - LM_BLK("lm_5", LM_5, 0x49000, PINGPONG_3, LM_2), +static const struct dpu_lm_cfg sdm845_lm[] = { + LM_BLK("lm_0", LM_0, 0x44000, MIXER_SDM845_MASK, + &sdm845_lm_sblk, PINGPONG_0, LM_1), + LM_BLK("lm_1", LM_1, 0x45000, MIXER_SDM845_MASK, + &sdm845_lm_sblk, PINGPONG_1, LM_0), + LM_BLK("lm_2", LM_2, 0x46000, MIXER_SDM845_MASK, + &sdm845_lm_sblk, PINGPONG_2, LM_5), + LM_BLK("lm_3", LM_3, 0x0, MIXER_SDM845_MASK, + &sdm845_lm_sblk, PINGPONG_MAX, 0), + LM_BLK("lm_4", LM_4, 0x0, MIXER_SDM845_MASK, + &sdm845_lm_sblk, PINGPONG_MAX, 0), + LM_BLK("lm_5", LM_5, 0x49000, MIXER_SDM845_MASK, + &sdm845_lm_sblk, PINGPONG_3, LM_2), +}; + +/* SC7180 */ + +static const struct dpu_lm_sub_blks sc7180_lm_sblk = { + .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .maxblendstages = 7, /* excluding base layer */ + .blendstage_base = { /* offsets relative to mixer base */ + 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, 0xb0 + }, +}; + +static const struct dpu_lm_cfg sc7180_lm[] = { + LM_BLK("lm_0", LM_0, 0x44000, MIXER_SC7180_MASK, + &sc7180_lm_sblk, PINGPONG_0, LM_1), + LM_BLK("lm_1", LM_1, 0x45000, MIXER_SC7180_MASK, + &sc7180_lm_sblk, PINGPONG_1, LM_0), }; /************************************************************* @@ -264,13 +363,18 @@ static const struct dpu_pingpong_sub_blks sdm845_pp_sblk = { .sblk = &sdm845_pp_sblk \ } -static struct dpu_pingpong_cfg sdm845_pp[] = { +static const struct dpu_pingpong_cfg sdm845_pp[] = { PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000), PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800), PP_BLK("pingpong_2", PINGPONG_2, 0x71000), PP_BLK("pingpong_3", PINGPONG_3, 0x71800), }; +static struct dpu_pingpong_cfg sc7180_pp[] = { + PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000), + PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800), +}; + /************************************************************* * INTF sub blocks config *************************************************************/ @@ -278,26 +382,32 @@ static struct dpu_pingpong_cfg sdm845_pp[] = { {\ .name = _name, .id = _id, \ .base = _base, .len = 0x280, \ + .features = BIT(DPU_CTL_ACTIVE_CFG), \ .type = _type, \ .controller_id = _ctrl_id, \ .prog_fetch_lines_worst_case = 24 \ } -static struct dpu_intf_cfg sdm845_intf[] = { +static const struct dpu_intf_cfg sdm845_intf[] = { INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0), INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0), INTF_BLK("intf_2", INTF_2, 0x6B000, INTF_DSI, 1), INTF_BLK("intf_3", INTF_3, 0x6B800, INTF_DP, 1), }; +static const struct dpu_intf_cfg sc7180_intf[] = { + INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0), + INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0), +}; + /************************************************************* * VBIF sub blocks config *************************************************************/ /* VBIF QOS remap */ -static u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6}; -static u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3}; +static const u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6}; +static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3}; -static struct dpu_vbif_cfg sdm845_vbif[] = { +static const struct dpu_vbif_cfg sdm845_vbif[] = { { .name = "vbif_0", .id = VBIF_0, .base = 0, .len = 0x1040, @@ -316,7 +426,7 @@ static struct dpu_vbif_cfg sdm845_vbif[] = { }, }; -static struct dpu_reg_dma_cfg sdm845_regdma = { +static const struct dpu_reg_dma_cfg sdm845_regdma = { .base = 0x0, .version = 0x1, .trigger_sel_off = 0x119c }; @@ -325,7 +435,7 @@ static struct dpu_reg_dma_cfg sdm845_regdma = { *************************************************************/ /* SSPP QOS LUTs */ -static struct dpu_qos_lut_entry sdm845_qos_linear[] = { +static const struct dpu_qos_lut_entry sdm845_qos_linear[] = { {.fl = 4, .lut = 0x357}, {.fl = 5, .lut = 0x3357}, {.fl = 6, .lut = 0x23357}, @@ -340,7 +450,11 @@ static struct dpu_qos_lut_entry sdm845_qos_linear[] = { {.fl = 0, .lut = 0x11222222223357} }; -static struct dpu_qos_lut_entry sdm845_qos_macrotile[] = { +static const struct dpu_qos_lut_entry sc7180_qos_linear[] = { + {.fl = 0, .lut = 0x0011222222335777}, +}; + +static const struct dpu_qos_lut_entry sdm845_qos_macrotile[] = { {.fl = 10, .lut = 0x344556677}, {.fl = 11, .lut = 0x3344556677}, {.fl = 12, .lut = 0x23344556677}, @@ -349,11 +463,19 @@ static struct dpu_qos_lut_entry sdm845_qos_macrotile[] = { {.fl = 0, .lut = 0x112233344556677}, }; -static struct dpu_qos_lut_entry sdm845_qos_nrt[] = { +static const struct dpu_qos_lut_entry sc7180_qos_macrotile[] = { + {.fl = 0, .lut = 0x0011223344556677}, +}; + +static const struct dpu_qos_lut_entry sdm845_qos_nrt[] = { + {.fl = 0, .lut = 0x0}, +}; + +static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { {.fl = 0, .lut = 0x0}, }; -static struct dpu_perf_cfg sdm845_perf_data = { +static const struct dpu_perf_cfg sdm845_perf_data = { .max_bw_low = 6800000, .max_bw_high = 6800000, .min_core_ib = 2400000, @@ -392,6 +514,30 @@ static struct dpu_perf_cfg sdm845_perf_data = { }, }; +static const struct dpu_perf_cfg sc7180_perf_data = { + .max_bw_low = 3900000, + .max_bw_high = 5500000, + .min_core_ib = 2400000, + .min_llcc_ib = 800000, + .min_dram_ib = 800000, + .danger_lut_tbl = {0xff, 0xffff, 0x0}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sc7180_qos_linear), + .entries = sc7180_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, +}; + /************************************************************* * Hardware catalog init *************************************************************/ @@ -421,12 +567,43 @@ static void sdm845_cfg_init(struct dpu_mdss_cfg *dpu_cfg) .reg_dma_count = 1, .dma_cfg = sdm845_regdma, .perf = sdm845_perf_data, + .mdss_irqs = 0x3ff, + }; +} + +/* + * sc7180_cfg_init(): populate sc7180 dpu sub-blocks reg offsets + * and instance counts. + */ +static void sc7180_cfg_init(struct dpu_mdss_cfg *dpu_cfg) +{ + *dpu_cfg = (struct dpu_mdss_cfg){ + .caps = &sc7180_dpu_caps, + .mdp_count = ARRAY_SIZE(sc7180_mdp), + .mdp = sc7180_mdp, + .ctl_count = ARRAY_SIZE(sc7180_ctl), + .ctl = sc7180_ctl, + .sspp_count = ARRAY_SIZE(sc7180_sspp), + .sspp = sc7180_sspp, + .mixer_count = ARRAY_SIZE(sc7180_lm), + .mixer = sc7180_lm, + .pingpong_count = ARRAY_SIZE(sc7180_pp), + .pingpong = sc7180_pp, + .intf_count = ARRAY_SIZE(sc7180_intf), + .intf = sc7180_intf, + .vbif_count = ARRAY_SIZE(sdm845_vbif), + .vbif = sdm845_vbif, + .reg_dma_count = 1, + .dma_cfg = sdm845_regdma, + .perf = sc7180_perf_data, + .mdss_irqs = 0x3f, }; } -static struct dpu_mdss_hw_cfg_handler cfg_handler[] = { +static const struct dpu_mdss_hw_cfg_handler cfg_handler[] = { { .hw_rev = DPU_HW_VER_400, .cfg_init = sdm845_cfg_init}, { .hw_rev = DPU_HW_VER_401, .cfg_init = sdm845_cfg_init}, + { .hw_rev = DPU_HW_VER_620, .cfg_init = sc7180_cfg_init}, }; void dpu_hw_catalog_deinit(struct dpu_mdss_cfg *dpu_cfg) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index ec76b8687a98..09df7d87dd43 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -38,6 +38,7 @@ #define DPU_HW_VER_401 DPU_HW_VER(4, 0, 1) /* sdm845 v2.0 */ #define DPU_HW_VER_410 DPU_HW_VER(4, 1, 0) /* sdm670 v1.0 */ #define DPU_HW_VER_500 DPU_HW_VER(5, 0, 0) /* sdm855 v1.0 */ +#define DPU_HW_VER_620 DPU_HW_VER(6, 2, 0) /* sc7180 v1.0 */ #define IS_MSM8996_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_170) @@ -45,6 +46,7 @@ #define IS_SDM845_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_400) #define IS_SDM670_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_410) #define IS_SDM855_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_500) +#define IS_SC7180_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_620) #define DPU_HW_BLK_NAME_LEN 16 @@ -92,6 +94,7 @@ enum { * @DPU_SSPP_SRC Src and fetch part of the pipes, * @DPU_SSPP_SCALER_QSEED2, QSEED2 algorithm support * @DPU_SSPP_SCALER_QSEED3, QSEED3 alogorithm support + * @DPU_SSPP_SCALER_QSEED4, QSEED4 algorithm support * @DPU_SSPP_SCALER_RGB, RGB Scaler, supported by RGB pipes * @DPU_SSPP_CSC, Support of Color space converion * @DPU_SSPP_CSC_10BIT, Support of 10-bit Color space conversion @@ -110,6 +113,7 @@ enum { DPU_SSPP_SRC = 0x1, DPU_SSPP_SCALER_QSEED2, DPU_SSPP_SCALER_QSEED3, + DPU_SSPP_SCALER_QSEED4, DPU_SSPP_SCALER_RGB, DPU_SSPP_CSC, DPU_SSPP_CSC_10BIT, @@ -166,6 +170,7 @@ enum { */ enum { DPU_CTL_SPLIT_DISPLAY = 0x1, + DPU_CTL_ACTIVE_CFG, DPU_CTL_MAX }; @@ -269,7 +274,7 @@ struct dpu_qos_lut_entry { */ struct dpu_qos_lut_tbl { u32 nentry; - struct dpu_qos_lut_entry *entries; + const struct dpu_qos_lut_entry *entries; }; /** @@ -283,6 +288,7 @@ struct dpu_qos_lut_tbl { * @has_src_split source split feature status * @has_dim_layer dim layer feature status * @has_idle_pc indicate if idle power collapse feature is supported + * @has_3d_merge indicate if 3D merge is supported */ struct dpu_caps { u32 max_mixer_width; @@ -293,6 +299,7 @@ struct dpu_caps { bool has_src_split; bool has_dim_layer; bool has_idle_pc; + bool has_3d_merge; }; /** @@ -320,6 +327,7 @@ struct dpu_sspp_blks_common { * @maxupscale: maxupscale ratio supported * @smart_dma_priority: hw priority of rect1 of multirect pipe * @max_per_pipe_bw: maximum allowable bandwidth of this pipe in kBps + * @qseed_ver: qseed version * @src_blk: * @scaler_blk: * @csc_blk: @@ -340,6 +348,7 @@ struct dpu_sspp_sub_blks { u32 maxupscale; u32 smart_dma_priority; u32 max_per_pipe_bw; + u32 qseed_ver; struct dpu_src_blk src_blk; struct dpu_scaler_blk scaler_blk; struct dpu_pp_blk csc_blk; @@ -511,7 +520,7 @@ struct dpu_vbif_dynamic_ot_cfg { */ struct dpu_vbif_dynamic_ot_tbl { u32 count; - struct dpu_vbif_dynamic_ot_cfg *cfg; + const struct dpu_vbif_dynamic_ot_cfg *cfg; }; /** @@ -521,7 +530,7 @@ struct dpu_vbif_dynamic_ot_tbl { */ struct dpu_vbif_qos_tbl { u32 npriority_lvl; - u32 *priority_lvl; + const u32 *priority_lvl; }; /** @@ -646,6 +655,7 @@ struct dpu_perf_cfg { * @dma_formats Supported formats for dma pipe * @cursor_formats Supported formats for cursor pipe * @vig_formats Supported formats for vig pipe + * @mdss_irqs: Bitmap with the irqs supported by the target */ struct dpu_mdss_cfg { u32 hwversion; @@ -653,25 +663,25 @@ struct dpu_mdss_cfg { const struct dpu_caps *caps; u32 mdp_count; - struct dpu_mdp_cfg *mdp; + const struct dpu_mdp_cfg *mdp; u32 ctl_count; - struct dpu_ctl_cfg *ctl; + const struct dpu_ctl_cfg *ctl; u32 sspp_count; - struct dpu_sspp_cfg *sspp; + const struct dpu_sspp_cfg *sspp; u32 mixer_count; - struct dpu_lm_cfg *mixer; + const struct dpu_lm_cfg *mixer; u32 pingpong_count; - struct dpu_pingpong_cfg *pingpong; + const struct dpu_pingpong_cfg *pingpong; u32 intf_count; - struct dpu_intf_cfg *intf; + const struct dpu_intf_cfg *intf; u32 vbif_count; - struct dpu_vbif_cfg *vbif; + const struct dpu_vbif_cfg *vbif; u32 reg_dma_count; struct dpu_reg_dma_cfg dma_cfg; @@ -681,9 +691,11 @@ struct dpu_mdss_cfg { /* Add additional block data structures here */ struct dpu_perf_cfg perf; - struct dpu_format_extended *dma_formats; - struct dpu_format_extended *cursor_formats; - struct dpu_format_extended *vig_formats; + const struct dpu_format_extended *dma_formats; + const struct dpu_format_extended *cursor_formats; + const struct dpu_format_extended *vig_formats; + + unsigned long mdss_irqs; }; struct dpu_mdss_hw_cfg_handler { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h index bb6112c949ae..3766f0fd0bf0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h @@ -6,8 +6,12 @@ static const uint32_t qcom_compressed_supported_formats[] = { DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, DRM_FORMAT_BGR565, + + DRM_FORMAT_NV12, }; static const uint32_t plane_formats[] = { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c index 179e8d52cadb..831e5f7a9b7f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c @@ -22,14 +22,18 @@ #define CTL_PREPARE 0x0d0 #define CTL_SW_RESET 0x030 #define CTL_LAYER_EXTN_OFFSET 0x40 +#define CTL_INTF_ACTIVE 0x0F4 +#define CTL_INTF_FLUSH 0x110 +#define CTL_INTF_MASTER 0x134 #define CTL_MIXER_BORDER_OUT BIT(24) #define CTL_FLUSH_MASK_CTL BIT(17) #define DPU_REG_RESET_TIMEOUT_US 2000 +#define INTF_IDX 31 -static struct dpu_ctl_cfg *_ctl_offset(enum dpu_ctl ctl, - struct dpu_mdss_cfg *m, +static const struct dpu_ctl_cfg *_ctl_offset(enum dpu_ctl ctl, + const struct dpu_mdss_cfg *m, void __iomem *addr, struct dpu_hw_blk_reg_map *b) { @@ -100,11 +104,27 @@ static inline void dpu_hw_ctl_update_pending_flush(struct dpu_hw_ctl *ctx, ctx->pending_flush_mask |= flushbits; } +static inline void dpu_hw_ctl_update_pending_intf_flush(struct dpu_hw_ctl *ctx, + u32 flushbits) +{ + ctx->pending_intf_flush_mask |= flushbits; +} + static u32 dpu_hw_ctl_get_pending_flush(struct dpu_hw_ctl *ctx) { return ctx->pending_flush_mask; } +static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) +{ + + if (ctx->pending_flush_mask & BIT(INTF_IDX)) + DPU_REG_WRITE(&ctx->hw, CTL_INTF_FLUSH, + ctx->pending_intf_flush_mask); + + DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask); +} + static inline void dpu_hw_ctl_trigger_flush(struct dpu_hw_ctl *ctx) { trace_dpu_hw_ctl_trigger_pending_flush(ctx->pending_flush_mask, @@ -222,6 +242,36 @@ static int dpu_hw_ctl_get_bitmask_intf(struct dpu_hw_ctl *ctx, return 0; } +static int dpu_hw_ctl_get_bitmask_intf_v1(struct dpu_hw_ctl *ctx, + u32 *flushbits, enum dpu_intf intf) +{ + switch (intf) { + case INTF_0: + case INTF_1: + *flushbits |= BIT(31); + break; + default: + return 0; + } + return 0; +} + +static int dpu_hw_ctl_active_get_bitmask_intf(struct dpu_hw_ctl *ctx, + u32 *flushbits, enum dpu_intf intf) +{ + switch (intf) { + case INTF_0: + *flushbits |= BIT(0); + break; + case INTF_1: + *flushbits |= BIT(1); + break; + default: + return 0; + } + return 0; +} + static u32 dpu_hw_ctl_poll_reset_status(struct dpu_hw_ctl *ctx, u32 timeout_us) { struct dpu_hw_blk_reg_map *c = &ctx->hw; @@ -422,6 +472,24 @@ exit: DPU_REG_WRITE(c, CTL_LAYER_EXT3(lm), mixercfg_ext3); } + +static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx, + struct dpu_hw_intf_cfg *cfg) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + u32 intf_active = 0; + u32 mode_sel = 0; + + if (cfg->intf_mode_sel == DPU_CTL_MODE_SEL_CMD) + mode_sel |= BIT(17); + + intf_active = DPU_REG_READ(c, CTL_INTF_ACTIVE); + intf_active |= BIT(cfg->intf - INTF_0); + + DPU_REG_WRITE(c, CTL_TOP, mode_sel); + DPU_REG_WRITE(c, CTL_INTF_ACTIVE, intf_active); +} + static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx, struct dpu_hw_intf_cfg *cfg) { @@ -455,31 +523,41 @@ static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx, static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops, unsigned long cap) { + if (cap & BIT(DPU_CTL_ACTIVE_CFG)) { + ops->trigger_flush = dpu_hw_ctl_trigger_flush_v1; + ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg_v1; + ops->get_bitmask_intf = dpu_hw_ctl_get_bitmask_intf_v1; + ops->get_bitmask_active_intf = + dpu_hw_ctl_active_get_bitmask_intf; + ops->update_pending_intf_flush = + dpu_hw_ctl_update_pending_intf_flush; + } else { + ops->trigger_flush = dpu_hw_ctl_trigger_flush; + ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg; + ops->get_bitmask_intf = dpu_hw_ctl_get_bitmask_intf; + } ops->clear_pending_flush = dpu_hw_ctl_clear_pending_flush; ops->update_pending_flush = dpu_hw_ctl_update_pending_flush; ops->get_pending_flush = dpu_hw_ctl_get_pending_flush; - ops->trigger_flush = dpu_hw_ctl_trigger_flush; ops->get_flush_register = dpu_hw_ctl_get_flush_register; ops->trigger_start = dpu_hw_ctl_trigger_start; ops->trigger_pending = dpu_hw_ctl_trigger_pending; - ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg; ops->reset = dpu_hw_ctl_reset_control; ops->wait_reset_status = dpu_hw_ctl_wait_reset_status; ops->clear_all_blendstages = dpu_hw_ctl_clear_all_blendstages; ops->setup_blendstage = dpu_hw_ctl_setup_blendstage; ops->get_bitmask_sspp = dpu_hw_ctl_get_bitmask_sspp; ops->get_bitmask_mixer = dpu_hw_ctl_get_bitmask_mixer; - ops->get_bitmask_intf = dpu_hw_ctl_get_bitmask_intf; }; static struct dpu_hw_blk_ops dpu_hw_ops; struct dpu_hw_ctl *dpu_hw_ctl_init(enum dpu_ctl idx, void __iomem *addr, - struct dpu_mdss_cfg *m) + const struct dpu_mdss_cfg *m) { struct dpu_hw_ctl *c; - struct dpu_ctl_cfg *cfg; + const struct dpu_ctl_cfg *cfg; c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h index d3ae939ef9f8..09e1263c72e2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h @@ -91,6 +91,15 @@ struct dpu_hw_ctl_ops { u32 flushbits); /** + * OR in the given flushbits to the cached pending_intf_flush_mask + * No effect on hardware + * @ctx : ctl path ctx pointer + * @flushbits : module flushmask + */ + void (*update_pending_intf_flush)(struct dpu_hw_ctl *ctx, + u32 flushbits); + + /** * Write the value of the pending_flush_mask to hardware * @ctx : ctl path ctx pointer */ @@ -130,11 +139,24 @@ struct dpu_hw_ctl_ops { uint32_t (*get_bitmask_mixer)(struct dpu_hw_ctl *ctx, enum dpu_lm blk); + /** + * Query the value of the intf flush mask + * No effect on hardware + * @ctx : ctl path ctx pointer + */ int (*get_bitmask_intf)(struct dpu_hw_ctl *ctx, u32 *flushbits, enum dpu_intf blk); /** + * Query the value of the intf active flush mask + * No effect on hardware + * @ctx : ctl path ctx pointer + */ + int (*get_bitmask_active_intf)(struct dpu_hw_ctl *ctx, + u32 *flushbits, enum dpu_intf blk); + + /** * Set all blend stages to disabled * @ctx : ctl path ctx pointer */ @@ -159,6 +181,7 @@ struct dpu_hw_ctl_ops { * @mixer_count: number of mixers * @mixer_hw_caps: mixer hardware capabilities * @pending_flush_mask: storage for pending ctl_flush managed via ops + * @pending_intf_flush_mask: pending INTF flush * @ops: operation list */ struct dpu_hw_ctl { @@ -171,6 +194,7 @@ struct dpu_hw_ctl { int mixer_count; const struct dpu_lm_cfg *mixer_hw_caps; u32 pending_flush_mask; + u32 pending_intf_flush_mask; /* ops */ struct dpu_hw_ctl_ops ops; @@ -195,7 +219,7 @@ static inline struct dpu_hw_ctl *to_dpu_hw_ctl(struct dpu_hw_blk *hw) */ struct dpu_hw_ctl *dpu_hw_ctl_init(enum dpu_ctl idx, void __iomem *addr, - struct dpu_mdss_cfg *m); + const struct dpu_mdss_cfg *m); /** * dpu_hw_ctl_destroy(): Destroys ctl driver context diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index 8bfa7d0eede6..d84a84f7fe1a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -800,8 +800,8 @@ static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr, start_idx = reg_idx * 32; end_idx = start_idx + 32; - if (start_idx >= ARRAY_SIZE(dpu_irq_map) || - end_idx > ARRAY_SIZE(dpu_irq_map)) + if (!test_bit(reg_idx, &intr->irq_mask) || + start_idx >= ARRAY_SIZE(dpu_irq_map)) continue; /* @@ -955,8 +955,11 @@ static int dpu_hw_intr_clear_irqs(struct dpu_hw_intr *intr) if (!intr) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) - DPU_REG_WRITE(&intr->hw, dpu_intr_set[i].clr_off, 0xffffffff); + for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) { + if (test_bit(i, &intr->irq_mask)) + DPU_REG_WRITE(&intr->hw, + dpu_intr_set[i].clr_off, 0xffffffff); + } /* ensure register writes go through */ wmb(); @@ -971,8 +974,11 @@ static int dpu_hw_intr_disable_irqs(struct dpu_hw_intr *intr) if (!intr) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) - DPU_REG_WRITE(&intr->hw, dpu_intr_set[i].en_off, 0x00000000); + for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) { + if (test_bit(i, &intr->irq_mask)) + DPU_REG_WRITE(&intr->hw, + dpu_intr_set[i].en_off, 0x00000000); + } /* ensure register writes go through */ wmb(); @@ -991,6 +997,9 @@ static void dpu_hw_intr_get_interrupt_statuses(struct dpu_hw_intr *intr) spin_lock_irqsave(&intr->irq_lock, irq_flags); for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) { + if (!test_bit(i, &intr->irq_mask)) + continue; + /* Read interrupt status */ intr->save_irq_status[i] = DPU_REG_READ(&intr->hw, dpu_intr_set[i].status_off); @@ -1115,6 +1124,7 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, return ERR_PTR(-ENOMEM); } + intr->irq_mask = m->mdss_irqs; spin_lock_init(&intr->irq_lock); return intr; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h index 4edcf402dc46..fc9c98617281 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h @@ -187,6 +187,7 @@ struct dpu_hw_intr { u32 *save_irq_status; u32 irq_idx_tbl_size; spinlock_t irq_lock; + unsigned long irq_mask; }; /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index dcd87cda13fe..efe9a5719c6b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -56,8 +56,10 @@ #define INTF_FRAME_COUNT 0x0AC #define INTF_LINE_COUNT 0x0B0 -static struct dpu_intf_cfg *_intf_offset(enum dpu_intf intf, - struct dpu_mdss_cfg *m, +#define INTF_MUX 0x25C + +static const struct dpu_intf_cfg *_intf_offset(enum dpu_intf intf, + const struct dpu_mdss_cfg *m, void __iomem *addr, struct dpu_hw_blk_reg_map *b) { @@ -218,6 +220,30 @@ static void dpu_hw_intf_setup_prg_fetch( DPU_REG_WRITE(c, INTF_CONFIG, fetch_enable); } +static void dpu_hw_intf_bind_pingpong_blk( + struct dpu_hw_intf *intf, + bool enable, + const enum dpu_pingpong pp) +{ + struct dpu_hw_blk_reg_map *c; + u32 mux_cfg; + + if (!intf) + return; + + c = &intf->hw; + + mux_cfg = DPU_REG_READ(c, INTF_MUX); + mux_cfg &= ~0xf; + + if (enable) + mux_cfg |= (pp - PINGPONG_0) & 0x7; + else + mux_cfg |= 0xf; + + DPU_REG_WRITE(c, INTF_MUX, mux_cfg); +} + static void dpu_hw_intf_get_status( struct dpu_hw_intf *intf, struct intf_status *s) @@ -254,16 +280,18 @@ static void _setup_intf_ops(struct dpu_hw_intf_ops *ops, ops->get_status = dpu_hw_intf_get_status; ops->enable_timing = dpu_hw_intf_enable_timing_engine; ops->get_line_count = dpu_hw_intf_get_line_count; + if (cap & BIT(DPU_CTL_ACTIVE_CFG)) + ops->bind_pingpong_blk = dpu_hw_intf_bind_pingpong_blk; } static struct dpu_hw_blk_ops dpu_hw_ops; struct dpu_hw_intf *dpu_hw_intf_init(enum dpu_intf idx, void __iomem *addr, - struct dpu_mdss_cfg *m) + const struct dpu_mdss_cfg *m) { struct dpu_hw_intf *c; - struct dpu_intf_cfg *cfg; + const struct dpu_intf_cfg *cfg; c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index b03acc225c9b..85468981632d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -52,6 +52,8 @@ struct intf_status { * @ enable_timing: enable/disable timing engine * @ get_status: returns if timing engine is enabled or not * @ get_line_count: reads current vertical line counter + * @bind_pingpong_blk: enable/disable the connection with pingpong which will + * feed pixels to this interface */ struct dpu_hw_intf_ops { void (*setup_timing_gen)(struct dpu_hw_intf *intf, @@ -68,6 +70,10 @@ struct dpu_hw_intf_ops { struct intf_status *status); u32 (*get_line_count)(struct dpu_hw_intf *intf); + + void (*bind_pingpong_blk)(struct dpu_hw_intf *intf, + bool enable, + const enum dpu_pingpong pp); }; struct dpu_hw_intf { @@ -92,7 +98,7 @@ struct dpu_hw_intf { */ struct dpu_hw_intf *dpu_hw_intf_init(enum dpu_intf idx, void __iomem *addr, - struct dpu_mdss_cfg *m); + const struct dpu_mdss_cfg *m); /** * dpu_hw_intf_destroy(): Destroys INTF driver context diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c index 5bc39baa746a..37becd43bd54 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c @@ -24,8 +24,8 @@ #define LM_BLEND0_FG_ALPHA 0x04 #define LM_BLEND0_BG_ALPHA 0x08 -static struct dpu_lm_cfg *_lm_offset(enum dpu_lm mixer, - struct dpu_mdss_cfg *m, +static const struct dpu_lm_cfg *_lm_offset(enum dpu_lm mixer, + const struct dpu_mdss_cfg *m, void __iomem *addr, struct dpu_hw_blk_reg_map *b) { @@ -147,12 +147,13 @@ static void dpu_hw_lm_setup_color3(struct dpu_hw_mixer *ctx, DPU_REG_WRITE(c, LM_OP_MODE, op_mode); } -static void _setup_mixer_ops(struct dpu_mdss_cfg *m, +static void _setup_mixer_ops(const struct dpu_mdss_cfg *m, struct dpu_hw_lm_ops *ops, unsigned long features) { ops->setup_mixer_out = dpu_hw_lm_setup_out; - if (IS_SDM845_TARGET(m->hwversion) || IS_SDM670_TARGET(m->hwversion)) + if (IS_SDM845_TARGET(m->hwversion) || IS_SDM670_TARGET(m->hwversion) + || IS_SC7180_TARGET(m->hwversion)) ops->setup_blend_config = dpu_hw_lm_setup_blend_config_sdm845; else ops->setup_blend_config = dpu_hw_lm_setup_blend_config; @@ -164,10 +165,10 @@ static struct dpu_hw_blk_ops dpu_hw_ops; struct dpu_hw_mixer *dpu_hw_lm_init(enum dpu_lm idx, void __iomem *addr, - struct dpu_mdss_cfg *m) + const struct dpu_mdss_cfg *m) { struct dpu_hw_mixer *c; - struct dpu_lm_cfg *cfg; + const struct dpu_lm_cfg *cfg; c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h index 147ace31cfc2..4a6b2de19ef6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h @@ -91,7 +91,7 @@ static inline struct dpu_hw_mixer *to_dpu_hw_mixer(struct dpu_hw_blk *hw) */ struct dpu_hw_mixer *dpu_hw_lm_init(enum dpu_lm idx, void __iomem *addr, - struct dpu_mdss_cfg *m); + const struct dpu_mdss_cfg *m); /** * dpu_hw_lm_destroy(): Destroys layer mixer driver context diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c index 5dbaba9fd180..d110a40f0e73 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c @@ -28,8 +28,8 @@ #define PP_FBC_BUDGET_CTL 0x038 #define PP_FBC_LOSSY_MODE 0x03C -static struct dpu_pingpong_cfg *_pingpong_offset(enum dpu_pingpong pp, - struct dpu_mdss_cfg *m, +static const struct dpu_pingpong_cfg *_pingpong_offset(enum dpu_pingpong pp, + const struct dpu_mdss_cfg *m, void __iomem *addr, struct dpu_hw_blk_reg_map *b) { @@ -195,10 +195,10 @@ static struct dpu_hw_blk_ops dpu_hw_ops; struct dpu_hw_pingpong *dpu_hw_pingpong_init(enum dpu_pingpong idx, void __iomem *addr, - struct dpu_mdss_cfg *m) + const struct dpu_mdss_cfg *m) { struct dpu_hw_pingpong *c; - struct dpu_pingpong_cfg *cfg; + const struct dpu_pingpong_cfg *cfg; c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h index 58bdb9279aa8..3d6f46b1db30 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h @@ -106,7 +106,7 @@ struct dpu_hw_pingpong { */ struct dpu_hw_pingpong *dpu_hw_pingpong_init(enum dpu_pingpong idx, void __iomem *addr, - struct dpu_mdss_cfg *m); + const struct dpu_mdss_cfg *m); /** * dpu_hw_pingpong_destroy - destroys pingpong driver context diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index 4f8b813aab81..82c5dbfdabc7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -132,6 +132,7 @@ /* traffic shaper clock in Hz */ #define TS_CLK 19200000 + static int _sspp_subblk_offset(struct dpu_hw_pipe *ctx, int s_id, u32 *idx) @@ -657,7 +658,8 @@ static void _setup_layer_ops(struct dpu_hw_pipe *c, test_bit(DPU_SSPP_SMART_DMA_V2, &c->cap->features)) c->ops.setup_multirect = dpu_hw_sspp_setup_multirect; - if (test_bit(DPU_SSPP_SCALER_QSEED3, &features)) { + if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) || + test_bit(DPU_SSPP_SCALER_QSEED4, &features)) { c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3; c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver; } @@ -666,7 +668,7 @@ static void _setup_layer_ops(struct dpu_hw_pipe *c, c->ops.setup_cdp = dpu_hw_sspp_setup_cdp; } -static struct dpu_sspp_cfg *_sspp_offset(enum dpu_sspp sspp, +static const struct dpu_sspp_cfg *_sspp_offset(enum dpu_sspp sspp, void __iomem *addr, struct dpu_mdss_cfg *catalog, struct dpu_hw_blk_reg_map *b) @@ -696,7 +698,7 @@ struct dpu_hw_pipe *dpu_hw_sspp_init(enum dpu_sspp idx, bool is_virtual_pipe) { struct dpu_hw_pipe *hw_pipe; - struct dpu_sspp_cfg *cfg; + const struct dpu_sspp_cfg *cfg; if (!addr || !catalog) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h index a3680b482b41..85b018a9b03c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h @@ -27,7 +27,8 @@ struct dpu_hw_pipe; */ #define DPU_SSPP_SCALER ((1UL << DPU_SSPP_SCALER_RGB) | \ (1UL << DPU_SSPP_SCALER_QSEED2) | \ - (1UL << DPU_SSPP_SCALER_QSEED3)) + (1UL << DPU_SSPP_SCALER_QSEED3) | \ + (1UL << DPU_SSPP_SCALER_QSEED4)) /** * Component indices @@ -373,7 +374,7 @@ struct dpu_hw_pipe { struct dpu_hw_blk base; struct dpu_hw_blk_reg_map hw; struct dpu_mdss_cfg *catalog; - struct dpu_mdp_cfg *mdp; + const struct dpu_mdp_cfg *mdp; /* Pipe */ enum dpu_sspp idx; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c index 27fbeb504362..078afc5f5882 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c @@ -93,19 +93,12 @@ int msm_dss_enable_clk(struct dss_clk *clk_arry, int num_clk, int enable) DEV_DBG("%pS->%s: enable '%s'\n", __builtin_return_address(0), __func__, clk_arry[i].clk_name); - if (clk_arry[i].clk) { - rc = clk_prepare_enable(clk_arry[i].clk); - if (rc) - DEV_ERR("%pS->%s: %s en fail. rc=%d\n", - __builtin_return_address(0), - __func__, - clk_arry[i].clk_name, rc); - } else { - DEV_ERR("%pS->%s: '%s' is not available\n", - __builtin_return_address(0), __func__, - clk_arry[i].clk_name); - rc = -EPERM; - } + rc = clk_prepare_enable(clk_arry[i].clk); + if (rc) + DEV_ERR("%pS->%s: %s en fail. rc=%d\n", + __builtin_return_address(0), + __func__, + clk_arry[i].clk_name, rc); if (rc && i) { msm_dss_enable_clk(&clk_arry[i - 1], @@ -119,12 +112,7 @@ int msm_dss_enable_clk(struct dss_clk *clk_arry, int num_clk, int enable) __builtin_return_address(0), __func__, clk_arry[i].clk_name); - if (clk_arry[i].clk) - clk_disable_unprepare(clk_arry[i].clk); - else - DEV_ERR("%pS->%s: '%s' is not available\n", - __builtin_return_address(0), __func__, - clk_arry[i].clk_name); + clk_disable_unprepare(clk_arry[i].clk); } } @@ -187,6 +175,7 @@ int msm_dss_parse_clock(struct platform_device *pdev, continue; mp->clk_config[i].rate = rate; mp->clk_config[i].type = DSS_CLK_PCLK; + mp->clk_config[i].max_rate = rate; } mp->num_clk = num_clk; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 6c92f0fbeac9..cb08fafb1dc1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1059,6 +1059,7 @@ static const struct dev_pm_ops dpu_pm_ops = { static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,sdm845-dpu", }, + { .compatible = "qcom,sc7180-dpu", }, {} }; MODULE_DEVICE_TABLE(of, dpu_dt_match); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 58d5acbcfc5c..3b9c33e694bf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -53,8 +53,13 @@ enum { R_MAX }; +/* + * Default Preload Values + */ #define DPU_QSEED3_DEFAULT_PRELOAD_H 0x4 #define DPU_QSEED3_DEFAULT_PRELOAD_V 0x3 +#define DPU_QSEED4_DEFAULT_PRELOAD_V 0x2 +#define DPU_QSEED4_DEFAULT_PRELOAD_H 0x4 #define DEFAULT_REFRESH_RATE 60 @@ -477,8 +482,16 @@ static void _dpu_plane_setup_scaler3(struct dpu_plane *pdpu, scale_cfg->src_width[i] /= chroma_subsmpl_h; scale_cfg->src_height[i] /= chroma_subsmpl_v; } - scale_cfg->preload_x[i] = DPU_QSEED3_DEFAULT_PRELOAD_H; - scale_cfg->preload_y[i] = DPU_QSEED3_DEFAULT_PRELOAD_V; + + if (pdpu->pipe_hw->cap->features & + BIT(DPU_SSPP_SCALER_QSEED4)) { + scale_cfg->preload_x[i] = DPU_QSEED4_DEFAULT_PRELOAD_H; + scale_cfg->preload_y[i] = DPU_QSEED4_DEFAULT_PRELOAD_V; + } else { + scale_cfg->preload_x[i] = DPU_QSEED3_DEFAULT_PRELOAD_H; + scale_cfg->preload_y[i] = DPU_QSEED3_DEFAULT_PRELOAD_V; + } + pstate->pixel_ext.num_ext_pxls_top[i] = scale_cfg->src_height[i]; pstate->pixel_ext.num_ext_pxls_left[i] = @@ -738,7 +751,7 @@ done: } else { pstate[R0]->multirect_index = DPU_SSPP_RECT_0; pstate[R1]->multirect_index = DPU_SSPP_RECT_1; - }; + } DPU_DEBUG_PLANE(dpu_plane[R0], "R0: %d - %d\n", pstate[R0]->multirect_mode, pstate[R0]->multirect_index); @@ -858,7 +871,7 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, pdpu->pipe_sblk->maxupscale << 16, true, true); if (ret) { - DPU_ERROR_PLANE(pdpu, "Check plane state failed (%d)\n", ret); + DPU_DEBUG_PLANE(pdpu, "Check plane state failed (%d)\n", ret); return ret; } if (!state->visible) @@ -884,13 +897,13 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, (!(pdpu->features & DPU_SSPP_SCALER) || !(pdpu->features & (BIT(DPU_SSPP_CSC) | BIT(DPU_SSPP_CSC_10BIT))))) { - DPU_ERROR_PLANE(pdpu, + DPU_DEBUG_PLANE(pdpu, "plane doesn't have scaler/csc for yuv\n"); return -EINVAL; /* check src bounds */ } else if (!dpu_plane_validate_src(&src, &fb_rect, min_src_size)) { - DPU_ERROR_PLANE(pdpu, "invalid source " DRM_RECT_FMT "\n", + DPU_DEBUG_PLANE(pdpu, "invalid source " DRM_RECT_FMT "\n", DRM_RECT_ARG(&src)); return -E2BIG; @@ -899,19 +912,19 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, (src.x1 & 0x1 || src.y1 & 0x1 || drm_rect_width(&src) & 0x1 || drm_rect_height(&src) & 0x1)) { - DPU_ERROR_PLANE(pdpu, "invalid yuv source " DRM_RECT_FMT "\n", + DPU_DEBUG_PLANE(pdpu, "invalid yuv source " DRM_RECT_FMT "\n", DRM_RECT_ARG(&src)); return -EINVAL; /* min dst support */ } else if (drm_rect_width(&dst) < 0x1 || drm_rect_height(&dst) < 0x1) { - DPU_ERROR_PLANE(pdpu, "invalid dest rect " DRM_RECT_FMT "\n", + DPU_DEBUG_PLANE(pdpu, "invalid dest rect " DRM_RECT_FMT "\n", DRM_RECT_ARG(&dst)); return -EINVAL; /* check decimated source width */ } else if (drm_rect_width(&src) > max_linewidth) { - DPU_ERROR_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u\n", + DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u\n", DRM_RECT_ARG(&src), max_linewidth); return -E2BIG; } @@ -1337,7 +1350,8 @@ static int _dpu_plane_init_debugfs(struct drm_plane *plane) pdpu->debugfs_root, &pdpu->debugfs_src); if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED2)) { + cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) || + cfg->features & BIT(DPU_SSPP_SCALER_QSEED4)) { dpu_debugfs_setup_regset32(&pdpu->debugfs_scaler, sblk->scaler_blk.base + cfg->base, sblk->scaler_blk.len, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index ddc8412731af..23f5b1433b35 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -141,11 +141,11 @@ int dpu_rm_destroy(struct dpu_rm *rm) static int _dpu_rm_hw_blk_create( struct dpu_rm *rm, - struct dpu_mdss_cfg *cat, + const struct dpu_mdss_cfg *cat, void __iomem *mmio, enum dpu_hw_blk_type type, uint32_t id, - void *hw_catalog_info) + const void *hw_catalog_info) { struct dpu_rm_hw_blk *blk; void *hw; @@ -215,7 +215,7 @@ int dpu_rm_init(struct dpu_rm *rm, /* Interrogate HW catalog and create tracking items for hw blocks */ for (i = 0; i < cat->mixer_count; i++) { - struct dpu_lm_cfg *lm = &cat->mixer[i]; + const struct dpu_lm_cfg *lm = &cat->mixer[i]; if (lm->pingpong == PINGPONG_MAX) { DPU_DEBUG("skip mixer %d without pingpong\n", lm->id); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c index 991f4c8f8a12..93ab36bd8df3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c @@ -299,7 +299,7 @@ void dpu_debugfs_vbif_init(struct dpu_kms *dpu_kms, struct dentry *debugfs_root) entry = debugfs_create_dir("vbif", debugfs_root); for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { - struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i]; + const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i]; snprintf(vbif_name, sizeof(vbif_name), "%d", vbif->id); @@ -318,7 +318,7 @@ void dpu_debugfs_vbif_init(struct dpu_kms *dpu_kms, struct dentry *debugfs_root) (u32 *)&vbif->default_ot_wr_limit); for (j = 0; j < vbif->dynamic_ot_rd_tbl.count; j++) { - struct dpu_vbif_dynamic_ot_cfg *cfg = + const struct dpu_vbif_dynamic_ot_cfg *cfg = &vbif->dynamic_ot_rd_tbl.cfg[j]; snprintf(vbif_name, sizeof(vbif_name), @@ -332,7 +332,7 @@ void dpu_debugfs_vbif_init(struct dpu_kms *dpu_kms, struct dentry *debugfs_root) } for (j = 0; j < vbif->dynamic_ot_wr_tbl.count; j++) { - struct dpu_vbif_dynamic_ot_cfg *cfg = + const struct dpu_vbif_dynamic_ot_cfg *cfg = &vbif->dynamic_ot_wr_tbl.cfg[j]; snprintf(vbif_name, sizeof(vbif_name), diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c index 772f0753ed38..aaf2f26f8505 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c @@ -121,7 +121,7 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder) if (mdp4_dsi_encoder->enabled) return; - mdp4_crtc_set_config(encoder->crtc, + mdp4_crtc_set_config(encoder->crtc, MDP4_DMA_CONFIG_PACK_ALIGN_MSB | MDP4_DMA_CONFIG_DEFLKR_EN | MDP4_DMA_CONFIG_DITHER_EN | diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index 1f48f64539a2..e3c4c250238b 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -902,7 +902,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, major, minor); ret = -ENXIO; goto fail; - }; + } /* only after mdp5_cfg global pointer's init can we access the hw */ for (i = 0; i < num_handlers; i++) { diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index eff1a4c61258..4de771d6f0be 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -178,6 +178,8 @@ int msm_dsi_host_modeset_init(struct mipi_dsi_host *host, int msm_dsi_host_init(struct msm_dsi *msm_dsi); int msm_dsi_runtime_suspend(struct device *dev); int msm_dsi_runtime_resume(struct device *dev); +int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host); +int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host); int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host); int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host); void dsi_link_clk_disable_6g(struct msm_dsi_host *msm_host); diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 86ad3fdf207d..813d69deb5e8 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -153,6 +153,10 @@ static const char * const dsi_sdm845_bus_clk_names[] = { "iface", "bus", }; +static const char * const dsi_sc7180_bus_clk_names[] = { + "iface", "bus", +}; + static const struct msm_dsi_config sdm845_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { @@ -167,7 +171,22 @@ static const struct msm_dsi_config sdm845_dsi_cfg = { .num_dsi = 2, }; +static const struct msm_dsi_config sc7180_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .reg_cfg = { + .num = 1, + .regs = { + {"vdda", 21800, 4 }, /* 1.2 V */ + }, + }, + .bus_clk_names = dsi_sc7180_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_sc7180_bus_clk_names), + .io_start = { 0xae94000 }, + .num_dsi = 1, +}; + static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { + .link_clk_set_rate = dsi_link_clk_set_rate_v2, .link_clk_enable = dsi_link_clk_enable_v2, .link_clk_disable = dsi_link_clk_disable_v2, .clk_init_ver = dsi_clk_init_v2, @@ -179,6 +198,7 @@ static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { }; static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { + .link_clk_set_rate = dsi_link_clk_set_rate_6g, .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = NULL, @@ -190,6 +210,7 @@ static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { }; static const struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = { + .link_clk_set_rate = dsi_link_clk_set_rate_6g, .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = dsi_clk_init_6g_v2, @@ -223,6 +244,9 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &msm8998_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_1, &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_1, + &sc7180_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + }; const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor) diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index 50a37ceb6a25..217e24a65178 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -20,6 +20,7 @@ #define MSM_DSI_6G_VER_MINOR_V1_4_2 0x10040002 #define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000 #define MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001 +#define MSM_DSI_6G_VER_MINOR_V2_4_1 0x20040001 #define MSM_DSI_V2_VER_MINOR_8064 0x0 @@ -35,6 +36,7 @@ struct msm_dsi_config { }; struct msm_dsi_host_cfg_ops { + int (*link_clk_set_rate)(struct msm_dsi_host *msm_host); int (*link_clk_enable)(struct msm_dsi_host *msm_host); void (*link_clk_disable)(struct msm_dsi_host *msm_host); int (*clk_init_ver)(struct msm_dsi_host *msm_host); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 458cec82ae13..11ae5b8444c3 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -505,7 +505,7 @@ int msm_dsi_runtime_resume(struct device *dev) return dsi_bus_clk_enable(msm_host); } -int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host) +int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host) { int ret; @@ -515,13 +515,13 @@ int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host) ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate); if (ret) { pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret); - goto error; + return ret; } ret = clk_set_rate(msm_host->pixel_clk, msm_host->pixel_clk_rate); if (ret) { pr_err("%s: Failed to set rate pixel clk, %d\n", __func__, ret); - goto error; + return ret; } if (msm_host->byte_intf_clk) { @@ -530,10 +530,18 @@ int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host) if (ret) { pr_err("%s: Failed to set rate byte intf clk, %d\n", __func__, ret); - goto error; + return ret; } } + return 0; +} + + +int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host) +{ + int ret; + ret = clk_prepare_enable(msm_host->esc_clk); if (ret) { pr_err("%s: Failed to enable dsi esc clk\n", __func__); @@ -573,7 +581,7 @@ error: return ret; } -int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host) +int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host) { int ret; @@ -584,27 +592,34 @@ int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host) ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate); if (ret) { pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret); - goto error; + return ret; } ret = clk_set_rate(msm_host->esc_clk, msm_host->esc_clk_rate); if (ret) { pr_err("%s: Failed to set rate esc clk, %d\n", __func__, ret); - goto error; + return ret; } ret = clk_set_rate(msm_host->src_clk, msm_host->src_clk_rate); if (ret) { pr_err("%s: Failed to set rate src clk, %d\n", __func__, ret); - goto error; + return ret; } ret = clk_set_rate(msm_host->pixel_clk, msm_host->pixel_clk_rate); if (ret) { pr_err("%s: Failed to set rate pixel clk, %d\n", __func__, ret); - goto error; + return ret; } + return 0; +} + +int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host) +{ + int ret; + ret = clk_prepare_enable(msm_host->byte_clk); if (ret) { pr_err("%s: Failed to enable dsi byte clk\n", __func__); @@ -818,7 +833,7 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, u32 flags = msm_host->mode_flags; enum mipi_dsi_pixel_format mipi_fmt = msm_host->format; const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; - u32 data = 0; + u32 data = 0, lane_ctrl = 0; if (!enable) { dsi_write(msm_host, REG_DSI_CTRL, 0); @@ -906,9 +921,11 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, dsi_write(msm_host, REG_DSI_LANE_SWAP_CTRL, DSI_LANE_SWAP_CTRL_DLN_SWAP_SEL(msm_host->dlane_swap)); - if (!(flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) + if (!(flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) { + lane_ctrl = dsi_read(msm_host, REG_DSI_LANE_CTRL); dsi_write(msm_host, REG_DSI_LANE_CTRL, - DSI_LANE_CTRL_CLKLN_HS_FORCE_REQUEST); + lane_ctrl | DSI_LANE_CTRL_CLKLN_HS_FORCE_REQUEST); + } data |= DSI_CTRL_ENABLE; @@ -1996,6 +2013,7 @@ int msm_dsi_host_xfer_prepare(struct mipi_dsi_host *host, * mdp clock need to be enabled to receive dsi interrupt */ pm_runtime_get_sync(&msm_host->pdev->dev); + cfg_hnd->ops->link_clk_set_rate(msm_host); cfg_hnd->ops->link_clk_enable(msm_host); /* TODO: vote for bus bandwidth */ @@ -2344,7 +2362,9 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host, } pm_runtime_get_sync(&msm_host->pdev->dev); - ret = cfg_hnd->ops->link_clk_enable(msm_host); + ret = cfg_hnd->ops->link_clk_set_rate(msm_host); + if (!ret) + ret = cfg_hnd->ops->link_clk_enable(msm_host); if (ret) { pr_err("%s: failed to enable link clocks. ret=%d\n", __func__, ret); diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 0fc29f1be8cc..104115d112eb 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -432,20 +432,8 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge) } } - if (panel) { - ret = drm_panel_enable(panel); - if (ret) { - pr_err("%s: enable panel %d failed, %d\n", __func__, id, - ret); - goto panel_en_fail; - } - } - return; -panel_en_fail: - if (is_dual_dsi && msm_dsi1) - msm_dsi_host_disable(msm_dsi1->host); host1_en_fail: msm_dsi_host_disable(host); host_en_fail: @@ -464,12 +452,51 @@ phy_en_fail: static void dsi_mgr_bridge_enable(struct drm_bridge *bridge) { - DBG(""); + int id = dsi_mgr_bridge_get_id(bridge); + struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); + struct drm_panel *panel = msm_dsi->panel; + bool is_dual_dsi = IS_DUAL_DSI(); + int ret; + + DBG("id=%d", id); + if (!msm_dsi_device_connected(msm_dsi)) + return; + + /* Do nothing with the host if it is slave-DSI in case of dual DSI */ + if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + return; + + if (panel) { + ret = drm_panel_enable(panel); + if (ret) { + pr_err("%s: enable panel %d failed, %d\n", __func__, id, + ret); + } + } } static void dsi_mgr_bridge_disable(struct drm_bridge *bridge) { - DBG(""); + int id = dsi_mgr_bridge_get_id(bridge); + struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); + struct drm_panel *panel = msm_dsi->panel; + bool is_dual_dsi = IS_DUAL_DSI(); + int ret; + + DBG("id=%d", id); + if (!msm_dsi_device_connected(msm_dsi)) + return; + + /* Do nothing with the host if it is slave-DSI in case of dual DSI */ + if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + return; + + if (panel) { + ret = drm_panel_disable(panel); + if (ret) + pr_err("%s: Panel %d OFF failed, %d\n", __func__, id, + ret); + } } static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) @@ -495,13 +522,6 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) goto disable_phy; - if (panel) { - ret = drm_panel_disable(panel); - if (ret) - pr_err("%s: Panel %d OFF failed, %d\n", __func__, id, - ret); - } - ret = msm_dsi_host_disable(host); if (ret) pr_err("%s: host %d disable failed, %d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c index 59702684d576..58707a1f3878 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c @@ -101,7 +101,7 @@ static int gpio_config(struct hdmi *hdmi, bool on) gpiod_set_value_cansleep(gpio.gpiod, value); } - }; + } DBG("gpio off"); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index c84f0a8b3f2c..f50fefb87040 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1192,7 +1192,8 @@ static int add_display_components(struct device *dev, * the interfaces to our components list. */ if (of_device_is_compatible(dev->of_node, "qcom,mdss") || - of_device_is_compatible(dev->of_node, "qcom,sdm845-mdss")) { + of_device_is_compatible(dev->of_node, "qcom,sdm845-mdss") || + of_device_is_compatible(dev->of_node, "qcom,sc7180-mdss")) { ret = of_platform_populate(dev->of_node, NULL, NULL, dev); if (ret) { DRM_DEV_ERROR(dev, "failed to populate children devices\n"); @@ -1317,6 +1318,7 @@ static const struct of_device_id dt_match[] = { { .compatible = "qcom,mdp4", .data = (void *)KMS_MDP4 }, { .compatible = "qcom,mdss", .data = (void *)KMS_MDP5 }, { .compatible = "qcom,sdm845-mdss", .data = (void *)KMS_DPU }, + { .compatible = "qcom,sc7180-mdss", .data = (void *)KMS_DPU }, {} }; MODULE_DEVICE_TABLE(of, dt_match); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index ab8f0f9c9dc8..be5bc2e8425c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -111,8 +111,15 @@ struct msm_gpu { struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk; uint32_t fast_rate; + /* The gfx-mem interconnect path that's used by all GPU types. */ struct icc_path *icc_path; + /* + * Second interconnect path for some A3xx and all A4xx GPUs to the + * On Chip MEMory (OCMEM). + */ + struct icc_path *ocmem_icc_path; + /* Hang and Inactivity Detection: */ #define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */ diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.h b/drivers/gpu/drm/rockchip/cdn-dp-core.h index 83c4586665b4..81ac9b658a70 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.h +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.h @@ -95,7 +95,7 @@ struct cdn_dp_device { struct cdn_dp_port *port[MAX_PHY]; u8 ports; u8 max_lanes; - u8 max_rate; + unsigned int max_rate; u8 lanes; int active_port; diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 2e3a058fc239..ec79e8e5ad3c 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -45,7 +45,7 @@ * @guilty: atomic_t set to 1 when a job on this queue * is found to be guilty causing a timeout * - * Note: the rq_list should have atleast one element to schedule + * Note: the sched_list should have atleast one element to schedule * the entity * * Returns 0 on success or a negative error code on failure. @@ -130,7 +130,7 @@ static struct drm_sched_rq * drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) { struct drm_sched_rq *rq = NULL; - unsigned int min_jobs = UINT_MAX, num_jobs; + unsigned int min_score = UINT_MAX, num_score; int i; for (i = 0; i < entity->num_sched_list; ++i) { @@ -141,9 +141,9 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) continue; } - num_jobs = atomic_read(&sched->num_jobs); - if (num_jobs < min_jobs) { - min_jobs = num_jobs; + num_score = atomic_read(&sched->score); + if (num_score < min_score) { + min_score = num_score; rq = &entity->sched_list[i]->sched_rq[entity->priority]; } } @@ -498,7 +498,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, bool first; trace_drm_sched_job(sched_job, entity); - atomic_inc(&entity->rq->sched->num_jobs); + atomic_inc(&entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 3fad5876a13f..71ce6215956f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -92,6 +92,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, if (!list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_inc(&rq->sched->score); list_add_tail(&entity->list, &rq->entities); spin_unlock(&rq->lock); } @@ -110,6 +111,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, if (list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_dec(&rq->sched->score); list_del_init(&entity->list); if (rq->current_entity == entity) rq->current_entity = NULL; @@ -655,7 +657,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) struct drm_gpu_scheduler *sched = s_fence->sched; atomic_dec(&sched->hw_rq_count); - atomic_dec(&sched->num_jobs); + atomic_dec(&sched->score); trace_drm_sched_process_job(s_fence); @@ -830,7 +832,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); - atomic_set(&sched->num_jobs, 0); + atomic_set(&sched->score, 0); atomic64_set(&sched->job_id_count, 0); /* Each scheduler will run on a seperate kernel thread */ diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index a7c4654445c7..68d4644ac2dc 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -685,8 +685,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master, struct sun4i_hdmi *hdmi = dev_get_drvdata(dev); cec_unregister_adapter(hdmi->cec_adap); - drm_connector_cleanup(&hdmi->connector); - drm_encoder_cleanup(&hdmi->encoder); i2c_del_adapter(hdmi->i2c); i2c_put_adapter(hdmi->ddc_i2c); clk_disable_unprepare(hdmi->mod_clk); diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 42651d737c55..c81cdce6ed55 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -489,7 +489,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, WARN_ON(!tcon->quirks->has_channel_0); - tcon->dclk_min_div = 1; + tcon->dclk_min_div = tcon->quirks->dclk_min_div; tcon->dclk_max_div = 127; sun4i_tcon0_mode_set_common(tcon, mode); @@ -1426,12 +1426,14 @@ static int sun8i_r40_tcon_tv_set_mux(struct sun4i_tcon *tcon, static const struct sun4i_tcon_quirks sun4i_a10_quirks = { .has_channel_0 = true, .has_channel_1 = true, + .dclk_min_div = 4, .set_mux = sun4i_a10_tcon_set_mux, }; static const struct sun4i_tcon_quirks sun5i_a13_quirks = { .has_channel_0 = true, .has_channel_1 = true, + .dclk_min_div = 4, .set_mux = sun5i_a13_tcon_set_mux, }; @@ -1440,6 +1442,7 @@ static const struct sun4i_tcon_quirks sun6i_a31_quirks = { .has_channel_1 = true, .has_lvds_alt = true, .needs_de_be_mux = true, + .dclk_min_div = 1, .set_mux = sun6i_tcon_set_mux, }; @@ -1447,11 +1450,13 @@ static const struct sun4i_tcon_quirks sun6i_a31s_quirks = { .has_channel_0 = true, .has_channel_1 = true, .needs_de_be_mux = true, + .dclk_min_div = 1, }; static const struct sun4i_tcon_quirks sun7i_a20_quirks = { .has_channel_0 = true, .has_channel_1 = true, + .dclk_min_div = 4, /* Same display pipeline structure as A10 */ .set_mux = sun4i_a10_tcon_set_mux, }; @@ -1459,11 +1464,13 @@ static const struct sun4i_tcon_quirks sun7i_a20_quirks = { static const struct sun4i_tcon_quirks sun8i_a33_quirks = { .has_channel_0 = true, .has_lvds_alt = true, + .dclk_min_div = 1, }; static const struct sun4i_tcon_quirks sun8i_a83t_lcd_quirks = { .supports_lvds = true, .has_channel_0 = true, + .dclk_min_div = 1, }; static const struct sun4i_tcon_quirks sun8i_a83t_tv_quirks = { @@ -1477,11 +1484,13 @@ static const struct sun4i_tcon_quirks sun8i_r40_tv_quirks = { static const struct sun4i_tcon_quirks sun8i_v3s_quirks = { .has_channel_0 = true, + .dclk_min_div = 1, }; static const struct sun4i_tcon_quirks sun9i_a80_tcon_lcd_quirks = { - .has_channel_0 = true, - .needs_edp_reset = true, + .has_channel_0 = true, + .needs_edp_reset = true, + .dclk_min_div = 1, }; static const struct sun4i_tcon_quirks sun9i_a80_tcon_tv_quirks = { diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index f9f1fe80b206..a62ec826ae71 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -224,6 +224,7 @@ struct sun4i_tcon_quirks { bool needs_de_be_mux; /* sun6i needs mux to select backend */ bool needs_edp_reset; /* a80 edp reset needed for tcon0 access */ bool supports_lvds; /* Does the TCON support an LVDS output? */ + u8 dclk_min_div; /* minimum divider for TCON0 DCLK */ /* callback to handle tcon muxing options */ int (*set_mux)(struct sun4i_tcon *, const struct drm_encoder *); diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 714af052fbef..7c70fd31a4c2 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1727,6 +1727,7 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, { struct tegra_dc *dc = to_tegra_dc(crtc); u32 value; + int err; if (!tegra_dc_idle(dc)) { tegra_dc_stop(dc); @@ -1773,7 +1774,9 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, spin_unlock_irq(&crtc->dev->event_lock); - pm_runtime_put_sync(dc->dev); + err = host1x_client_suspend(&dc->client); + if (err < 0) + dev_err(dc->dev, "failed to suspend: %d\n", err); } static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, @@ -1783,8 +1786,13 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, struct tegra_dc_state *state = to_dc_state(crtc->state); struct tegra_dc *dc = to_tegra_dc(crtc); u32 value; + int err; - pm_runtime_get_sync(dc->dev); + err = host1x_client_resume(&dc->client); + if (err < 0) { + dev_err(dc->dev, "failed to resume: %d\n", err); + return; + } /* initialize display controller */ if (dc->syncpt) { @@ -1996,7 +2004,7 @@ static bool tegra_dc_has_window_groups(struct tegra_dc *dc) static int tegra_dc_init(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED; struct tegra_dc *dc = host1x_client_to_dc(client); struct tegra_drm *tegra = drm->dev_private; @@ -2012,6 +2020,15 @@ static int tegra_dc_init(struct host1x_client *client) if (!tegra_dc_has_window_groups(dc)) return 0; + /* + * Set the display hub as the host1x client parent for the display + * controller. This is needed for the runtime reference counting that + * ensures the display hub is always powered when any of the display + * controllers are. + */ + if (dc->soc->has_nvdisplay) + client->parent = &tegra->hub->client; + dc->syncpt = host1x_syncpt_request(client, flags); if (!dc->syncpt) dev_warn(dc->dev, "failed to allocate syncpoint\n"); @@ -2077,9 +2094,9 @@ static int tegra_dc_init(struct host1x_client *client) /* * Inherit the DMA parameters (such as maximum segment size) from the - * parent device. + * parent host1x device. */ - client->dev->dma_parms = client->parent->dma_parms; + client->dev->dma_parms = client->host->dma_parms; return 0; @@ -2121,9 +2138,74 @@ static int tegra_dc_exit(struct host1x_client *client) return 0; } +static int tegra_dc_runtime_suspend(struct host1x_client *client) +{ + struct tegra_dc *dc = host1x_client_to_dc(client); + struct device *dev = client->dev; + int err; + + err = reset_control_assert(dc->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + + if (dc->soc->has_powergate) + tegra_powergate_power_off(dc->powergate); + + clk_disable_unprepare(dc->clk); + pm_runtime_put_sync(dev); + + return 0; +} + +static int tegra_dc_runtime_resume(struct host1x_client *client) +{ + struct tegra_dc *dc = host1x_client_to_dc(client); + struct device *dev = client->dev; + int err; + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get runtime PM: %d\n", err); + return err; + } + + if (dc->soc->has_powergate) { + err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk, + dc->rst); + if (err < 0) { + dev_err(dev, "failed to power partition: %d\n", err); + goto put_rpm; + } + } else { + err = clk_prepare_enable(dc->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto put_rpm; + } + + err = reset_control_deassert(dc->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + } + + return 0; + +disable_clk: + clk_disable_unprepare(dc->clk); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + static const struct host1x_client_ops dc_client_ops = { .init = tegra_dc_init, .exit = tegra_dc_exit, + .suspend = tegra_dc_runtime_suspend, + .resume = tegra_dc_runtime_resume, }; static const struct tegra_dc_soc_info tegra20_dc_soc_info = { @@ -2535,65 +2617,10 @@ static int tegra_dc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tegra_dc_suspend(struct device *dev) -{ - struct tegra_dc *dc = dev_get_drvdata(dev); - int err; - - err = reset_control_assert(dc->rst); - if (err < 0) { - dev_err(dev, "failed to assert reset: %d\n", err); - return err; - } - - if (dc->soc->has_powergate) - tegra_powergate_power_off(dc->powergate); - - clk_disable_unprepare(dc->clk); - - return 0; -} - -static int tegra_dc_resume(struct device *dev) -{ - struct tegra_dc *dc = dev_get_drvdata(dev); - int err; - - if (dc->soc->has_powergate) { - err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk, - dc->rst); - if (err < 0) { - dev_err(dev, "failed to power partition: %d\n", err); - return err; - } - } else { - err = clk_prepare_enable(dc->clk); - if (err < 0) { - dev_err(dev, "failed to enable clock: %d\n", err); - return err; - } - - err = reset_control_deassert(dc->rst); - if (err < 0) { - dev_err(dev, "failed to deassert reset: %d\n", err); - return err; - } - } - - return 0; -} -#endif - -static const struct dev_pm_ops tegra_dc_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_dc_suspend, tegra_dc_resume, NULL) -}; - struct platform_driver tegra_dc_driver = { .driver = { .name = "tegra-dc", .of_match_table = tegra_dc_of_match, - .pm = &tegra_dc_pm_ops, }, .probe = tegra_dc_probe, .remove = tegra_dc_remove, diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 622cdf1ad246..7dfb50f65067 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -588,7 +588,7 @@ static int tegra_dpaux_remove(struct platform_device *pdev) /* make sure pads are powered down when not in use */ tegra_dpaux_pad_power_down(dpaux); - pm_runtime_put(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); drm_dp_aux_unregister(&dpaux->aux); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index f455ce71e85d..aa9e49f04988 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -905,7 +905,7 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra, int host1x_client_iommu_attach(struct host1x_client *client) { struct iommu_domain *domain = iommu_get_domain_for_dev(client->dev); - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); struct tegra_drm *tegra = drm->dev_private; struct iommu_group *group = NULL; int err; @@ -941,7 +941,7 @@ int host1x_client_iommu_attach(struct host1x_client *client) void host1x_client_iommu_detach(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); struct tegra_drm *tegra = drm->dev_private; struct iommu_domain *domain; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index d941553f7a3d..ed99b67deb29 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -144,6 +144,8 @@ int tegra_output_init(struct drm_device *drm, struct tegra_output *output); void tegra_output_exit(struct tegra_output *output); void tegra_output_find_possible_crtcs(struct tegra_output *output, struct drm_device *drm); +int tegra_output_suspend(struct tegra_output *output); +int tegra_output_resume(struct tegra_output *output); int tegra_output_connector_get_modes(struct drm_connector *connector); enum drm_connector_status diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index a5d47e301c5f..88b9d64c77bf 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -840,7 +840,9 @@ static void tegra_dsi_unprepare(struct tegra_dsi *dsi) dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n", err); - pm_runtime_put(dsi->dev); + err = host1x_client_suspend(&dsi->client); + if (err < 0) + dev_err(dsi->dev, "failed to suspend: %d\n", err); } static void tegra_dsi_encoder_disable(struct drm_encoder *encoder) @@ -882,11 +884,15 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder) tegra_dsi_unprepare(dsi); } -static void tegra_dsi_prepare(struct tegra_dsi *dsi) +static int tegra_dsi_prepare(struct tegra_dsi *dsi) { int err; - pm_runtime_get_sync(dsi->dev); + err = host1x_client_resume(&dsi->client); + if (err < 0) { + dev_err(dsi->dev, "failed to resume: %d\n", err); + return err; + } err = tegra_mipi_enable(dsi->mipi); if (err < 0) @@ -899,6 +905,8 @@ static void tegra_dsi_prepare(struct tegra_dsi *dsi) if (dsi->slave) tegra_dsi_prepare(dsi->slave); + + return 0; } static void tegra_dsi_encoder_enable(struct drm_encoder *encoder) @@ -909,8 +917,13 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder) struct tegra_dsi *dsi = to_dsi(output); struct tegra_dsi_state *state; u32 value; + int err; - tegra_dsi_prepare(dsi); + err = tegra_dsi_prepare(dsi); + if (err < 0) { + dev_err(dsi->dev, "failed to prepare: %d\n", err); + return; + } state = tegra_dsi_get_state(dsi); @@ -1030,7 +1043,7 @@ static const struct drm_encoder_helper_funcs tegra_dsi_encoder_helper_funcs = { static int tegra_dsi_init(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); struct tegra_dsi *dsi = host1x_client_to_dsi(client); int err; @@ -1075,9 +1088,89 @@ static int tegra_dsi_exit(struct host1x_client *client) return 0; } +static int tegra_dsi_runtime_suspend(struct host1x_client *client) +{ + struct tegra_dsi *dsi = host1x_client_to_dsi(client); + struct device *dev = client->dev; + int err; + + if (dsi->rst) { + err = reset_control_assert(dsi->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(dsi->clk_lp); + clk_disable_unprepare(dsi->clk); + + regulator_disable(dsi->vdd); + pm_runtime_put_sync(dev); + + return 0; +} + +static int tegra_dsi_runtime_resume(struct host1x_client *client) +{ + struct tegra_dsi *dsi = host1x_client_to_dsi(client); + struct device *dev = client->dev; + int err; + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get runtime PM: %d\n", err); + return err; + } + + err = regulator_enable(dsi->vdd); + if (err < 0) { + dev_err(dev, "failed to enable VDD supply: %d\n", err); + goto put_rpm; + } + + err = clk_prepare_enable(dsi->clk); + if (err < 0) { + dev_err(dev, "cannot enable DSI clock: %d\n", err); + goto disable_vdd; + } + + err = clk_prepare_enable(dsi->clk_lp); + if (err < 0) { + dev_err(dev, "cannot enable low-power clock: %d\n", err); + goto disable_clk; + } + + usleep_range(1000, 2000); + + if (dsi->rst) { + err = reset_control_deassert(dsi->rst); + if (err < 0) { + dev_err(dev, "cannot assert reset: %d\n", err); + goto disable_clk_lp; + } + } + + return 0; + +disable_clk_lp: + clk_disable_unprepare(dsi->clk_lp); +disable_clk: + clk_disable_unprepare(dsi->clk); +disable_vdd: + regulator_disable(dsi->vdd); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + static const struct host1x_client_ops dsi_client_ops = { .init = tegra_dsi_init, .exit = tegra_dsi_exit, + .suspend = tegra_dsi_runtime_suspend, + .resume = tegra_dsi_runtime_resume, }; static int tegra_dsi_setup_clocks(struct tegra_dsi *dsi) @@ -1596,79 +1689,6 @@ static int tegra_dsi_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tegra_dsi_suspend(struct device *dev) -{ - struct tegra_dsi *dsi = dev_get_drvdata(dev); - int err; - - if (dsi->rst) { - err = reset_control_assert(dsi->rst); - if (err < 0) { - dev_err(dev, "failed to assert reset: %d\n", err); - return err; - } - } - - usleep_range(1000, 2000); - - clk_disable_unprepare(dsi->clk_lp); - clk_disable_unprepare(dsi->clk); - - regulator_disable(dsi->vdd); - - return 0; -} - -static int tegra_dsi_resume(struct device *dev) -{ - struct tegra_dsi *dsi = dev_get_drvdata(dev); - int err; - - err = regulator_enable(dsi->vdd); - if (err < 0) { - dev_err(dsi->dev, "failed to enable VDD supply: %d\n", err); - return err; - } - - err = clk_prepare_enable(dsi->clk); - if (err < 0) { - dev_err(dev, "cannot enable DSI clock: %d\n", err); - goto disable_vdd; - } - - err = clk_prepare_enable(dsi->clk_lp); - if (err < 0) { - dev_err(dev, "cannot enable low-power clock: %d\n", err); - goto disable_clk; - } - - usleep_range(1000, 2000); - - if (dsi->rst) { - err = reset_control_deassert(dsi->rst); - if (err < 0) { - dev_err(dev, "cannot assert reset: %d\n", err); - goto disable_clk_lp; - } - } - - return 0; - -disable_clk_lp: - clk_disable_unprepare(dsi->clk_lp); -disable_clk: - clk_disable_unprepare(dsi->clk); -disable_vdd: - regulator_disable(dsi->vdd); - return err; -} -#endif - -static const struct dev_pm_ops tegra_dsi_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_dsi_suspend, tegra_dsi_resume, NULL) -}; - static const struct of_device_id tegra_dsi_of_match[] = { { .compatible = "nvidia,tegra210-dsi", }, { .compatible = "nvidia,tegra132-dsi", }, @@ -1682,7 +1702,6 @@ struct platform_driver tegra_dsi_driver = { .driver = { .name = "tegra-dsi", .of_match_table = tegra_dsi_of_match, - .pm = &tegra_dsi_pm_ops, }, .probe = tegra_dsi_probe, .remove = tegra_dsi_remove, diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index 1fc4e56c7cc5..48363f744bb9 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -34,7 +34,7 @@ static inline struct gr2d *to_gr2d(struct tegra_drm_client *client) static int gr2d_init(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); - struct drm_device *dev = dev_get_drvdata(client->parent); + struct drm_device *dev = dev_get_drvdata(client->host); unsigned long flags = HOST1X_SYNCPT_HAS_BASE; struct gr2d *gr2d = to_gr2d(drm); int err; @@ -76,7 +76,7 @@ put: static int gr2d_exit(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); - struct drm_device *dev = dev_get_drvdata(client->parent); + struct drm_device *dev = dev_get_drvdata(client->host); struct tegra_drm *tegra = dev->dev_private; struct gr2d *gr2d = to_gr2d(drm); int err; diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 24fae0f64032..c0a528be0369 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -43,7 +43,7 @@ static inline struct gr3d *to_gr3d(struct tegra_drm_client *client) static int gr3d_init(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); - struct drm_device *dev = dev_get_drvdata(client->parent); + struct drm_device *dev = dev_get_drvdata(client->host); unsigned long flags = HOST1X_SYNCPT_HAS_BASE; struct gr3d *gr3d = to_gr3d(drm); int err; @@ -85,7 +85,7 @@ put: static int gr3d_exit(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); - struct drm_device *dev = dev_get_drvdata(client->parent); + struct drm_device *dev = dev_get_drvdata(client->host); struct gr3d *gr3d = to_gr3d(drm); int err; diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 21a629adcb51..6f117628f257 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1146,6 +1146,7 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder) struct tegra_dc *dc = to_tegra_dc(encoder->crtc); struct tegra_hdmi *hdmi = to_hdmi(output); u32 value; + int err; /* * The following accesses registers of the display controller, so make @@ -1171,7 +1172,9 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder) tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE); tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK); - pm_runtime_put(hdmi->dev); + err = host1x_client_suspend(&hdmi->client); + if (err < 0) + dev_err(hdmi->dev, "failed to suspend: %d\n", err); } static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) @@ -1186,7 +1189,11 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) u32 value; int err; - pm_runtime_get_sync(hdmi->dev); + err = host1x_client_resume(&hdmi->client); + if (err < 0) { + dev_err(hdmi->dev, "failed to resume: %d\n", err); + return; + } /* * Enable and unmask the HDA codec SCRATCH0 register interrupt. This @@ -1424,8 +1431,8 @@ static const struct drm_encoder_helper_funcs tegra_hdmi_encoder_helper_funcs = { static int tegra_hdmi_init(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client); + struct drm_device *drm = dev_get_drvdata(client->host); int err; hdmi->output.dev = client->dev; @@ -1490,9 +1497,66 @@ static int tegra_hdmi_exit(struct host1x_client *client) return 0; } +static int tegra_hdmi_runtime_suspend(struct host1x_client *client) +{ + struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client); + struct device *dev = client->dev; + int err; + + err = reset_control_assert(hdmi->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(hdmi->clk); + pm_runtime_put_sync(dev); + + return 0; +} + +static int tegra_hdmi_runtime_resume(struct host1x_client *client) +{ + struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client); + struct device *dev = client->dev; + int err; + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get runtime PM: %d\n", err); + return err; + } + + err = clk_prepare_enable(hdmi->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto put_rpm; + } + + usleep_range(1000, 2000); + + err = reset_control_deassert(hdmi->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + return 0; + +disable_clk: + clk_disable_unprepare(hdmi->clk); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + static const struct host1x_client_ops hdmi_client_ops = { .init = tegra_hdmi_init, .exit = tegra_hdmi_exit, + .suspend = tegra_hdmi_runtime_suspend, + .resume = tegra_hdmi_runtime_resume, }; static const struct tegra_hdmi_config tegra20_hdmi_config = { @@ -1700,58 +1764,10 @@ static int tegra_hdmi_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tegra_hdmi_suspend(struct device *dev) -{ - struct tegra_hdmi *hdmi = dev_get_drvdata(dev); - int err; - - err = reset_control_assert(hdmi->rst); - if (err < 0) { - dev_err(dev, "failed to assert reset: %d\n", err); - return err; - } - - usleep_range(1000, 2000); - - clk_disable_unprepare(hdmi->clk); - - return 0; -} - -static int tegra_hdmi_resume(struct device *dev) -{ - struct tegra_hdmi *hdmi = dev_get_drvdata(dev); - int err; - - err = clk_prepare_enable(hdmi->clk); - if (err < 0) { - dev_err(dev, "failed to enable clock: %d\n", err); - return err; - } - - usleep_range(1000, 2000); - - err = reset_control_deassert(hdmi->rst); - if (err < 0) { - dev_err(dev, "failed to deassert reset: %d\n", err); - clk_disable_unprepare(hdmi->clk); - return err; - } - - return 0; -} -#endif - -static const struct dev_pm_ops tegra_hdmi_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_hdmi_suspend, tegra_hdmi_resume, NULL) -}; - struct platform_driver tegra_hdmi_driver = { .driver = { .name = "tegra-hdmi", .of_match_table = tegra_hdmi_of_match, - .pm = &tegra_hdmi_pm_ops, }, .probe = tegra_hdmi_probe, .remove = tegra_hdmi_remove, diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 47d985ac7cd7..8183e617bf6b 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -95,17 +95,25 @@ static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value, static int tegra_windowgroup_enable(struct tegra_windowgroup *wgrp) { + int err = 0; + mutex_lock(&wgrp->lock); if (wgrp->usecount == 0) { - pm_runtime_get_sync(wgrp->parent); + err = host1x_client_resume(wgrp->parent); + if (err < 0) { + dev_err(wgrp->parent->dev, "failed to resume: %d\n", err); + goto unlock; + } + reset_control_deassert(wgrp->rst); } wgrp->usecount++; - mutex_unlock(&wgrp->lock); - return 0; +unlock: + mutex_unlock(&wgrp->lock); + return err; } static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp) @@ -121,7 +129,7 @@ static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp) wgrp->index); } - pm_runtime_put(wgrp->parent); + host1x_client_suspend(wgrp->parent); } wgrp->usecount--; @@ -379,6 +387,7 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane, struct tegra_plane *p = to_tegra_plane(plane); struct tegra_dc *dc; u32 value; + int err; /* rien ne va plus */ if (!old_state || !old_state->crtc) @@ -386,6 +395,12 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane, dc = to_tegra_dc(old_state->crtc); + err = host1x_client_resume(&dc->client); + if (err < 0) { + dev_err(dc->dev, "failed to resume: %d\n", err); + return; + } + /* * XXX Legacy helpers seem to sometimes call ->atomic_disable() even * on planes that are already disabled. Make sure we fallback to the @@ -394,15 +409,13 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane, if (WARN_ON(p->dc == NULL)) p->dc = dc; - pm_runtime_get_sync(dc->dev); - value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS); value &= ~WIN_ENABLE; tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS); tegra_dc_remove_shared_plane(dc, p); - pm_runtime_put(dc->dev); + host1x_client_suspend(&dc->client); } static void tegra_shared_plane_atomic_update(struct drm_plane *plane, @@ -415,6 +428,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, struct tegra_plane *p = to_tegra_plane(plane); dma_addr_t base; u32 value; + int err; /* rien ne va plus */ if (!plane->state->crtc || !plane->state->fb) @@ -425,7 +439,11 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, return; } - pm_runtime_get_sync(dc->dev); + err = host1x_client_resume(&dc->client); + if (err < 0) { + dev_err(dc->dev, "failed to resume: %d\n", err); + return; + } tegra_dc_assign_shared_plane(dc, p); @@ -515,7 +533,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, value &= ~CONTROL_CSC_ENABLE; tegra_plane_writel(p, value, DC_WIN_WINDOW_SET_CONTROL); - pm_runtime_put(dc->dev); + host1x_client_suspend(&dc->client); } static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = { @@ -551,7 +569,7 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, plane->base.index = index; plane->wgrp = &hub->wgrps[wgrp]; - plane->wgrp->parent = dc->dev; + plane->wgrp->parent = &dc->client; p = &plane->base.base; @@ -656,8 +674,13 @@ int tegra_display_hub_atomic_check(struct drm_device *drm, static void tegra_display_hub_update(struct tegra_dc *dc) { u32 value; + int err; - pm_runtime_get_sync(dc->dev); + err = host1x_client_resume(&dc->client); + if (err < 0) { + dev_err(dc->dev, "failed to resume: %d\n", err); + return; + } value = tegra_dc_readl(dc, DC_CMD_IHUB_COMMON_MISC_CTL); value &= ~LATENCY_EVENT; @@ -672,7 +695,7 @@ static void tegra_display_hub_update(struct tegra_dc *dc) tegra_dc_writel(dc, COMMON_ACTREQ, DC_CMD_STATE_CONTROL); tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); - pm_runtime_put(dc->dev); + host1x_client_suspend(&dc->client); } void tegra_display_hub_atomic_commit(struct drm_device *drm, @@ -705,7 +728,7 @@ void tegra_display_hub_atomic_commit(struct drm_device *drm, static int tegra_display_hub_init(struct host1x_client *client) { struct tegra_display_hub *hub = to_tegra_display_hub(client); - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); struct tegra_drm *tegra = drm->dev_private; struct tegra_display_hub_state *state; @@ -723,7 +746,7 @@ static int tegra_display_hub_init(struct host1x_client *client) static int tegra_display_hub_exit(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); struct tegra_drm *tegra = drm->dev_private; drm_atomic_private_obj_fini(&tegra->hub->base); @@ -732,9 +755,85 @@ static int tegra_display_hub_exit(struct host1x_client *client) return 0; } +static int tegra_display_hub_runtime_suspend(struct host1x_client *client) +{ + struct tegra_display_hub *hub = to_tegra_display_hub(client); + struct device *dev = client->dev; + unsigned int i = hub->num_heads; + int err; + + err = reset_control_assert(hub->rst); + if (err < 0) + return err; + + while (i--) + clk_disable_unprepare(hub->clk_heads[i]); + + clk_disable_unprepare(hub->clk_hub); + clk_disable_unprepare(hub->clk_dsc); + clk_disable_unprepare(hub->clk_disp); + + pm_runtime_put_sync(dev); + + return 0; +} + +static int tegra_display_hub_runtime_resume(struct host1x_client *client) +{ + struct tegra_display_hub *hub = to_tegra_display_hub(client); + struct device *dev = client->dev; + unsigned int i; + int err; + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get runtime PM: %d\n", err); + return err; + } + + err = clk_prepare_enable(hub->clk_disp); + if (err < 0) + goto put_rpm; + + err = clk_prepare_enable(hub->clk_dsc); + if (err < 0) + goto disable_disp; + + err = clk_prepare_enable(hub->clk_hub); + if (err < 0) + goto disable_dsc; + + for (i = 0; i < hub->num_heads; i++) { + err = clk_prepare_enable(hub->clk_heads[i]); + if (err < 0) + goto disable_heads; + } + + err = reset_control_deassert(hub->rst); + if (err < 0) + goto disable_heads; + + return 0; + +disable_heads: + while (i--) + clk_disable_unprepare(hub->clk_heads[i]); + + clk_disable_unprepare(hub->clk_hub); +disable_dsc: + clk_disable_unprepare(hub->clk_dsc); +disable_disp: + clk_disable_unprepare(hub->clk_disp); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + static const struct host1x_client_ops tegra_display_hub_ops = { .init = tegra_display_hub_init, .exit = tegra_display_hub_exit, + .suspend = tegra_display_hub_runtime_suspend, + .resume = tegra_display_hub_runtime_resume, }; static int tegra_display_hub_probe(struct platform_device *pdev) @@ -851,6 +950,7 @@ static int tegra_display_hub_probe(struct platform_device *pdev) static int tegra_display_hub_remove(struct platform_device *pdev) { struct tegra_display_hub *hub = platform_get_drvdata(pdev); + unsigned int i; int err; err = host1x_client_unregister(&hub->client); @@ -859,78 +959,17 @@ static int tegra_display_hub_remove(struct platform_device *pdev) err); } - pm_runtime_disable(&pdev->dev); - - return err; -} - -static int __maybe_unused tegra_display_hub_suspend(struct device *dev) -{ - struct tegra_display_hub *hub = dev_get_drvdata(dev); - unsigned int i = hub->num_heads; - int err; - - err = reset_control_assert(hub->rst); - if (err < 0) - return err; - - while (i--) - clk_disable_unprepare(hub->clk_heads[i]); - - clk_disable_unprepare(hub->clk_hub); - clk_disable_unprepare(hub->clk_dsc); - clk_disable_unprepare(hub->clk_disp); - - return 0; -} - -static int __maybe_unused tegra_display_hub_resume(struct device *dev) -{ - struct tegra_display_hub *hub = dev_get_drvdata(dev); - unsigned int i; - int err; - - err = clk_prepare_enable(hub->clk_disp); - if (err < 0) - return err; - - err = clk_prepare_enable(hub->clk_dsc); - if (err < 0) - goto disable_disp; - - err = clk_prepare_enable(hub->clk_hub); - if (err < 0) - goto disable_dsc; + for (i = 0; i < hub->soc->num_wgrps; i++) { + struct tegra_windowgroup *wgrp = &hub->wgrps[i]; - for (i = 0; i < hub->num_heads; i++) { - err = clk_prepare_enable(hub->clk_heads[i]); - if (err < 0) - goto disable_heads; + mutex_destroy(&wgrp->lock); } - err = reset_control_deassert(hub->rst); - if (err < 0) - goto disable_heads; - - return 0; - -disable_heads: - while (i--) - clk_disable_unprepare(hub->clk_heads[i]); + pm_runtime_disable(&pdev->dev); - clk_disable_unprepare(hub->clk_hub); -disable_dsc: - clk_disable_unprepare(hub->clk_dsc); -disable_disp: - clk_disable_unprepare(hub->clk_disp); return err; } -static const struct dev_pm_ops tegra_display_hub_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_display_hub_suspend, - tegra_display_hub_resume, NULL) -}; - static const struct tegra_display_hub_soc tegra186_display_hub = { .num_wgrps = 6, .supports_dsc = true, @@ -958,7 +997,6 @@ struct platform_driver tegra_display_hub_driver = { .driver = { .name = "tegra-display-hub", .of_match_table = tegra_display_hub_of_match, - .pm = &tegra_display_hub_pm_ops, }, .probe = tegra_display_hub_probe, .remove = tegra_display_hub_remove, diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h index 767a60d9313c..3efa1be07ff8 100644 --- a/drivers/gpu/drm/tegra/hub.h +++ b/drivers/gpu/drm/tegra/hub.h @@ -17,7 +17,7 @@ struct tegra_windowgroup { struct mutex lock; unsigned int index; - struct device *parent; + struct host1x_client *parent; struct reset_control *rst; }; diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 80ddde4adbae..a264259b97a2 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -250,3 +250,19 @@ void tegra_output_find_possible_crtcs(struct tegra_output *output, output->encoder.possible_crtcs = mask; } + +int tegra_output_suspend(struct tegra_output *output) +{ + if (output->hpd_irq) + disable_irq(output->hpd_irq); + + return 0; +} + +int tegra_output_resume(struct tegra_output *output) +{ + if (output->hpd_irq) + enable_irq(output->hpd_irq); + + return 0; +} diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 1b8087d2dafe..41d24949478e 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -2255,7 +2255,7 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder) if (err < 0) dev_err(sor->dev, "failed to power off I/O pad: %d\n", err); - pm_runtime_put(sor->dev); + host1x_client_suspend(&sor->client); } static void tegra_sor_hdmi_enable(struct drm_encoder *encoder) @@ -2276,7 +2276,11 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder) mode = &encoder->crtc->state->adjusted_mode; pclk = mode->clock * 1000; - pm_runtime_get_sync(sor->dev); + err = host1x_client_resume(&sor->client); + if (err < 0) { + dev_err(sor->dev, "failed to resume: %d\n", err); + return; + } /* switch to safe parent clock */ err = tegra_sor_set_parent_clock(sor, sor->clk_safe); @@ -2722,7 +2726,7 @@ static void tegra_sor_dp_disable(struct drm_encoder *encoder) if (output->panel) drm_panel_unprepare(output->panel); - pm_runtime_put(sor->dev); + host1x_client_suspend(&sor->client); } static void tegra_sor_dp_enable(struct drm_encoder *encoder) @@ -2742,7 +2746,11 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder) mode = &encoder->crtc->state->adjusted_mode; info = &output->connector.display_info; - pm_runtime_get_sync(sor->dev); + err = host1x_client_resume(&sor->client); + if (err < 0) { + dev_err(sor->dev, "failed to resume: %d\n", err); + return; + } /* switch to safe parent clock */ err = tegra_sor_set_parent_clock(sor, sor->clk_safe); @@ -3053,7 +3061,7 @@ static const struct tegra_sor_ops tegra_sor_dp_ops = { static int tegra_sor_init(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); + struct drm_device *drm = dev_get_drvdata(client->host); const struct drm_encoder_helper_funcs *helpers = NULL; struct tegra_sor *sor = host1x_client_to_sor(client); int connector = DRM_MODE_CONNECTOR_Unknown; @@ -3190,9 +3198,80 @@ static int tegra_sor_exit(struct host1x_client *client) return 0; } +static int tegra_sor_runtime_suspend(struct host1x_client *client) +{ + struct tegra_sor *sor = host1x_client_to_sor(client); + struct device *dev = client->dev; + int err; + + if (sor->rst) { + err = reset_control_assert(sor->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + + reset_control_release(sor->rst); + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(sor->clk); + pm_runtime_put_sync(dev); + + return 0; +} + +static int tegra_sor_runtime_resume(struct host1x_client *client) +{ + struct tegra_sor *sor = host1x_client_to_sor(client); + struct device *dev = client->dev; + int err; + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get runtime PM: %d\n", err); + return err; + } + + err = clk_prepare_enable(sor->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto put_rpm; + } + + usleep_range(1000, 2000); + + if (sor->rst) { + err = reset_control_acquire(sor->rst); + if (err < 0) { + dev_err(dev, "failed to acquire reset: %d\n", err); + goto disable_clk; + } + + err = reset_control_deassert(sor->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto release_reset; + } + } + + return 0; + +release_reset: + reset_control_release(sor->rst); +disable_clk: + clk_disable_unprepare(sor->clk); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + static const struct host1x_client_ops sor_client_ops = { .init = tegra_sor_init, .exit = tegra_sor_exit, + .suspend = tegra_sor_runtime_suspend, + .resume = tegra_sor_runtime_resume, }; static const u8 tegra124_sor_xbar_cfg[5] = { @@ -3843,10 +3922,9 @@ static int tegra_sor_probe(struct platform_device *pdev) if (!sor->clk_pad) { char *name; - err = pm_runtime_get_sync(&pdev->dev); + err = host1x_client_resume(&sor->client); if (err < 0) { - dev_err(&pdev->dev, "failed to get runtime PM: %d\n", - err); + dev_err(sor->dev, "failed to resume: %d\n", err); goto remove; } @@ -3857,7 +3935,7 @@ static int tegra_sor_probe(struct platform_device *pdev) } sor->clk_pad = tegra_clk_sor_pad_register(sor, name); - pm_runtime_put(&pdev->dev); + host1x_client_suspend(&sor->client); } if (IS_ERR(sor->clk_pad)) { @@ -3913,54 +3991,21 @@ static int tegra_sor_remove(struct platform_device *pdev) return 0; } -static int tegra_sor_runtime_suspend(struct device *dev) -{ - struct tegra_sor *sor = dev_get_drvdata(dev); - int err; - - if (sor->rst) { - err = reset_control_assert(sor->rst); - if (err < 0) { - dev_err(dev, "failed to assert reset: %d\n", err); - return err; - } - - reset_control_release(sor->rst); - } - - usleep_range(1000, 2000); - - clk_disable_unprepare(sor->clk); - - return 0; -} - -static int tegra_sor_runtime_resume(struct device *dev) +static int __maybe_unused tegra_sor_suspend(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; - err = clk_prepare_enable(sor->clk); + err = tegra_output_suspend(&sor->output); if (err < 0) { - dev_err(dev, "failed to enable clock: %d\n", err); + dev_err(dev, "failed to suspend output: %d\n", err); return err; } - usleep_range(1000, 2000); - - if (sor->rst) { - err = reset_control_acquire(sor->rst); - if (err < 0) { - dev_err(dev, "failed to acquire reset: %d\n", err); - clk_disable_unprepare(sor->clk); - return err; - } - - err = reset_control_deassert(sor->rst); + if (sor->hdmi_supply) { + err = regulator_disable(sor->hdmi_supply); if (err < 0) { - dev_err(dev, "failed to deassert reset: %d\n", err); - reset_control_release(sor->rst); - clk_disable_unprepare(sor->clk); + tegra_output_resume(&sor->output); return err; } } @@ -3968,37 +4013,31 @@ static int tegra_sor_runtime_resume(struct device *dev) return 0; } -static int tegra_sor_suspend(struct device *dev) +static int __maybe_unused tegra_sor_resume(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; if (sor->hdmi_supply) { - err = regulator_disable(sor->hdmi_supply); + err = regulator_enable(sor->hdmi_supply); if (err < 0) return err; } - return 0; -} + err = tegra_output_resume(&sor->output); + if (err < 0) { + dev_err(dev, "failed to resume output: %d\n", err); -static int tegra_sor_resume(struct device *dev) -{ - struct tegra_sor *sor = dev_get_drvdata(dev); - int err; + if (sor->hdmi_supply) + regulator_disable(sor->hdmi_supply); - if (sor->hdmi_supply) { - err = regulator_enable(sor->hdmi_supply); - if (err < 0) - return err; + return err; } return 0; } static const struct dev_pm_ops tegra_sor_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_sor_runtime_suspend, tegra_sor_runtime_resume, - NULL) SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume) }; diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 3526c2892ddb..ade56b860cf9 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -161,7 +161,7 @@ static int vic_boot(struct vic *vic) static int vic_init(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); - struct drm_device *dev = dev_get_drvdata(client->parent); + struct drm_device *dev = dev_get_drvdata(client->host); struct tegra_drm *tegra = dev->dev_private; struct vic *vic = to_vic(drm); int err; @@ -190,9 +190,9 @@ static int vic_init(struct host1x_client *client) /* * Inherit the DMA parameters (such as maximum segment size) from the - * parent device. + * parent host1x device. */ - client->dev->dma_parms = client->parent->dma_parms; + client->dev->dma_parms = client->host->dma_parms; return 0; @@ -209,7 +209,7 @@ detach: static int vic_exit(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); - struct drm_device *dev = dev_get_drvdata(client->parent); + struct drm_device *dev = dev_get_drvdata(client->host); struct tegra_drm *tegra = dev->dev_private; struct vic *vic = to_vic(drm); int err; diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index ac42c84d2d7f..d1c3f5fbfee4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -260,6 +260,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); + virtio_gpu_array_lock_resv(objs); virtio_gpu_cmd_transfer_to_host_2d (vgdev, 0, plane->state->crtc_w, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 4ac55fc2bf97..44d858ce4ce7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -209,8 +209,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, cres->hash.key = user_key | (res_type << 24); ret = drm_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { + kfree(cres); goto out_invalid_key; + } cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e962048f65d2..827458f49112 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -28,10 +28,10 @@ #include <linux/console.h> #include <linux/dma-mapping.h> #include <linux/module.h> +#include <linux/pci.h> #include <drm/drm_drv.h> #include <drm/drm_ioctl.h> -#include <drm/drm_pci.h> #include <drm/drm_sysfs.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_module.h> @@ -150,6 +150,9 @@ #define DRM_IOCTL_VMW_GB_SURFACE_REF_EXT \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_GB_SURFACE_REF_EXT, \ union drm_vmw_gb_surface_reference_ext_arg) +#define DRM_IOCTL_VMW_MSG \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_MSG, \ + struct drm_vmw_msg_arg) /** * The core DRM version of this macro doesn't account for @@ -165,9 +168,9 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, @@ -182,16 +185,16 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { DRM_MASTER), VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, DRM_AUTH | + DRM_RENDER_ALLOW), + VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl, DRM_RENDER_ALLOW), @@ -201,9 +204,9 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_FENCE_EVENT, vmw_fence_event_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), /* these allow direct access to the framebuffers mark as master only */ VMW_IOCTL_DEF(VMW_PRESENT, vmw_present_ioctl, @@ -221,28 +224,31 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CREATE_SHADER, vmw_shader_define_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_UNREF_SHADER, vmw_shader_destroy_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE, vmw_gb_surface_define_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_GB_SURFACE_REF, vmw_gb_surface_reference_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_SYNCCPU, vmw_user_bo_synccpu_ioctl, DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CREATE_EXTENDED_CONTEXT, vmw_extended_context_define_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE_EXT, vmw_gb_surface_define_ext_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_GB_SURFACE_REF_EXT, vmw_gb_surface_reference_ext_ioctl, - DRM_AUTH | DRM_RENDER_ALLOW), + DRM_RENDER_ALLOW), + VMW_IOCTL_DEF(VMW_MSG, + vmw_msg_ioctl, + DRM_RENDER_ALLOW), }; static const struct pci_device_id vmw_pci_id_list[] = { @@ -1211,8 +1217,10 @@ static void vmw_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + drm_dev_unregister(dev); + vmw_driver_unload(dev); + drm_dev_put(dev); pci_disable_device(pdev); - drm_put_dev(dev); } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, @@ -1391,8 +1399,6 @@ static const struct file_operations vmwgfx_driver_fops = { static struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC, - .load = vmw_driver_load, - .unload = vmw_driver_unload, .get_vblank_counter = vmw_get_vblank_counter, .enable_vblank = vmw_enable_vblank, .disable_vblank = vmw_disable_vblank, @@ -1431,7 +1437,39 @@ static struct pci_driver vmw_pci_driver = { static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - return drm_get_pci_dev(pdev, ent, &driver); + struct drm_device *dev; + int ret; + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + dev = drm_dev_alloc(&driver, &pdev->dev); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto err_pci_disable_device; + } + + dev->pdev = pdev; + pci_set_drvdata(pdev, dev); + + ret = vmw_driver_load(dev, ent->driver_data); + if (ret) + goto err_drm_dev_put; + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_vmw_driver_unload; + + return 0; + +err_vmw_driver_unload: + vmw_driver_unload(dev); +err_drm_dev_put: + drm_dev_put(dev); +err_pci_disable_device: + pci_disable_device(pdev); + return ret; } static int __init vmwgfx_init(void) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a31e726d6d71..86b69397d166 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -56,9 +56,9 @@ #define VMWGFX_DRIVER_NAME "vmwgfx" -#define VMWGFX_DRIVER_DATE "20190328" +#define VMWGFX_DRIVER_DATE "20200114" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 16 +#define VMWGFX_DRIVER_MINOR 17 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_MAX_RELOCATIONS 2048 @@ -1403,6 +1403,8 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, int vmw_host_get_guestinfo(const char *guest_info_param, char *buffer, size_t *length); int vmw_host_log(const char *log); +int vmw_msg_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* VMW logging */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 934ad7c0c342..73489a45decb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2377,9 +2377,12 @@ static int vmw_cmd_dx_clear_rendertarget_view(struct vmw_private *dev_priv, { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXClearRenderTargetView) = container_of(header, typeof(*cmd), header); + struct vmw_resource *ret; - return PTR_RET(vmw_view_id_val_add(sw_context, vmw_view_rt, - cmd->body.renderTargetViewId)); + ret = vmw_view_id_val_add(sw_context, vmw_view_rt, + cmd->body.renderTargetViewId); + + return PTR_ERR_OR_ZERO(ret); } /** @@ -2396,9 +2399,12 @@ static int vmw_cmd_dx_clear_depthstencil_view(struct vmw_private *dev_priv, { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXClearDepthStencilView) = container_of(header, typeof(*cmd), header); + struct vmw_resource *ret; + + ret = vmw_view_id_val_add(sw_context, vmw_view_ds, + cmd->body.depthStencilViewId); - return PTR_RET(vmw_view_id_val_add(sw_context, vmw_view_ds, - cmd->body.depthStencilViewId)); + return PTR_ERR_OR_ZERO(ret); } static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, @@ -2741,9 +2747,12 @@ static int vmw_cmd_dx_genmips(struct vmw_private *dev_priv, { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXGenMips) = container_of(header, typeof(*cmd), header); + struct vmw_resource *ret; + + ret = vmw_view_id_val_add(sw_context, vmw_view_sr, + cmd->body.shaderResourceViewId); - return PTR_RET(vmw_view_id_val_add(sw_context, vmw_view_sr, - cmd->body.shaderResourceViewId)); + return PTR_ERR_OR_ZERO(ret); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index b6c5e4c2ac3c..e9f448a5ebb3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -28,6 +28,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/mem_encrypt.h> #include <asm/hypervisor.h> @@ -56,6 +57,8 @@ #define HIGH_WORD(X) ((X & 0xFFFF0000) >> 16) +#define MAX_USER_MSG_LENGTH PAGE_SIZE + static u32 vmw_msg_enabled = 1; enum rpc_msg_type { @@ -148,7 +151,8 @@ static unsigned long vmw_port_hb_out(struct rpc_channel *channel, unsigned long si, di, eax, ebx, ecx, edx; unsigned long msg_len = strlen(msg); - if (hb) { + /* HB port can't access encrypted memory. */ + if (hb && !mem_encrypt_active()) { unsigned long bp = channel->cookie_high; si = (uintptr_t) msg; @@ -202,7 +206,8 @@ static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply, { unsigned long si, di, eax, ebx, ecx, edx; - if (hb) { + /* HB port can't access encrypted memory */ + if (hb && !mem_encrypt_active()) { unsigned long bp = channel->cookie_low; si = channel->cookie_high; @@ -514,3 +519,84 @@ out_open: return -EINVAL; } + + +/** + * vmw_msg_ioctl: Sends and receveives a message to/from host from/to user-space + * + * Sends a message from user-space to host. + * Can also receive a result from host and return that to user-space. + * + * @dev: Identifies the drm device. + * @data: Pointer to the ioctl argument. + * @file_priv: Identifies the caller. + * Return: Zero on success, negative error code on error. + */ + +int vmw_msg_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vmw_msg_arg *arg = + (struct drm_vmw_msg_arg *) data; + struct rpc_channel channel; + char *msg; + int length; + + msg = kmalloc(MAX_USER_MSG_LENGTH, GFP_KERNEL); + if (!msg) { + DRM_ERROR("Cannot allocate memory for log message.\n"); + return -ENOMEM; + } + + length = strncpy_from_user(msg, (void __user *)((unsigned long)arg->send), + MAX_USER_MSG_LENGTH); + if (length < 0 || length >= MAX_USER_MSG_LENGTH) { + DRM_ERROR("Userspace message access failure.\n"); + kfree(msg); + return -EINVAL; + } + + + if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM)) { + DRM_ERROR("Failed to open channel.\n"); + goto out_open; + } + + if (vmw_send_msg(&channel, msg)) { + DRM_ERROR("Failed to send message to host.\n"); + goto out_msg; + } + + if (!arg->send_only) { + char *reply = NULL; + size_t reply_len = 0; + + if (vmw_recv_msg(&channel, (void *) &reply, &reply_len)) { + DRM_ERROR("Failed to receive message from host.\n"); + goto out_msg; + } + if (reply && reply_len > 0) { + if (copy_to_user((void __user *)((unsigned long)arg->receive), + reply, reply_len)) { + DRM_ERROR("Failed to copy message to userspace.\n"); + kfree(reply); + goto out_msg; + } + arg->receive_len = (__u32)reply_len; + } + kfree(reply); + } + + vmw_close_channel(&channel); + kfree(msg); + + return 0; + +out_msg: + vmw_close_channel(&channel); +out_open: + kfree(msg); + + return -EINVAL; +} + diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 32b9131b2bae..3ce630aa4fde 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -934,16 +934,12 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, uint32_t handle; struct ttm_base_object *base; int ret; - bool require_exist = false; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); if (unlikely(ret != 0)) return ret; } else { - if (unlikely(drm_is_render_client(file_priv))) - require_exist = true; - handle = u_handle; } @@ -960,9 +956,18 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, } if (handle_type != DRM_VMW_HANDLE_PRIME) { + bool require_exist = false; + user_srf = container_of(base, struct vmw_user_surface, prime.base); + /* Error out if we are unauthenticated primary */ + if (drm_is_primary_client(file_priv) && + !file_priv->authenticated) { + ret = -EACCES; + goto out_bad_resource; + } + /* * Make sure the surface creator has the same * authenticating master, or is already registered with us. @@ -971,6 +976,9 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, user_srf->master != file_priv->master) require_exist = true; + if (unlikely(drm_is_render_client(file_priv))) + require_exist = true; + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, require_exist); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c index ce288756531b..aa7e50f63b94 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -45,6 +45,10 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &vmw_vm_ops; + /* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */ + if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) + vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP; + return 0; } |