diff options
Diffstat (limited to 'drivers/gpu')
201 files changed, 2436 insertions, 859 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 68cc9258ffc4..fa432a1ac9e2 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -128,7 +128,6 @@ obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o drm_kms_helper-y := \ drm_atomic_helper.o \ drm_atomic_state_helper.o \ - drm_bridge_connector.o \ drm_crtc_helper.o \ drm_damage_helper.o \ drm_encoder_slave.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9aa952f258cf..6dfdff58bffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe..4bd61c169ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } @@ -1500,6 +1500,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index aad2027e5c7c..0e617dff8765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -348,6 +348,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + /* always clear VRAM */ + flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..1849510a308a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -635,7 +657,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) uint64_t queue_mask = 0; int r, i, j; - if (adev->enable_mes) + if (adev->mes.enable_legacy_queue_map) return amdgpu_gfx_mes_enable_kcq(adev, xcc_id); if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) @@ -697,7 +719,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) amdgpu_device_flush_hdp(adev, NULL); - if (adev->enable_mes) { + if (adev->mes.enable_legacy_queue_map) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) { j = i + xcc_id * adev->gfx.num_gfx_rings; r = amdgpu_mes_map_legacy_queue(adev, @@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ddda94e49db4..56cc58edbb4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -240,6 +240,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..b49b3650fd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..4d951a1baefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65..908e13455152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index dac88d2dd70d..1cb1ec7beefe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; @@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -226,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); @@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d659c612f03..bcce1add4ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -75,6 +75,7 @@ struct amdgpu_mes { uint32_t sched_version; uint32_t kiq_version; + bool enable_legacy_queue_map; uint32_t total_max_queue; uint32_t max_doorbell_slices; @@ -82,8 +83,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -112,12 +113,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b..38face981c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..e6344a6b0a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..c87d68d4be53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b287a82e6177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..7d26a962f811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,19 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +534,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +569,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c..e444e621ddaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800..e45d23e82878 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -2999,7 +3005,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); @@ -3432,6 +3438,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3472,6 +3496,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) @@ -3519,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3553,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 71f43a5c7f72..6e0e88076224 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v1_0.h" @@ -34,6 +35,9 @@ static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring); +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) { @@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4))); amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); @@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, .set_wptr = jpeg_v1_0_decode_ring_set_wptr, + .parse_cs = jpeg_v1_dec_ring_parse_cs, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + @@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); } + +/** + * jpeg_v1_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + int ret = 0; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */ + return -EINVAL; + + if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END) + continue; + + switch (type) { + case PACKETJ_TYPE0: + if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW && + reg != JPEG_V1_REG_CTX_INDEX && + reg != JPEG_V1_REG_CTX_DATA) { + ret = -EINVAL; + } + break; + case PACKETJ_TYPE1: + if (reg != JPEG_V1_REG_CTX_DATA) + ret = -EINVAL; + break; + case PACKETJ_TYPE3: + if (reg != JPEG_V1_REG_SOFT_RESET) + ret = -EINVAL; + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] != CP_PACKETJ_NOP) + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + if (ret) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + break; + } + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h index bbf33a6a3972..9654d22e0376 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h @@ -29,4 +29,15 @@ int jpeg_v1_0_sw_init(void *handle); void jpeg_v1_0_sw_fini(void *handle); void jpeg_v1_0_start(struct amdgpu_device *adev, int mode); +#define JPEG_V1_REG_RANGE_START 0x8000 +#define JPEG_V1_REG_RANGE_END 0x803f + +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238 +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239 +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b +#define JPEG_V1_REG_CTX_INDEX 0x8328 +#define JPEG_V1_REG_CTX_DATA 0x8329 +#define JPEG_V1_REG_SOFT_RESET 0x83a0 + #endif /*__JPEG_V1_0_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..41c0f8750dc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" @@ -538,11 +539,15 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -764,6 +769,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -810,3 +816,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .rev = 0, .funcs = &jpeg_v2_0_ip_funcs, }; + +/** + * jpeg_v2_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 654e43e83e2c..63fadda7a673 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -45,6 +45,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index d8ef95c847c2..eedb9a829d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -662,6 +662,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -691,6 +692,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 31cfa3ce6528..b1e7fd25afbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -560,6 +560,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 3dac8f259d7f..6c5c1a68a9b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -727,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h index 07d36c2abd6b..47638fd4d4e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h @@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block { }; extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block; - #endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760..b55041f38cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -25,6 +25,7 @@ #include "amdgpu_jpeg.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "mmsch_v4_0_3.h" @@ -782,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index f96ac6bce526..44eeed445ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -768,6 +768,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..d662aa841f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -26,6 +26,7 @@ #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "vcn/vcn_5_0_0_offset.h" @@ -646,6 +647,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f9343642ae7e..8aded0a67037 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -683,6 +693,28 @@ static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev, (void **)&adev->mes.ucode_fw_ptr[pipe]); } +static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) +{ + int pipe; + + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + soc21_grbm_select(adev, 3, pipe, 0, 0); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) { uint64_t ucode_addr; @@ -1015,7 +1047,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1029,7 +1061,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1052,18 +1084,6 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, mes_v11_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); - - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - return 0; } @@ -1071,7 +1091,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1124,7 +1144,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1200,9 +1220,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1216,12 +1233,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1313,15 +1330,24 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) mes_v11_0_enable(adev, true); + mes_v11_0_get_fw_version(adev); + mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring); r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); if (r) goto failure; - r = mes_v11_0_hw_init(adev); - if (r) - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47) + adev->mes.enable_legacy_queue_map = true; + else + adev->mes.enable_legacy_queue_map = false; + + if (adev->mes.enable_legacy_queue_map) { + r = mes_v11_0_hw_init(adev); + if (r) + goto failure; + } return r; @@ -1332,9 +1358,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1352,7 +1378,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1397,7 +1423,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 0713bc3eb263..a79a8adc3aa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -556,7 +596,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -734,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -757,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -774,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -790,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1085,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1101,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1120,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1146,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1178,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1200,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1238,6 +1266,7 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.funcs = &mes_v12_0_funcs; adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.enable_legacy_queue_map = true; adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; @@ -1246,9 +1275,6 @@ static int mes_v12_0_sw_init(void *handle) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1256,18 +1282,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1276,9 +1299,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1286,18 +1306,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } + } - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1341,7 +1364,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1362,10 +1385,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1392,9 +1415,19 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; - r = mes_v12_0_hw_init(adev); - if (r) - goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + + if (adev->mes.enable_legacy_queue_map) { + r = mes_v12_0_hw_init(adev); + if (r) + goto failure; + } return r; @@ -1405,9 +1438,15 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1420,10 +1459,10 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1443,23 +1482,23 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; @@ -1472,7 +1511,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; @@ -1515,17 +1554,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caab..0fbc3be81f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index af1e90159ce3..2e72d445415f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,14 +176,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dc..ecee9e7d7e4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1575,8 +1575,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1590,6 +1589,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** @@ -1616,7 +1617,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..e74e1983da53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd7b81b7b939..48caecf7e72e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; i++; } /* GL1 Data Cache per SA */ @@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; i++; } /* L3 Data Cache per GPU */ @@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } return i; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a4423..d163d92a692f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae4..1e069fa5211e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1752,6 +1752,30 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * return bb; } +static enum dmub_ips_disable_type dm_get_default_ips_mode( + struct amdgpu_device *adev) +{ + /* + * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to + * cause a hard hang. A fix exists for newer PMFW. + * + * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest + * IPS state in all cases, except for s0ix and all displays off (DPMS), + * where IPS2 is allowed. + * + * When checking pmfw version, use the major and minor only. + */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) && + (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) + return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) + return DMUB_IPS_ENABLE; + + /* ASICs older than DCN35 do not have IPSs */ + return DMUB_IPS_DISABLE_ALL; +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1863,7 +1887,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_DISABLE_IPS) init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; else - init_data.flags.disable_ips = DMUB_IPS_ENABLE; + init_data.flags.disable_ips = dm_get_default_ips_mode(adev); init_data.flags.disable_ips_in_vpb = 0; @@ -2893,6 +2917,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); @@ -4489,7 +4516,7 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, struct amdgpu_dm_backlight_caps caps; struct dc_link *link; u32 brightness; - bool rc; + bool rc, reallow_idle = false; amdgpu_dm_update_backlight_caps(dm, bl_idx); caps = dm->backlight_caps[bl_idx]; @@ -4502,6 +4529,12 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, link = (struct dc_link *)dm->backlight_link[bl_idx]; /* Change brightness based on AUX property */ + mutex_lock(&dm->dc_lock); + if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) { + dc_allow_idle_optimizations(dm->dc, false); + reallow_idle = true; + } + if (caps.aux_support) { rc = dc_link_set_backlight_level_nits(link, true, brightness, AUX_BL_DEFAULT_TRANSITION_TIME_MS); @@ -4513,6 +4546,11 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } + if (dm->dc->caps.ips_support && reallow_idle) + dc_allow_idle_optimizations(dm->dc, true); + + mutex_unlock(&dm->dc_lock); + if (rc) dm->actual_brightness[bl_idx] = user_brightness; } @@ -10533,7 +10571,7 @@ static bool should_reset_plane(struct drm_atomic_state *state, * TODO: We can likely skip bandwidth validation if the only thing that * changed about the plane was it'z z-ordering. */ - if (new_crtc_state->zpos_changed) + if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos) return true; if (drm_atomic_crtc_needs_modeset(new_crtc_state)) @@ -11381,6 +11419,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, drm_dbg(dev, "Failed to determine cursor mode\n"); goto fail; } + + /* + * If overlay cursor is needed, DC cannot go through the + * native cursor update path. All enabled planes on the CRTC + * need to be added for DC to not disable a plane by mistake + */ + if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) { + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto fail; + } } /* Remove exiting planes if they are modified */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1270,6 +1285,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a83bd0331c3b..5cb11cc2d063 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include <drm/drm_blend.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fourcc.h> #include "amdgpu.h" @@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f..4d960dc5ce89 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -185,8 +185,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1..46f9c05de16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 603036df68ba..6547cc2c2a77 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -811,7 +811,8 @@ static void build_synchronized_timing_groups( for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0) { + sizeof(struct dml2_timing_cfg)) == 0 && + display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267b..1d2be574f668 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1402,6 +1402,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -3205,15 +3207,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } @@ -3587,7 +3593,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { @@ -3680,7 +3686,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c1026..d5e9aec52a05 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -1460,7 +1462,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; struct dc *dc = pipe_ctx[i]->stream->ctx->dc; @@ -1473,14 +1481,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, num_frames = 2 * (frame_rate % 60); } } - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..8e0588b1cf30 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; /* read VBIOS LTTPR caps */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e..34b02147881d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41..bb46f30d11d0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 09cbc3afd6d8..b0fc22383e28 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4 uint16_t supporteddevices; uint8_t number_of_path; uint8_t reserved; - struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path + struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path }; struct display_object_info_table_v1_5 { @@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 { uint8_t reserved; // the real number of this included in the structure is calculated by using the // (whole structure size - the header size- number_of_path)/size of atom_display_object_path - struct atom_display_object_path_v3 display_path[8]; + struct atom_display_object_path_v3 display_path[]; }; /* diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c4..710e328fad48 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; +struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 4cf2c9f30b3d..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 9d7454b3c314..2cf951184561 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2224,8 +2224,9 @@ static int smu_bump_power_profile_mode(struct smu_context *smu, } static int smu_adjust_power_state_dynamic(struct smu_context *smu, - enum amd_dpm_forced_level level, - bool skip_display_settings) + enum amd_dpm_forced_level level, + bool skip_display_settings, + bool force_update) { int ret = 0; int index = 0; @@ -2254,7 +2255,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, } } - if (smu_dpm_ctx->dpm_level != level) { + if (force_update || smu_dpm_ctx->dpm_level != level) { ret = smu_asic_set_performance_level(smu, level); if (ret) { dev_err(smu->adev->dev, "Failed to set performance level!"); @@ -2271,7 +2272,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; - if (smu->power_profile_mode != workload[0]) + if (force_update || smu->power_profile_mode != workload[0]) smu_bump_power_profile_mode(smu, workload, 0); } @@ -2292,11 +2293,13 @@ static int smu_handle_task(struct smu_context *smu, ret = smu_pre_display_config_changed(smu); if (ret) return ret; - ret = smu_adjust_power_state_dynamic(smu, level, false); + ret = smu_adjust_power_state_dynamic(smu, level, false, false); break; case AMD_PP_TASK_COMPLETE_INIT: + ret = smu_adjust_power_state_dynamic(smu, level, true, true); + break; case AMD_PP_TASK_READJUST_POWER_STATE: - ret = smu_adjust_power_state_dynamic(smu, level, true); + ret = smu_adjust_power_state_dynamic(smu, level, true, false); break; default: break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ff..87ca5ceb1ece 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index ac0dd6b97f8d..e71a721c12b9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -439,7 +439,16 @@ enum smu_clk_type { __SMU_DUMMY_MAP(BACO_CG), \ __SMU_DUMMY_MAP(SOC_CG), \ __SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \ - __SMU_DUMMY_MAP(WHISPER_MODE), + __SMU_DUMMY_MAP(WHISPER_MODE), \ + __SMU_DUMMY_MAP(EDC_PWRBRK), \ + __SMU_DUMMY_MAP(SOC_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_PSM_DIDT), \ + __SMU_DUMMY_MAP(APT_ALL_ENABLE), \ + __SMU_DUMMY_MAP(APT_SQ_THROTTLE), \ + __SMU_DUMMY_MAP(APT_PF_DCS), \ + __SMU_DUMMY_MAP(GFX_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \ + __SMU_DUMMY_MAP(FAN_ABNORMAL), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed7..75c921e87360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff..9974c9f8135e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -121,6 +121,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index a7d0231727e8..7bc95c404377 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2378,7 +2378,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf size += sysfs_emit_at(buf, size, " "); for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++) - size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i], + size += sysfs_emit_at(buf, size, "%d %-14s%s", i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "* " : " "); size += sysfs_emit_at(buf, size, "\n"); @@ -2408,7 +2408,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf do { \ size += sysfs_emit_at(buf, size, "%-30s", #field); \ for (j = 0; j <= PP_SMC_POWER_PROFILE_WINDOW3D; j++) \ - size += sysfs_emit_at(buf, size, "%-16d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ + size += sysfs_emit_at(buf, size, "%-18d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ size += sysfs_emit_at(buf, size, "\n"); \ } while (0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e1a27903c80a..2b45adecbed2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -115,7 +115,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), @@ -188,6 +187,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + FEA_MAP(EDC_PWRBRK), + FEA_MAP(SOC_EDC_XVMIN), + FEA_MAP(GFX_PSM_DIDT), + FEA_MAP(APT_ALL_ENABLE), + FEA_MAP(APT_SQ_THROTTLE), + FEA_MAP(APT_PF_DCS), + FEA_MAP(GFX_EDC_XVMIN), + FEA_MAP(GFX_DIDT_XVMIN), + FEA_MAP(FAN_ABNORMAL), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, @@ -675,6 +683,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu) pcie_table->clk_freq[pcie_table->num_of_link_levels] = skutable->LclkFreq[link_level]; pcie_table->num_of_link_levels++; + + if (link_level == 0) + link_level++; } /* dcefclk dpm table setup */ @@ -1824,50 +1835,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -2015,12 +1982,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index fe46b0ebefea..e5eb5d672bcd 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -160,6 +160,7 @@ static int komeda_crtc_normalize_zpos(struct drm_crtc *crtc, struct drm_plane *plane; struct list_head zorder_list; int order = 0, err; + u32 slave_zpos = 0; DRM_DEBUG_ATOMIC("[CRTC:%d:%s] calculating normalized zpos values\n", crtc->base.id, crtc->name); @@ -199,10 +200,13 @@ static int komeda_crtc_normalize_zpos(struct drm_crtc *crtc, plane_st->zpos, plane_st->normalized_zpos); /* calculate max slave zorder */ - if (has_bit(drm_plane_index(plane), kcrtc->slave_planes)) + if (has_bit(drm_plane_index(plane), kcrtc->slave_planes)) { + slave_zpos = plane_st->normalized_zpos; + if (to_kplane_st(plane_st)->layer_split) + slave_zpos++; kcrtc_st->max_slave_zorder = - max(plane_st->normalized_zpos, - kcrtc_st->max_slave_zorder); + max(slave_zpos, kcrtc_st->max_slave_zorder); + } } crtc_st->zpos_changed = true; diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index c621be1a99a8..3eb955333c80 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -390,6 +390,7 @@ config DRM_TI_SN65DSI86 depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 479e62690d75..3b824e01c9b5 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -1,19 +1,26 @@ # SPDX-License-Identifier: MIT +config DRM_DISPLAY_DP_AUX_BUS + tristate + depends on DRM + depends on OF || COMPILE_TEST + config DRM_DISPLAY_HELPER tristate depends on DRM help DRM helpers for display adapters. -config DRM_DISPLAY_DP_AUX_BUS - tristate - depends on DRM - depends on OF || COMPILE_TEST +if DRM_DISPLAY_HELPER + +config DRM_BRIDGE_CONNECTOR + bool + select DRM_DISPLAY_HDMI_STATE_HELPER + help + DRM connector implementation terminating DRM bridge chains. config DRM_DISPLAY_DP_AUX_CEC bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" - depends on DRM && DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER select CEC_CORE help @@ -25,7 +32,6 @@ config DRM_DISPLAY_DP_AUX_CEC config DRM_DISPLAY_DP_AUX_CHARDEV bool "DRM DP AUX Interface" - depends on DRM && DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER help Choose this option to enable a /dev/drm_dp_auxN node that allows to @@ -34,7 +40,6 @@ config DRM_DISPLAY_DP_AUX_CHARDEV config DRM_DISPLAY_DP_HELPER bool - depends on DRM_DISPLAY_HELPER help DRM display helpers for DisplayPort. @@ -61,19 +66,18 @@ config DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG config DRM_DISPLAY_HDCP_HELPER bool - depends on DRM_DISPLAY_HELPER help DRM display helpers for HDCP. config DRM_DISPLAY_HDMI_HELPER bool - depends on DRM_DISPLAY_HELPER help DRM display helpers for HDMI. config DRM_DISPLAY_HDMI_STATE_HELPER bool - depends on DRM_DISPLAY_HELPER select DRM_DISPLAY_HDMI_HELPER help DRM KMS state helpers for HDMI. + +endif # DRM_DISPLAY_HELPER diff --git a/drivers/gpu/drm/display/Makefile b/drivers/gpu/drm/display/Makefile index 629df2f4d322..fbb9d2b8acd4 100644 --- a/drivers/gpu/drm/display/Makefile +++ b/drivers/gpu/drm/display/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_DRM_DISPLAY_DP_AUX_BUS) += drm_dp_aux_bus.o drm_display_helper-y := drm_display_helper_mod.o +drm_display_helper-$(CONFIG_DRM_BRIDGE_CONNECTOR) += \ + drm_bridge_connector.o drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_HELPER) += \ drm_dp_dual_mode_helper.o \ drm_dp_helper.o \ diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 0869b663f17e..3da5b8bf8259 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -216,8 +216,19 @@ static void drm_bridge_connector_debugfs_init(struct drm_connector *connector, } } +static void drm_bridge_connector_reset(struct drm_connector *connector) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + + drm_atomic_helper_connector_reset(connector); + if (bridge_connector->bridge_hdmi) + __drm_atomic_helper_connector_hdmi_reset(connector, + connector->state); +} + static const struct drm_connector_funcs drm_bridge_connector_funcs = { - .reset = drm_atomic_helper_connector_reset, + .reset = drm_bridge_connector_reset, .detect = drm_bridge_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -443,10 +454,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, panel_bridge = bridge; } - if (connector_type == DRM_MODE_CONNECTOR_Unknown) { - kfree(bridge_connector); + if (connector_type == DRM_MODE_CONNECTOR_Unknown) return ERR_PTR(-EINVAL); - } if (bridge_connector->bridge_hdmi) ret = drmm_connector_hdmi_init(drm, connector, @@ -461,10 +470,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, connector_type, ddc); - if (ret) { - kfree(bridge_connector); + if (ret) return ERR_PTR(ret); - } drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7609c798d73d..7936c2023955 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1071,23 +1071,16 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } if (async_flip && - prop != config->prop_fb_id && - prop != config->prop_in_fence_fd && - prop != config->prop_fb_damage_clips) { + (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || + (prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } - if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { - drm_dbg_atomic(prop->dev, - "[OBJECT:%d] Only primary planes can be changed during async flip\n", - obj->id); - ret = -EINVAL; - break; - } - ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 31af5cf37a09..cee5eafbfb81 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index 7ef5a48c8029..b0602c4f3628 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -36,20 +36,11 @@ static int drm_fbdev_dma_fb_release(struct fb_info *info, int user) return 0; } -FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma, - drm_fb_helper_damage_range, - drm_fb_helper_damage_area); - static int drm_fbdev_dma_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct drm_fb_helper *fb_helper = info->par; - struct drm_framebuffer *fb = fb_helper->fb; - struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0); - if (!dma->map_noncoherent) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - - return fb_deferred_io_mmap(info, vma); + return drm_gem_prime_mmap(fb_helper->buffer->gem, vma); } static void drm_fbdev_dma_fb_destroy(struct fb_info *info) @@ -73,10 +64,37 @@ static const struct fb_ops drm_fbdev_dma_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_dma_fb_open, .fb_release = drm_fbdev_dma_fb_release, + __FB_DEFAULT_DMAMEM_OPS_RDWR, + DRM_FB_HELPER_DEFAULT_OPS, + __FB_DEFAULT_DMAMEM_OPS_DRAW, + .fb_mmap = drm_fbdev_dma_fb_mmap, + .fb_destroy = drm_fbdev_dma_fb_destroy, +}; + +FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma, + drm_fb_helper_damage_range, + drm_fb_helper_damage_area); + +static int drm_fbdev_dma_deferred_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0); + + if (!dma->map_noncoherent) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + return fb_deferred_io_mmap(info, vma); +} + +static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = { + .owner = THIS_MODULE, + .fb_open = drm_fbdev_dma_fb_open, + .fb_release = drm_fbdev_dma_fb_release, __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_dma), DRM_FB_HELPER_DEFAULT_OPS, __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_dma), - .fb_mmap = drm_fbdev_dma_fb_mmap, + .fb_mmap = drm_fbdev_dma_deferred_fb_mmap, .fb_destroy = drm_fbdev_dma_fb_destroy, }; @@ -89,6 +107,7 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; + bool use_deferred_io = false; struct drm_client_buffer *buffer; struct drm_gem_dma_object *dma_obj; struct drm_framebuffer *fb; @@ -111,6 +130,15 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, fb = buffer->fb; + /* + * Deferred I/O requires struct page for framebuffer memory, + * which is not guaranteed for all DMA ranges. We thus only + * install deferred I/O if we have a framebuffer that requires + * it. + */ + if (fb->funcs->dirty) + use_deferred_io = true; + ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; @@ -130,7 +158,10 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); - info->fbops = &drm_fbdev_dma_fb_ops; + if (use_deferred_io) + info->fbops = &drm_fbdev_dma_deferred_fb_ops; + else + info->fbops = &drm_fbdev_dma_fb_ops; /* screen */ info->flags |= FBINFO_VIRTFB; /* system memory */ @@ -144,14 +175,28 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, } info->fix.smem_len = info->screen_size; - /* deferred I/O */ - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + /* + * Only set up deferred I/O if the screen buffer supports + * it. If this disagrees with the previous test for ->dirty, + * mmap on the /dev/fb file might not work correctly. + */ + if (!is_vmalloc_addr(info->screen_buffer) && info->fix.smem_start) { + unsigned long pfn = info->fix.smem_start >> PAGE_SHIFT; - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; + if (drm_WARN_ON(dev, !pfn_to_page(pfn))) + use_deferred_io = false; + } + + /* deferred I/O */ + if (use_deferred_io) { + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_drm_fb_helper_release_info; + } return 0; diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index c16c7678237e..0830cae9a4d0 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index a0e94217b511..4fcfc0b9b386 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1464,6 +1464,7 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *syncobj; struct eventfd_ctx *ev_fd_ctx; struct syncobj_eventfd_entry *entry; + int ret; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) return -EOPNOTSUPP; @@ -1479,13 +1480,15 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, return -ENOENT; ev_fd_ctx = eventfd_ctx_fdget(args->fd); - if (IS_ERR(ev_fd_ctx)) - return PTR_ERR(ev_fd_ctx); + if (IS_ERR(ev_fd_ctx)) { + ret = PTR_ERR(ev_fd_ctx); + goto err_fdget; + } entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - eventfd_ctx_put(ev_fd_ctx); - return -ENOMEM; + ret = -ENOMEM; + goto err_kzalloc; } entry->syncobj = syncobj; entry->ev_fd_ctx = ev_fd_ctx; @@ -1496,6 +1499,12 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, drm_syncobj_put(syncobj); return 0; + +err_kzalloc: + eventfd_ctx_put(ev_fd_ctx); +err_fdget: + drm_syncobj_put(syncobj); + return ret; } int diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 866b3b409c4d..10689480338e 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -228,7 +228,7 @@ bool intel_alpm_compute_params(struct intel_dp *intel_dp, int tfw_exit_latency = 20; /* eDP spec */ int phy_wake = 4; /* eDP spec */ int preamble = 8; /* eDP spec */ - int precharge = intel_dp_aux_fw_sync_len() - preamble; + int precharge = intel_dp_aux_fw_sync_len(intel_dp) - preamble; u8 max_wake_lines; io_wake_time = max(precharge, io_buffer_wake_time(crtc_state)) + diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d1..6c3333136737 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 8713835e2307..f9d3cc3c342b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1885,6 +1885,10 @@ struct intel_dp { } alpm_parameters; u8 alpm_dpcd; + + struct { + unsigned long mask; + } quirks; }; enum lspcon_vendor { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 59f11af3b0a1..ebe7fe5417ae 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -82,6 +82,7 @@ #include "intel_pch_display.h" #include "intel_pps.h" #include "intel_psr.h" +#include "intel_quirks.h" #include "intel_tc.h" #include "intel_vdsc.h" #include "intel_vrr.h" @@ -3952,6 +3953,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, drm_dp_is_branch(intel_dp->dpcd)); + intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); /* * Read the eDP display control registers. @@ -4064,6 +4066,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, drm_dp_is_branch(intel_dp->dpcd)); + intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); + intel_dp_update_sink_caps(intel_dp); } @@ -5935,6 +5939,18 @@ intel_dp_detect(struct drm_connector *connector, else status = connector_status_disconnected; + if (status != connector_status_disconnected && + !intel_dp_mst_verify_dpcd_state(intel_dp)) + /* + * This requires retrying detection for instance to re-enable + * the MST mode that got reset via a long HPD pulse. The retry + * will happen either via the hotplug handler's retry logic, + * ensured by setting the connector here to SST/disconnected, + * or via a userspace connector probing in response to the + * hotplug uevent sent when removing the MST connectors. + */ + status = connector_status_disconnected; + if (status == connector_status_disconnected) { memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance)); memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd)); diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index b8a53bb174da..be58185a77c0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -13,6 +13,7 @@ #include "intel_dp_aux.h" #include "intel_dp_aux_regs.h" #include "intel_pps.h" +#include "intel_quirks.h" #include "intel_tc.h" #define AUX_CH_NAME_BUFSIZE 6 @@ -142,16 +143,21 @@ static int intel_dp_aux_sync_len(void) return precharge + preamble; } -int intel_dp_aux_fw_sync_len(void) +int intel_dp_aux_fw_sync_len(struct intel_dp *intel_dp) { + int precharge = 10; /* 10-16 */ + int preamble = 8; + /* * We faced some glitches on Dell Precision 5490 MTL laptop with panel: * "Manufacturer: AUO, Model: 63898" when using HW default 18. Using 20 * is fixing these problems with the panel. It is still within range - * mentioned in eDP specification. + * mentioned in eDP specification. Increasing Fast Wake sync length is + * causing problems with other panels: increase length as a quirk for + * this specific laptop. */ - int precharge = 12; /* 10-16 */ - int preamble = 8; + if (intel_has_dpcd_quirk(intel_dp, QUIRK_FW_SYNC_LEN)) + precharge += 2; return precharge + preamble; } @@ -211,7 +217,7 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_TIME_OUT_MAX | DP_AUX_CH_CTL_RECEIVE_ERROR | DP_AUX_CH_CTL_MESSAGE_SIZE(send_bytes) | - DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(intel_dp_aux_fw_sync_len()) | + DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(intel_dp_aux_fw_sync_len(intel_dp)) | DP_AUX_CH_CTL_SYNC_PULSE_SKL(intel_dp_aux_sync_len()); if (intel_tc_port_in_tbt_alt_mode(dig_port)) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.h b/drivers/gpu/drm/i915/display/intel_dp_aux.h index 76d1f2ed7c2f..593f58fafab7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.h +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.h @@ -20,6 +20,6 @@ enum aux_ch intel_dp_aux_ch(struct intel_encoder *encoder); void intel_dp_aux_irq_handler(struct drm_i915_private *i915); u32 intel_dp_aux_pack(const u8 *src, int src_bytes); -int intel_dp_aux_fw_sync_len(void); +int intel_dp_aux_fw_sync_len(struct intel_dp *intel_dp); #endif /* __INTEL_DP_AUX_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2edffe62f360..b0101d72b9c1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector, int timeout) { - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; long ret; #define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 27ce5c3f5951..17978a1f9ab0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1998,3 +1998,43 @@ bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, return false; } + +/* + * intel_dp_mst_verify_dpcd_state - verify the MST SW enabled state wrt. the DPCD + * @intel_dp: DP port object + * + * Verify if @intel_dp's MST enabled SW state matches the corresponding DPCD + * state. A long HPD pulse - not long enough to be detected as a disconnected + * state - could've reset the DPCD state, which requires tearing + * down/recreating the MST topology. + * + * Returns %true if the SW MST enabled and DPCD states match, %false + * otherwise. + */ +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &dig_port->base; + int ret; + u8 val; + + if (!intel_dp->is_mst) + return true; + + ret = drm_dp_dpcd_readb(intel_dp->mst_mgr.aux, DP_MSTM_CTRL, &val); + + /* Adjust the expected register value for SST + SideBand. */ + if (ret < 0 || val != (DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC)) { + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s][ENCODER:%d:%s] MST mode got reset, removing topology (ret=%d, ctrl=0x%02x)\n", + connector->base.base.id, connector->base.name, + encoder->base.base.id, encoder->base.name, + ret, val); + + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index 8ca1d599091c..9e4c7679f1c3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -27,5 +27,6 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, struct intel_link_bw_limits *limits); bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, struct intel_crtc *crtc); +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp); #endif /* __INTEL_DP_MST_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 7602cb30ebf1..e1213f3d93cc 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -326,6 +326,8 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + if (intel_crtc_is_joiner_secondary(crtc_state)) return; @@ -337,11 +339,30 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; crtc_state->uapi.scaling_filter = crtc_state->hw.scaling_filter; - /* assume 1:1 mapping */ - drm_property_replace_blob(&crtc_state->hw.degamma_lut, - crtc_state->pre_csc_lut); - drm_property_replace_blob(&crtc_state->hw.gamma_lut, - crtc_state->post_csc_lut); + if (DISPLAY_INFO(i915)->color.degamma_lut_size) { + /* assume 1:1 mapping */ + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + crtc_state->pre_csc_lut); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->post_csc_lut); + } else { + /* + * ilk/snb hw may be configured for either pre_csc_lut + * or post_csc_lut, but we don't advertise degamma_lut as + * being available in the uapi since there is only one + * hardware LUT. Always assign the result of the readout + * to gamma_lut as that is the only valid source of LUTs + * in the uapi. + */ + drm_WARN_ON(&i915->drm, crtc_state->post_csc_lut && + crtc_state->pre_csc_lut); + + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + NULL); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->post_csc_lut ?: + crtc_state->pre_csc_lut); + } drm_property_replace_blob(&crtc_state->uapi.degamma_lut, crtc_state->hw.degamma_lut); diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba86..7ce926241e83 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index 14d5fefc9c5b..dfd8b4960e6d 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -14,6 +14,11 @@ static void intel_set_quirk(struct intel_display *display, enum intel_quirk_id q display->quirks.mask |= BIT(quirk); } +static void intel_set_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk) +{ + intel_dp->quirks.mask |= BIT(quirk); +} + /* * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason */ @@ -65,6 +70,14 @@ static void quirk_no_pps_backlight_power_hook(struct intel_display *display) drm_info(display->drm, "Applying no pps backlight power quirk\n"); } +static void quirk_fw_sync_len(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + + intel_set_dpcd_quirk(intel_dp, QUIRK_FW_SYNC_LEN); + drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -72,6 +85,21 @@ struct intel_quirk { void (*hook)(struct intel_display *display); }; +struct intel_dpcd_quirk { + int device; + int subsystem_vendor; + int subsystem_device; + u8 sink_oui[3]; + u8 sink_device_id[6]; + void (*hook)(struct intel_dp *intel_dp); +}; + +#define SINK_OUI(first, second, third) { (first), (second), (third) } +#define SINK_DEVICE_ID(first, second, third, fourth, fifth, sixth) \ + { (first), (second), (third), (fourth), (fifth), (sixth) } + +#define SINK_DEVICE_ID_ANY SINK_DEVICE_ID(0, 0, 0, 0, 0, 0) + /* For systems that don't have a meaningful PCI subdevice/subvendor ID */ struct intel_dmi_quirk { void (*hook)(struct intel_display *display); @@ -203,6 +231,18 @@ static struct intel_quirk intel_quirks[] = { { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, }; +static struct intel_dpcd_quirk intel_dpcd_quirks[] = { + /* Dell Precision 5490 */ + { + .device = 0x7d55, + .subsystem_vendor = 0x1028, + .subsystem_device = 0x0cc7, + .sink_oui = SINK_OUI(0x38, 0xec, 0x11), + .hook = quirk_fw_sync_len, + }, + +}; + void intel_init_quirks(struct intel_display *display) { struct pci_dev *d = to_pci_dev(display->drm->dev); @@ -224,7 +264,35 @@ void intel_init_quirks(struct intel_display *display) } } +void intel_init_dpcd_quirks(struct intel_dp *intel_dp, + const struct drm_dp_dpcd_ident *ident) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct pci_dev *d = to_pci_dev(display->drm->dev); + int i; + + for (i = 0; i < ARRAY_SIZE(intel_dpcd_quirks); i++) { + struct intel_dpcd_quirk *q = &intel_dpcd_quirks[i]; + + if (d->device == q->device && + (d->subsystem_vendor == q->subsystem_vendor || + q->subsystem_vendor == PCI_ANY_ID) && + (d->subsystem_device == q->subsystem_device || + q->subsystem_device == PCI_ANY_ID) && + !memcmp(q->sink_oui, ident->oui, sizeof(ident->oui)) && + (!memcmp(q->sink_device_id, ident->device_id, + sizeof(ident->device_id)) || + !memchr_inv(q->sink_device_id, 0, sizeof(q->sink_device_id)))) + q->hook(intel_dp); + } +} + bool intel_has_quirk(struct intel_display *display, enum intel_quirk_id quirk) { return display->quirks.mask & BIT(quirk); } + +bool intel_has_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk) +{ + return intel_dp->quirks.mask & BIT(quirk); +} diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h index 151c8f4ae576..cafdebda7535 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.h +++ b/drivers/gpu/drm/i915/display/intel_quirks.h @@ -9,6 +9,8 @@ #include <linux/types.h> struct intel_display; +struct intel_dp; +struct drm_dp_dpcd_ident; enum intel_quirk_id { QUIRK_BACKLIGHT_PRESENT, @@ -17,9 +19,13 @@ enum intel_quirk_id { QUIRK_INVERT_BRIGHTNESS, QUIRK_LVDS_SSC_DISABLE, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK, + QUIRK_FW_SYNC_LEN, }; void intel_init_quirks(struct intel_display *display); +void intel_init_dpcd_quirks(struct intel_dp *intel_dp, + const struct drm_dp_dpcd_ident *ident); bool intel_has_quirk(struct intel_display *display, enum intel_quirk_id quirk); +bool intel_has_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk); #endif /* __INTEL_QUIRKS_H__ */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index eae5b5e09aa8..931d2cf74ed8 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,7 +1870,6 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = { /* Lenovo Yoga Tab 3 Pro YT3-X90F */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)vlv_dsi_lenovo_yoga_tab3_backlight_fixup, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625..cac6d4184506 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -290,6 +290,41 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + unsigned long *start_vaddr, + unsigned long *end_vaddr) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +441,14 @@ retry: if (ret) goto err_unpin; + set_address_limits(area, vma, obj_offset, &start, &end); + + pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + pfn += (start - area->vm_start) >> PAGE_SHIFT; + pfn += obj_offset - vma->gtt_view.partial.offset; + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; @@ -1084,6 +1125,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0f..5c72462d1f57 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { @@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 3b69bc6616bd..551b0d7974ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -212,6 +212,37 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s } } + if (IS_ARROWLAKE(gt->i915)) { + bool too_old = false; + + /* + * ARL requires a newer firmware than MTL did (102.0.10.1878) but the + * firmware is actually common. So, need to do an explicit version check + * here rather than using a separate table entry. And if the older + * MTL-only version is found, then just don't use GSC rather than aborting + * the driver load. + */ + if (gsc->release.major < 102) { + too_old = true; + } else if (gsc->release.major == 102) { + if (gsc->release.minor == 0) { + if (gsc->release.patch < 10) { + too_old = true; + } else if (gsc->release.patch == 10) { + if (gsc->release.build < 1878) + too_old = true; + } + } + } + + if (too_old) { + gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + return -EINVAL; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index 453d855dd1de..3d3191deb0ab 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -302,7 +302,7 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) { struct intel_gt *gt = gsc_uc_to_gt(gsc); - if (!intel_uc_fw_is_loadable(&gsc->fw)) + if (!intel_uc_fw_is_loadable(&gsc->fw) || intel_uc_fw_is_in_error(&gsc->fw)) return; if (intel_gsc_uc_fw_init_done(gsc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9400d0eb682b..908ebfa22933 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2842,9 +2842,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce, ce->parallel.guc.wqi_tail = 0; ce->parallel.guc.wqi_head = 0; - wq_desc_offset = i915_ggtt_offset(ce->state) + + wq_desc_offset = (u64)i915_ggtt_offset(ce->state) + __get_parent_scratch_offset(ce); - wq_base_offset = i915_ggtt_offset(ce->state) + + wq_base_offset = (u64)i915_ggtt_offset(ce->state) + __get_wq_offset(ce); info->wq_desc_lo = lower_32_bits(wq_desc_offset); info->wq_desc_hi = upper_32_bits(wq_desc_offset); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d80278eb45d7..ec33ad942115 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + int ret; + switch (uc_fw->type) { case INTEL_UC_FW_TYPE_HUC: - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; case INTEL_UC_FW_TYPE_GSC: - intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; default: MISSING_CASE(uc_fw->type); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 9a431726c8d5..ac7b3aad2222 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -258,6 +258,11 @@ static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw) return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING; } +static inline bool intel_uc_fw_is_in_error(struct intel_uc_fw *uc_fw) +{ + return intel_uc_fw_status_to_error(__intel_uc_fw_status(uc_fw)) != 0; +} + static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) { return uc_fw->user_overridden; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7723dd11c80..110340e02a02 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -546,6 +546,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_LUNARLAKE(i915) (0 && i915) #define IS_BATTLEMAGE(i915) (0 && i915) +#define IS_ARROWLAKE(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL) #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(i915) \ diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 8a9aad523eec..1d4cc91c0e40 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -51,7 +51,7 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) debug_object_init(fence, &i915_sw_fence_debug_descr); } -static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_init_onstack(struct i915_sw_fence *fence) { debug_object_init_on_stack(fence, &i915_sw_fence_debug_descr); } @@ -77,7 +77,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence) debug_object_destroy(fence, &i915_sw_fence_debug_descr); } -static inline void debug_fence_free(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_free(struct i915_sw_fence *fence) { debug_object_free(fence, &i915_sw_fence_debug_descr); smp_wmb(); /* flush the change in state before reallocation */ @@ -94,7 +94,7 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) { } -static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_init_onstack(struct i915_sw_fence *fence) { } @@ -115,7 +115,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence) { } -static inline void debug_fence_free(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_free(struct i915_sw_fence *fence) { } diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d26de37719a7..eede5417cb3f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -203,6 +203,10 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(ID), }; +static const u16 subplatform_arl_ids[] = { + INTEL_ARL_IDS(ID), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -260,6 +264,9 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); + } else if (find_devid(devid, subplatform_arl_ids, + ARRAY_SIZE(subplatform_arl_ids))) { + mask = BIT(INTEL_SUBPLATFORM_ARL); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d1a2abc7e513..df73ef94615d 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -127,6 +127,9 @@ enum intel_platform { #define INTEL_SUBPLATFORM_N 1 #define INTEL_SUBPLATFORM_RPLU 2 +/* MTL */ +#define INTEL_SUBPLATFORM_ARL 0 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index e59517ba039e..97c0f772ed65 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -114,6 +114,8 @@ struct pvr_vm_gpuva { struct drm_gpuva base; }; +#define to_pvr_vm_gpuva(va) container_of_const(va, struct pvr_vm_gpuva, base) + enum pvr_vm_bind_type { PVR_VM_BIND_TYPE_MAP, PVR_VM_BIND_TYPE_UNMAP, @@ -386,6 +388,7 @@ pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx) drm_gpuva_unmap(&op->unmap); drm_gpuva_unlink(op->unmap.va); + kfree(to_pvr_vm_gpuva(op->unmap.va)); return 0; } @@ -433,6 +436,7 @@ pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx) } drm_gpuva_unlink(op->remap.unmap->va); + kfree(to_pvr_vm_gpuva(op->remap.unmap->va)); return 0; } diff --git a/drivers/gpu/drm/imx/dcss/Kconfig b/drivers/gpu/drm/imx/dcss/Kconfig index 3ffc061d392b..59e3b6a1dff0 100644 --- a/drivers/gpu/drm/imx/dcss/Kconfig +++ b/drivers/gpu/drm/imx/dcss/Kconfig @@ -2,6 +2,8 @@ config DRM_IMX_DCSS tristate "i.MX8MQ DCSS" select IMX_IRQSTEER select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select VIDEOMODE_HELPERS depends on DRM && ARCH_MXC && ARM64 diff --git a/drivers/gpu/drm/imx/lcdc/Kconfig b/drivers/gpu/drm/imx/lcdc/Kconfig index 7e57922bbd9d..9c28bb0f4662 100644 --- a/drivers/gpu/drm/imx/lcdc/Kconfig +++ b/drivers/gpu/drm/imx/lcdc/Kconfig @@ -3,5 +3,7 @@ config DRM_IMX_LCDC depends on DRM && (ARCH_MXC || COMPILE_TEST) select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR help Found on i.MX1, i.MX21, i.MX25 and i.MX27. diff --git a/drivers/gpu/drm/ingenic/Kconfig b/drivers/gpu/drm/ingenic/Kconfig index 3db117c5edd9..8cd7b750dffe 100644 --- a/drivers/gpu/drm/ingenic/Kconfig +++ b/drivers/gpu/drm/ingenic/Kconfig @@ -8,6 +8,8 @@ config DRM_INGENIC select DRM_BRIDGE select DRM_PANEL_BRIDGE select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select REGMAP select REGMAP_MMIO diff --git a/drivers/gpu/drm/kmb/Kconfig b/drivers/gpu/drm/kmb/Kconfig index fd011367db1d..e5ae3ec52392 100644 --- a/drivers/gpu/drm/kmb/Kconfig +++ b/drivers/gpu/drm/kmb/Kconfig @@ -3,6 +3,8 @@ config DRM_KMB_DISPLAY depends on DRM depends on ARCH_KEEMBAY || COMPILE_TEST select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index d6449ebae838..417ac8c9af41 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -9,6 +9,8 @@ config DRM_MEDIATEK depends on MTK_MMSYS select DRM_GEM_DMA_HELPER if DRM_FBDEV_EMULATION select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_MIPI_DSI select DRM_PANEL select MEMORY diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e..77b50c56c124 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); diff --git a/drivers/gpu/drm/meson/Kconfig b/drivers/gpu/drm/meson/Kconfig index 615fdd0ce41b..2544756538cc 100644 --- a/drivers/gpu/drm/meson/Kconfig +++ b/drivers/gpu/drm/meson/Kconfig @@ -4,6 +4,8 @@ config DRM_MESON depends on DRM && OF && (ARM || ARM64) depends on ARCH_MESON || COMPILE_TEST select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select DRM_DISPLAY_CONNECTOR select VIDEOMODE_HELPERS diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 26a4c71da63a..90c68106b63b 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -17,6 +17,7 @@ config DRM_MSM select DRM_DISPLAY_DP_AUX_BUS select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 1c6626747b98..ecc3fc5cec22 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, * was a bad idea, and is only provided for backwards * compatibility for older targets. */ - return -ENODEV; + return -ENOENT; } if (IS_ERR(fw)) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 34c56e855af7..3b171bf227d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, cstate->num_mixers = num_lm; - dpu_enc->connector = conn_state->connector; - for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; @@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->commit_done_timedout = false; + dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc); + cur_mode = &dpu_enc->base.crtc->state->adjusted_mode; dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index fc178ec73907..648c8d0a4c36 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = { { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ .virt_num_formats = ARRAY_SIZE(plane_formats), \ } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index e2adc937ea63..935ff6fd172c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -31,24 +31,14 @@ * @fmt: Pointer to format string */ #define DPU_DEBUG(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_KMS)) \ - DRM_DEBUG(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /** * DPU_DEBUG_DRIVER - macro for hardware driver logging * @fmt: Pointer to format string */ #define DPU_DEBUG_DRIVER(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_DRIVER)) \ - DRM_ERROR(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) #define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 40c4dd2c3139..29298e066163 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane, new_state->fb, &layout); if (ret) { DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret); + if (pstate->aspace) + msm_framebuffer_cleanup(new_state->fb, pstate->aspace, + pstate->needs_dirtyfb); return ret; } @@ -744,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; if (MSM_FORMAT_IS_YUV(fmt) && - (!pipe->sspp->cap->sblk->scaler_blk.len || - !pipe->sspp->cap->sblk->csc_blk.len)) { + !pipe->sspp->cap->sblk->csc_blk.len) { DPU_DEBUG_PLANE(pdpu, - "plane doesn't have scaler/csc for yuv\n"); + "plane doesn't have csc for yuv\n"); return -EINVAL; } @@ -864,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; + drm_rect_rotate(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* @@ -913,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } + drm_rect_rotate_inv(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if (r_pipe->sspp) + drm_rect_rotate_inv(&r_pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 7bc8a9f0657a..f342fc5ae41e 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, link_info.rate = ctrl->link->link_params.rate; link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING; + dp_link_reset_phy_params_vx_px(ctrl->link); + dp_aux_link_configure(ctrl->aux, &link_info); if (drm_dp_max_downspread(dpcd)) diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index a916b5f3b317..6ff6c9ef351f 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel) static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { - struct dp_link_info *link_info; + const struct dp_link_info *link_info; const u32 max_supported_bpp = 30, min_supported_bpp = 18; - u32 bpp = 0, data_rate_khz = 0; + u32 bpp, data_rate_khz; - bpp = min_t(u32, mode_edid_bpp, max_supported_bpp); + bpp = min(mode_edid_bpp, max_supported_bpp); link_info = &dp_panel->link_info; data_rate_khz = link_info->num_lanes * link_info->rate * 8; - while (bpp > min_supported_bpp) { + do { if (mode_pclk_khz * bpp <= data_rate_khz) - break; + return bpp; bpp -= 6; - } + } while (bpp > min_supported_bpp); - return bpp; + return min_supported_bpp; } int dp_panel_read_sink_caps(struct dp_panel *dp_panel, @@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel) drm_mode->clock); drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp); - dp_panel->dp_mode.bpp = max_t(u32, 18, - min_t(u32, dp_panel->dp_mode.bpp, 30)); + dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp, + dp_panel->dp_mode.drm_mode.clock); + drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n", dp_panel->dp_mode.bpp); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index d90b9471ba6f..faa88fd6eb4d 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, - .highest_bank_bit = 0x3, + .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c index adc60b25f8e6..0af01a0ec601 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c @@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw) break; case NVKM_FIRMWARE_IMG_DMA: nvkm_memory_unref(&memory); - dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys); + dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), + fw->img, fw->phys, DMA_TO_DEVICE); break; case NVKM_FIRMWARE_IMG_SGT: nvkm_memory_unref(&memory); @@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name, break; case NVKM_FIRMWARE_IMG_DMA: { dma_addr_t addr; - len = ALIGN(fw->len, PAGE_SIZE); - fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL); + fw->img = dma_alloc_noncoherent(fw->device->dev, + len, &addr, + DMA_TO_DEVICE, + GFP_KERNEL); if (fw->img) { memcpy(fw->img, src, fw->len); fw->phys = addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c index 80a480b12174..a1c8545f1249 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c @@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user, nvkm_falcon_fw_dtor_sigs(fw); } + /* after last write to the img, sync dma mappings */ + dma_sync_single_for_device(fw->fw.device->dev, + fw->fw.phys, + sg_dma_len(&fw->fw.mem.sgl), + DMA_TO_DEVICE); + FLCNFW_DBG(fw, "resetting"); fw->func->reset(fw); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index 50f0c1914f58..4c3f74396579 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -46,6 +46,8 @@ u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *, u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, struct nvkm_device *, int, int *); +int gp100_ram_init(struct nvkm_ram *); + /* RAM type-specific MR calculation routines */ int nvkm_sddr2_calc(struct nvkm_ram *); int nvkm_sddr3_calc(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c index 378f6fb70990..8987a21e81d1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c @@ -27,7 +27,7 @@ #include <subdev/bios/init.h> #include <subdev/bios/rammap.h> -static int +int gp100_ram_init(struct nvkm_ram *ram) { struct nvkm_subdev *subdev = &ram->fb->subdev; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c index 8550f5e47347..b6b6ee59019d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c @@ -5,6 +5,7 @@ static const struct nvkm_ram_func gp102_ram = { + .init = gp100_ram_init, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c index 330d72b1a4af..52412965fac1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c @@ -324,7 +324,7 @@ nvkm_gsp_fwsec_sb(struct nvkm_gsp *gsp) return ret; /* Verify. */ - err = nvkm_rd32(device, 0x001400 + (0xf * 4)) & 0x0000ffff; + err = nvkm_rd32(device, 0x001400 + (0x15 * 4)) & 0x0000ffff; if (err) { nvkm_error(subdev, "fwsec-sb: 0x%04x\n", err); return -EIO; diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index 3f7139e211d2..fbd9af758581 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_OMAP tristate "OMAP DRM" + depends on MMU depends on DRM && OF depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB) select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION select VIDEOMODE_HELPERS select HDMI diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c index c5d3ead38555..d3baccfe6286 100644 --- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c +++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c @@ -925,7 +925,7 @@ MODULE_DEVICE_TABLE(spi, nv3052c_ids); static const struct of_device_id nv3052c_of_match[] = { { .compatible = "leadtek,ltk035c5444t", .data = <k035c5444t_panel_info }, { .compatible = "fascontek,fs035vg158", .data = &fs035vg158_panel_info }, - { .compatible = "wl-355608-a8", .data = &wl_355608_a8_panel_info }, + { .compatible = "anbernic,rg35xx-plus-panel", .data = &wl_355608_a8_panel_info }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, nv3052c_of_match); diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index b5e7b919f241..34182f67136c 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -10,6 +10,7 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/drm_auth.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> #include <drm/drm_exec.h> @@ -996,6 +997,24 @@ static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, return panthor_group_destroy(pfile, args->group_handle); } +static int group_priority_permit(struct drm_file *file, + u8 priority) +{ + /* Ensure that priority is valid */ + if (priority > PANTHOR_GROUP_PRIORITY_HIGH) + return -EINVAL; + + /* Medium priority and below are always allowed */ + if (priority <= PANTHOR_GROUP_PRIORITY_MEDIUM) + return 0; + + /* Higher priorities require CAP_SYS_NICE or DRM_MASTER */ + if (capable(CAP_SYS_NICE) || drm_is_current_master(file)) + return 0; + + return -EACCES; +} + static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, struct drm_file *file) { @@ -1011,6 +1030,10 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, if (ret) return ret; + ret = group_priority_permit(file, args->priority); + if (ret) + return ret; + ret = panthor_group_create(pfile, args, queue_args); if (ret >= 0) { args->group_handle = ret; diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 857f3f11258a..ef232c0c2049 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1089,6 +1089,12 @@ int panthor_fw_post_reset(struct panthor_device *ptdev) panthor_fw_stop(ptdev); ptdev->fw->fast_reset = false; drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset"); + + ret = panthor_vm_flush_all(ptdev->fw->vm); + if (ret) { + drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)"); + return ret; + } } /* Reload all sections, including RO ones. We're not supposed @@ -1099,7 +1105,7 @@ int panthor_fw_post_reset(struct panthor_device *ptdev) ret = panthor_fw_start(ptdev); if (ret) { - drm_err(&ptdev->base, "FW slow reset failed"); + drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )"); return ret; } diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index fa0a002b1016..cc6e13a97783 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -576,6 +576,12 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, if (as_nr < 0) return 0; + /* + * If the AS number is greater than zero, then we can be sure + * the device is up and running, so we don't need to explicitly + * power it up + */ + if (op != AS_COMMAND_UNLOCK) lock_region(ptdev, as_nr, iova, size); @@ -874,14 +880,23 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) if (!drm_dev_enter(&ptdev->base, &cookie)) return 0; - /* Flush the PTs only if we're already awake */ - if (pm_runtime_active(ptdev->base.dev)) - ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); + ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); drm_dev_exit(cookie); return ret; } +/** + * panthor_vm_flush_all() - Flush L2 caches for the entirety of a VM's AS + * @vm: VM whose cache to flush + * + * Return: 0 on success, a negative error code if flush failed. + */ +int panthor_vm_flush_all(struct panthor_vm *vm) +{ + return panthor_vm_flush_range(vm, vm->base.mm_start, vm->base.mm_range); +} + static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h index f3c1ed19f973..6788771071e3 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.h +++ b/drivers/gpu/drm/panthor/panthor_mmu.h @@ -31,6 +31,7 @@ panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); int panthor_vm_active(struct panthor_vm *vm); void panthor_vm_idle(struct panthor_vm *vm); int panthor_vm_as(struct panthor_vm *vm); +int panthor_vm_flush_all(struct panthor_vm *vm); struct panthor_heap_pool * panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 463bcd3cf00f..12b272a912f8 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -3092,7 +3092,7 @@ int panthor_group_create(struct panthor_file *pfile, if (group_args->pad) return -EINVAL; - if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH) + if (group_args->priority >= PANTHOR_CSG_PRIORITY_COUNT) return -EINVAL; if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || diff --git a/drivers/gpu/drm/renesas/rcar-du/Kconfig b/drivers/gpu/drm/renesas/rcar-du/Kconfig index c17e7c50492c..025677fe88d3 100644 --- a/drivers/gpu/drm/renesas/rcar-du/Kconfig +++ b/drivers/gpu/drm/renesas/rcar-du/Kconfig @@ -5,6 +5,8 @@ config DRM_RCAR_DU depends on ARM || ARM64 || COMPILE_TEST depends on ARCH_RENESAS || COMPILE_TEST select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select VIDEOMODE_HELPERS help diff --git a/drivers/gpu/drm/renesas/rz-du/Kconfig b/drivers/gpu/drm/renesas/rz-du/Kconfig index 5f0db2c5fee6..e1a6dd322caf 100644 --- a/drivers/gpu/drm/renesas/rz-du/Kconfig +++ b/drivers/gpu/drm/renesas/rz-du/Kconfig @@ -6,6 +6,8 @@ config DRM_RZG2L_DU depends on VIDEO_RENESAS_VSP1 select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select VIDEOMODE_HELPERS help Choose this option if you have an RZ/G2L alike chipset. diff --git a/drivers/gpu/drm/renesas/shmobile/Kconfig b/drivers/gpu/drm/renesas/shmobile/Kconfig index 027220b8fe1c..c329ab8a7a8b 100644 --- a/drivers/gpu/drm/renesas/shmobile/Kconfig +++ b/drivers/gpu/drm/renesas/shmobile/Kconfig @@ -5,6 +5,8 @@ config DRM_SHMOBILE depends on ARCH_RENESAS || ARCH_SHMOBILE || COMPILE_TEST select BACKLIGHT_CLASS_DEVICE select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select VIDEOMODE_HELPERS help diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 7df875e38517..23c49e91f1cc 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -86,6 +86,8 @@ config ROCKCHIP_LVDS bool "Rockchip LVDS support" depends on DRM_ROCKCHIP depends on PINCTRL && OF + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR help Choose this option to enable support for Rockchip LVDS controllers. Rockchip rk3288 SoC has LVDS TX Controller can be used, and it @@ -96,6 +98,8 @@ config ROCKCHIP_RGB bool "Rockchip RGB support" depends on DRM_ROCKCHIP depends on PINCTRL + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR help Choose this option to enable support for Rockchip RGB output. Some Rockchip CRTCs, like rv1108, can directly output parallel diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 2241e53a2946..dec6913cec5b 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 44d769d9234d..11e5d10de4d7 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -103,13 +103,17 @@ static int rockchip_drm_init_iommu(struct drm_device *drm_dev) struct rockchip_drm_private *private = drm_dev->dev_private; struct iommu_domain_geometry *geometry; u64 start, end; + int ret; if (IS_ERR_OR_NULL(private->iommu_dev)) return 0; - private->domain = iommu_domain_alloc(private->iommu_dev->bus); - if (!private->domain) - return -ENOMEM; + private->domain = iommu_paging_domain_alloc(private->iommu_dev); + if (IS_ERR(private->domain)) { + ret = PTR_ERR(private->domain); + private->domain = NULL; + return ret; + } geometry = &private->domain->geometry; start = geometry->aperture_start; diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig index 1cc6b6cbdfa9..d7f41a87808e 100644 --- a/drivers/gpu/drm/stm/Kconfig +++ b/drivers/gpu/drm/stm/Kconfig @@ -2,6 +2,7 @@ config DRM_STM tristate "DRM Support for STMicroelectronics SoC Series" depends on DRM && (ARCH_STM32 || COMPILE_TEST) + depends on COMMON_CLK select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 782f51d3044a..e688d8104652 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -8,6 +8,7 @@ config DRM_TEGRA select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 03d1c76aec2d..d79c76a287f2 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1135,6 +1135,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) static int host1x_drm_probe(struct host1x_device *dev) { + struct device *dma_dev = dev->dev.parent; struct tegra_drm *tegra; struct drm_device *drm; int err; @@ -1149,8 +1150,8 @@ static int host1x_drm_probe(struct host1x_device *dev) goto put; } - if (host1x_drm_wants_iommu(dev) && iommu_present(&platform_bus_type)) { - tegra->domain = iommu_domain_alloc(&platform_bus_type); + if (host1x_drm_wants_iommu(dev) && device_iommu_mapped(dma_dev)) { + tegra->domain = iommu_paging_domain_alloc(dma_dev); if (!tegra->domain) { err = -ENOMEM; goto free; diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index c3758faa1b83..d8d0e4d1682f 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -102,6 +102,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test) sg_init_one(sgt->sgl, buf, TEST_SIZE); + /* + * Set the DMA mask to 64-bits and map the sgtables + * otherwise drm_gem_shmem_free will cause a warning + * on debug kernels. + */ + ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64)); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + /* Init a mock DMA-BUF */ buf_mock.size = TEST_SIZE; attach_mock.dmabuf = &buf_mock; diff --git a/drivers/gpu/drm/tidss/Kconfig b/drivers/gpu/drm/tidss/Kconfig index 378600806167..2385c56493b9 100644 --- a/drivers/gpu/drm/tidss/Kconfig +++ b/drivers/gpu/drm/tidss/Kconfig @@ -3,6 +3,8 @@ config DRM_TIDSS depends on DRM && OF depends on ARM || ARM64 || COMPILE_TEST select DRM_KMS_HELPER + select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER help The TI Keystone family SoCs introduced a new generation of diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 9bd7453b25ad..ad1e6236ff6f 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -134,6 +134,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); + preempt_disable(); + write_seqcount_begin(&local_stats->lock); local_stats->start_ns = now; write_seqcount_end(&local_stats->lock); @@ -141,6 +143,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue) write_seqcount_begin(&global_stats->lock); global_stats->start_ns = now; write_seqcount_end(&global_stats->lock); + + preempt_enable(); } static void @@ -162,8 +166,10 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); + preempt_disable(); v3d_stats_update(local_stats, now); v3d_stats_update(global_stats, now); + preempt_enable(); } static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) @@ -315,7 +321,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -335,9 +341,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c index 717d624e9a05..890a66a2361f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -27,6 +27,8 @@ **************************************************************************/ #include "vmwgfx_drv.h" + +#include "vmwgfx_bo.h" #include <linux/highmem.h> /* @@ -420,13 +422,105 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, return 0; } +static void *map_external(struct vmw_bo *bo, struct iosys_map *map) +{ + struct vmw_private *vmw = + container_of(bo->tbo.bdev, struct vmw_private, bdev); + void *ptr = NULL; + int ret; + + if (bo->tbo.base.import_attach) { + ret = dma_buf_vmap(bo->tbo.base.dma_buf, map); + if (ret) { + drm_dbg_driver(&vmw->drm, + "Wasn't able to map external bo!\n"); + goto out; + } + ptr = map->vaddr; + } else { + ptr = vmw_bo_map_and_cache(bo); + } + +out: + return ptr; +} + +static void unmap_external(struct vmw_bo *bo, struct iosys_map *map) +{ + if (bo->tbo.base.import_attach) + dma_buf_vunmap(bo->tbo.base.dma_buf, map); + else + vmw_bo_unmap(bo); +} + +static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset, + u32 dst_stride, struct vmw_bo *src, + u32 src_offset, u32 src_stride, + u32 width_in_bytes, u32 height, + struct vmw_diff_cpy *diff) +{ + struct vmw_private *vmw = + container_of(dst->tbo.bdev, struct vmw_private, bdev); + size_t dst_size = dst->tbo.resource->size; + size_t src_size = src->tbo.resource->size; + struct iosys_map dst_map = {0}; + struct iosys_map src_map = {0}; + int ret, i; + int x_in_bytes; + u8 *vsrc; + u8 *vdst; + + vsrc = map_external(src, &src_map); + if (!vsrc) { + drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n"); + ret = -ENOMEM; + goto out; + } + + vdst = map_external(dst, &dst_map); + if (!vdst) { + drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n"); + ret = -ENOMEM; + goto out; + } + + vsrc += src_offset; + vdst += dst_offset; + if (src_stride == dst_stride) { + dst_size -= dst_offset; + src_size -= src_offset; + memcpy(vdst, vsrc, + min(dst_stride * height, min(dst_size, src_size))); + } else { + WARN_ON(dst_stride < width_in_bytes); + for (i = 0; i < height; ++i) { + memcpy(vdst, vsrc, width_in_bytes); + vsrc += src_stride; + vdst += dst_stride; + } + } + + x_in_bytes = (dst_offset % dst_stride); + diff->rect.x1 = x_in_bytes / diff->cpp; + diff->rect.y1 = ((dst_offset - x_in_bytes) / dst_stride); + diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp; + diff->rect.y2 = diff->rect.y1 + height; + + ret = 0; +out: + unmap_external(src, &src_map); + unmap_external(dst, &dst_map); + + return ret; +} + /** * vmw_bo_cpu_blit - in-kernel cpu blit. * - * @dst: Destination buffer object. + * @vmw_dst: Destination buffer object. * @dst_offset: Destination offset of blit start in bytes. * @dst_stride: Destination stride in bytes. - * @src: Source buffer object. + * @vmw_src: Source buffer object. * @src_offset: Source offset of blit start in bytes. * @src_stride: Source stride in bytes. * @w: Width of blit. @@ -444,13 +538,15 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, * Neither of the buffer objects may be placed in PCI memory * (Fixed memory in TTM terminology) when using this function. */ -int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *vmw_dst, u32 dst_offset, u32 dst_stride, - struct ttm_buffer_object *src, + struct vmw_bo *vmw_src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff) { + struct ttm_buffer_object *src = &vmw_src->tbo; + struct ttm_buffer_object *dst = &vmw_dst->tbo; struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false @@ -460,6 +556,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, int ret = 0; struct page **dst_pages = NULL; struct page **src_pages = NULL; + bool src_external = (src->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + bool dst_external = (dst->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + + if (WARN_ON(dst == src)) + return -EINVAL; /* Buffer objects need to be either pinned or reserved: */ if (!(dst->pin_count)) @@ -479,6 +580,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, return ret; } + if (src_external || dst_external) + return vmw_external_bo_copy(vmw_dst, dst_offset, dst_stride, + vmw_src, src_offset, src_stride, + w, h, diff); + if (!src->ttm->pages && src->ttm->sg) { src_pages = kvmalloc_array(src->ttm->num_pages, sizeof(struct page *), GFP_KERNEL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index f42ebc4a7c22..a0e433fbcba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -360,6 +360,8 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) void *virtual; int ret; + atomic_inc(&vbo->map_count); + virtual = ttm_kmap_obj_virtual(&vbo->map, ¬_used); if (virtual) return virtual; @@ -383,11 +385,17 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) */ void vmw_bo_unmap(struct vmw_bo *vbo) { + int map_count; + if (vbo->map.bo == NULL) return; - ttm_bo_kunmap(&vbo->map); - vbo->map.bo = NULL; + map_count = atomic_dec_return(&vbo->map_count); + + if (!map_count) { + ttm_bo_kunmap(&vbo->map); + vbo->map.bo = NULL; + } } @@ -421,6 +429,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; xa_init(&vmw_bo->detached_resources); + atomic_set(&vmw_bo->map_count, 0); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 62b4342d5f7c..43b5439ec9f7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -71,6 +71,8 @@ struct vmw_bo_params { * @map: Kmap object for semi-persistent mappings * @res_tree: RB tree of resources using this buffer object as a backing MOB * @res_prios: Eviction priority counts for attached resources + * @map_count: The number of currently active maps. Will differ from the + * cpu_writers because it includes kernel maps. * @cpu_writers: Number of synccpu write grabs. Protected by reservation when * increased. May be decreased without reservation. * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB @@ -90,6 +92,7 @@ struct vmw_bo { u32 res_prios[TTM_MAX_BO_PRIORITY]; struct xarray detached_resources; + atomic_t map_count; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 32f50e595809..3f4719b3c268 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1353,9 +1353,9 @@ void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n); -int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *dst, u32 dst_offset, u32 dst_stride, - struct ttm_buffer_object *src, + struct vmw_bo *src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 5453f7cf0e2d..fab155a68054 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -502,7 +502,7 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty) container_of(dirty->unit, typeof(*stdu), base); s32 width, height; s32 src_pitch, dst_pitch; - struct ttm_buffer_object *src_bo, *dst_bo; + struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp); @@ -517,11 +517,11 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty) /* Assume we are blitting from Guest (bo) to Host (display_srf) */ src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; - src_bo = &stdu->display_srf->res.guest_memory_bo->tbo; + src_bo = stdu->display_srf->res.guest_memory_bo; src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp; dst_pitch = ddirty->pitch; - dst_bo = &ddirty->buf->tbo; + dst_bo = ddirty->buf; dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch, @@ -1170,7 +1170,7 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd, struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(0); struct vmw_stdu_update_gb_image *cmd_img = cmd; struct vmw_stdu_update *cmd_update; - struct ttm_buffer_object *src_bo, *dst_bo; + struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; s32 src_pitch, dst_pitch; s32 width, height; @@ -1184,11 +1184,11 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd, diff.cpp = stdu->cpp; - dst_bo = &stdu->display_srf->res.guest_memory_bo->tbo; + dst_bo = stdu->display_srf->res.guest_memory_bo; dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; dst_offset = bb->y1 * dst_pitch + bb->x1 * stdu->cpp; - src_bo = &vfbbo->buffer->tbo; + src_bo = vfbbo->buffer; src_pitch = update->vfb->base.pitches[0]; src_offset = bo_update->fb_top * src_pitch + bo_update->fb_left * stdu->cpp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 8ae6a761c900..1625b30d9970 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -2283,9 +2283,11 @@ int vmw_dumb_create(struct drm_file *file_priv, /* * Without mob support we're just going to use raw memory buffer * because we wouldn't be able to support full surface coherency - * without mobs + * without mobs. There also no reason to support surface coherency + * without 3d (i.e. gpu usage on the host) because then all the + * contents is going to be rendered guest side. */ - if (!dev_priv->has_mob) { + if (!dev_priv->has_mob || !vmw_supports_3d(dev_priv)) { int cpp = DIV_ROUND_UP(args->bpp, 8); switch (cpp) { diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 628c245c4822..e97c9da451b3 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ uses_generated_oob := \ $(obj)/xe_ggtt.o \ + $(obj)/xe_device.o \ $(obj)/xe_gsc.o \ $(obj)/xe_gt.o \ $(obj)/xe_guc.o \ $(obj)/xe_guc_ads.o \ $(obj)/xe_guc_pc.o \ $(obj)/xe_migrate.o \ + $(obj)/xe_pat.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ $(obj)/xe_wa.o \ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 2feedddf1e40..1f1ad4d3ef51 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -83,7 +83,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 /* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) +#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; }) #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index 0c47661bdc6a..a473aa6697d0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -13,7 +13,7 @@ static inline int snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, int fast_timeout_us, int slow_timeout_ms) { - return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val, + return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, slow_timeout_ms ?: 1); } @@ -21,13 +21,13 @@ static inline int snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) { - return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val); + return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); } static inline int snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) { - return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1); + return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); } static inline int @@ -35,7 +35,7 @@ skl_pcode_request(struct intel_uncore *uncore, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms) { - return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply, + return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, timeout_base_ms); } diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 083c4da2ea41..eb5b5f0e4bd9 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -17,6 +17,13 @@ static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore) return xe_root_mmio_gt(xe); } +static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_device_get_root_tile(xe); +} + static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e1..c860fda410c8 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(xe); } int xe_display_init_noirq(struct xe_device *xe) @@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(xe); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } @@ -280,6 +283,27 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { bool s2idle = suspend_to_idle(); @@ -291,22 +315,29 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(xe); - if (has_display(xe)) + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); + if (!runtime) + intel_display_driver_disable_user_access(xe); + } if (!runtime) intel_display_driver_suspend(xe); + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); - intel_encoder_suspend_all(&xe->display); + if (!runtime && has_display(xe)) { + intel_display_driver_suspend_access(xe); + intel_encoder_suspend_all(&xe->display); + } intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold); - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - intel_dmc_suspend(xe); } @@ -344,14 +375,20 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); + if (!runtime && has_display(xe)) + intel_display_driver_resume_access(xe); + /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); if (!runtime) intel_display_driver_resume(xe); - intel_hpd_poll_disable(xe); - if (has_display(xe)) + if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); + if (!runtime) + intel_display_driver_enable_user_access(xe); + } + intel_hpd_poll_disable(xe); intel_opregion_resume(xe); diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 9e860c61f4b3..ccd0d87d438a 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -7,6 +7,8 @@ #include "intel_display_types.h" #include "intel_dsb_buffer.h" #include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" #include "xe_gt.h" u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); + xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); + xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 423f367c7065..d7db44e79eaf 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,6 +10,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "xe_bo.h" +#include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_pm.h" @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d44564bad009..3c2865040058 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,9 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define CG_DIS_CNTLBUS REG_BIT(6) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -372,6 +375,11 @@ #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) +#define XE2_GLOBAL_INVAL XE_REG(0xb404) + +#define SCRATCH1LPFC XE_REG(0xb474) +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) @@ -429,6 +437,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 1189f5a540a8..a9b0091cb7ee 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -52,6 +52,7 @@ #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ #define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_PES_DISAG_EN REG_GENMASK(27, 22) #define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) #define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) #define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 31192d983d9e..261d3d6c8a93 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1575,7 +1575,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1590,7 +1590,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1638,7 +1638,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 76109415eba6..c89deffffb6d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -54,6 +54,9 @@ #include "xe_vm.h" #include "xe_vram.h" #include "xe_wait_user_fence.h" +#include "xe_wa.h" + +#include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) { @@ -87,9 +90,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) spin_unlock(&xe->clients.lock); file->driver_priv = xef; + kref_init(&xef->refcount); + return 0; } +static void xe_file_destroy(struct kref *ref) +{ + struct xe_file *xef = container_of(ref, struct xe_file, refcount); + struct xe_device *xe = xef->xe; + + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + +/** + * xe_file_get() - Take a reference to the xe file object + * @xef: Pointer to the xe file + * + * Anyone with a pointer to xef must take a reference to the xe file + * object using this call. + * + * Return: xe file pointer + */ +struct xe_file *xe_file_get(struct xe_file *xef) +{ + kref_get(&xef->refcount); + return xef; +} + +/** + * xe_file_put() - Drop a reference to the xe file object + * @xef: Pointer to the xe file + * + * Used to drop reference to the xef object + */ +void xe_file_put(struct xe_file *xef) +{ + kref_put(&xef->refcount, xe_file_destroy); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -98,6 +147,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) struct xe_exec_queue *q; unsigned long idx; + xe_pm_runtime_get(xe); + /* * No need for exec_queue.lock here as there is no contention for it * when FD is closing as IOCTLs presumably can't be modifying the @@ -108,21 +159,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - xa_destroy(&xef->exec_queue.xa); - mutex_destroy(&xef->exec_queue.lock); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xa_destroy(&xef->vm.xa); - mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); + xe_file_put(xef); - xe_drm_client_put(xef->client); - kfree(xef); + xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { @@ -779,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe) if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { + xe_device_l2_flush(xe); + return; + } + for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -802,6 +851,30 @@ void xe_device_td_flush(struct xe_device *xe) } } +void xe_device_l2_flush(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + + gt = xe_root_mmio_gt(xe); + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + spin_lock(>->global_invl_lock); + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); + + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) + xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bb07f5669dbb..533ccfb2567a 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); void xe_device_td_flush(struct xe_device *xe); +void xe_device_l2_flush(struct xe_device *xe); static inline bool xe_device_wedged(struct xe_device *xe) { @@ -170,4 +171,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) void xe_device_declare_wedged(struct xe_device *xe); +struct xe_file *xe_file_get(struct xe_file *xef); +void xe_file_put(struct xe_file *xef); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744..9e5fdf96750b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -203,6 +203,12 @@ struct xe_tile { } vf; } sriov; + /** @pcode: tile's PCODE */ + struct { + /** @pcode.lock: protecting tile's PCODE mailbox data */ + struct mutex lock; + } pcode; + /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; @@ -566,6 +572,9 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + + /** @refcount: ref count of this xe file */ + struct kref refcount; }; #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6a26923fa10e..1af95b9b9171 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/types.h> +#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client; + xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -166,6 +170,8 @@ static void bo_meminfo(struct xe_bo *bo, u64 sz = bo->size; u32 mem_type; + xe_bo_assert_held(bo); + if (bo->placement.placement) mem_type = bo->placement.placement->mem_type; else @@ -196,6 +202,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type; @@ -206,7 +213,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj); - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock); @@ -215,11 +235,28 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; - bo_meminfo(bo, stats); - xe_bo_put(bo); + + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); + xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue; @@ -251,11 +288,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { + xa_for_each(&xef->exec_queue.xa, i, q) xe_exec_queue_update_run_ticks(q); - xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks; - q->old_run_ticks = q->run_ticks; - } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ba37835849b..9731dcd0b1bd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) { if (q->vm) xe_vm_put(q->vm); + + if (q->xef) + xe_file_put(q->xef); + kfree(q); } @@ -101,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -136,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; @@ -634,7 +643,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) @@ -649,6 +657,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; args->exec_queue_id = id; + q->xef = xe_file_get(xef); return 0; @@ -762,6 +771,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { + struct xe_file *xef; struct xe_lrc *lrc; u32 old_ts, new_ts; @@ -773,6 +783,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) if (!q->vm || !q->vm->xef) return; + xef = q->vm->xef; + /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -783,7 +795,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - q->run_ticks += (new_ts - old_ts) * q->width; + xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } void xe_exec_queue_kill(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 201588ec33c3..f6ee0ae80fd6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -38,6 +38,9 @@ enum xe_exec_queue_priority { * a kernel object. */ struct xe_exec_queue { + /** @xef: Back pointer to xe file if this is user created exec queue */ + struct xe_file *xef; + /** @gt: graphics tile this exec queue can submit to */ struct xe_gt *gt; /** @@ -123,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ @@ -139,10 +140,6 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; - /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */ - u64 old_run_ticks; - /** @run_ticks: hw engine class run time in ticks for this exec queue */ - u64 run_ticks; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..29f96f409391 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) @@ -437,7 +437,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -501,7 +501,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; @@ -519,10 +519,22 @@ out_bo: void xe_gsc_load_start(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) return; + /* + * The GSC HW is only reset by driver FLR or D3cold entry. We don't + * support the former at runtime, while the latter is only supported on + * DGFX, for which we don't support GSC. Therefore, if GSC failed to + * load previously there is no need to try again because the HW is + * stuck in the error state. + */ + xe_assert(xe, !IS_DGFX(xe)); + if (xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + /* GSC FW survives GT reset and D3Hot */ if (gsc_fw_is_loaded(gt)) { xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 31b2e64c70c6..cb9df15e7137 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -9,6 +9,7 @@ #include <drm/drm_managed.h> #include <drm/xe_drm.h> + #include <generated/xe_wa_oob.h> #include "instructions/xe_gfxpipe_commands.h" @@ -45,7 +46,6 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" -#include "xe_pcode.h" #include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" @@ -95,6 +95,51 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + if (!xe_gt_is_media_type(gt)) { + xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg |= CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + } + + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + if (xe_gt_is_media_type(gt)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg &= ~CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + /** * xe_gt_remove() - Clean up the GT structures before driver removal * @gt: the GT object @@ -111,6 +156,8 @@ void xe_gt_remove(struct xe_gt *gt) for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); } static void gt_reset_worker(struct work_struct *w); @@ -338,7 +385,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); - xe_pcode_init(gt); + spin_lock_init(>->global_invl_lock); return 0; } @@ -508,6 +555,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -643,6 +691,8 @@ static int do_gt_restart(struct xe_gt *gt) xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); @@ -702,12 +752,13 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + xe_pm_runtime_get(gt_to_xe(gt)); + if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; } - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -742,11 +793,11 @@ err_out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: XE_WARN_ON(xe_uc_start(>->uc)); - xe_pm_runtime_put(gt_to_xe(gt)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); return err; } @@ -796,6 +847,8 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_idle_disable_pg(gt); + xe_gt_disable_host_l2_vram(gt); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_gt_dbg(gt, "suspended\n"); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..b2a7fa55bd18 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 4699b7836001..b6f0a7299c03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1927,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1935,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 41e46a00c01e..8892d6c2291e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key), + return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), vf_runtime_reg_cmp); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d9359976ab8b..87cb76a8718c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,10 +13,13 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + /* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue @@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + xe_gt_tlb_invalidation_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(xe, fence); +} static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); - list_del(&fence->link); fence->base.error = -ETIME; - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, @@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; - struct xe_guc *guc = >->uc.guc; int pending_seqno; /* @@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) else pending_seqno = gt->tlb_invalidation.seqno - 1; WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) @@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, int seqno; int ret; + xe_gt_assert(gt, fence); + /* * XXX: The seqno algorithm relies on TLB invalidation being processed * in order which they currently are, if that changes the algorithm will @@ -173,14 +177,12 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; - if (fence) { - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - } + fence->seqno = seqno; + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret && fence) { + if (!ret) { spin_lock_irq(>->tlb_invalidation.pending_lock); /* * We haven't actually published the TLB fence as per @@ -201,7 +203,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, tlb_timeout_jiffies(gt)); } spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else if (ret < 0 && fence) { + } else if (ret < 0) { __invalidation_fence_signal(xe, fence); } if (!ret) { @@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = seqno; } mutex_unlock(&guc->ct.lock); @@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * caller can use the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: 0 on success, negative error code on error */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; - return send_tlb_invalidation(>->uc.guc, NULL, action, + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); } @@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (xe_guc_ct_enabled(>->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - if (seqno <= 0) - return seqno; + struct xe_gt_tlb_invalidation_fence fence; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + ret = xe_gt_tlb_invalidation_guc(gt, &fence); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence); + return ret; + } - xe_gt_tlb_invalidation_wait(gt, seqno); + xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { if (IS_SRIOV_VF(xe)) return 0; @@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL + * completion * @start: start address * @end: end address * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; + xe_gt_assert(gt, fence); + /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(xe, fence); - + __invalidation_fence_signal(xe, fence); return 0; } @@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, * @vma: VMA to invalidate * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -401,43 +404,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, } /** - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete - * @gt: graphics tile - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation - * - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. - * - * Return: 0 on success, -ETIME on TLB invalidation timeout - */ -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_guc *guc = >->uc.guc; - int ret; - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) - return 0; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - tlb_timeout_jiffies(gt)); - if (!ret) { - struct drm_printer p = xe_gt_err_printer(gt); - - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); - xe_guc_ct_print(&guc->ct, &p, true); - return -ETIME; - } - - return 0; -} - -/** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc * @msg: message indicating TLB invalidation done @@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } - /* - * wake_up_all() and wait_event_timeout() already have the correct - * barriers. - */ WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +/** + * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence + * @gt: GT + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini + * must be called if fence is not signaled. + */ +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&fence->base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(>->tlb_invalidation.lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->gt = gt; +} + +/** + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence + * @fence: TLB invalidation fence to finalize + * + * Drop PM ref which fence took durinig init. + */ +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + xe_pm_runtime_put(gt_to_xe(fence->gt)); +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index bf3bebd9f985..a84065fa324c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack); +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); + +static inline void +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index 934c828efe31..de6e825e0851 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -8,6 +8,8 @@ #include <linux/dma-fence.h> +struct xe_gt; + /** * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence * @@ -17,6 +19,8 @@ struct xe_gt_tlb_invalidation_fence { /** @base: dma fence base */ struct dma_fence base; + /** @gt: GT which fence belong to */ + struct xe_gt *gt; /** @link: link into list of pending tlb fences */ struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6b5e0b45efb0..c582541970df 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -310,12 +310,6 @@ struct xe_gt { /** @eclass: per hardware engine class interface on the GT */ struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX]; - /** @pcode: GT's PCODE */ - struct { - /** @pcode.lock: protecting GT's PCODE mailbox data */ - struct mutex lock; - } pcode; - /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ struct kobject *sysfs; @@ -362,6 +356,12 @@ struct xe_gt { */ spinlock_t mcr_lock; + /** + * @global_invl_lock: protects the register for the duration + * of a global invalidation of l2 cache + */ + spinlock_t global_invl_lock; + /** @wa_active: keep track of active workarounds */ struct { /** @wa_active.gt: bitmap with active GT workarounds */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d2e937da1d8..64afc90ad2c5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED); + if (ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; ct->state = state; @@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) || + (g2h_len && num_g2h)); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); + if (!ct->g2h_outstanding) + xe_pm_runtime_get_noresume(ct_to_xe(ct)); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; } @@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; - --ct->g2h_outstanding; + if (!--ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); } static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 32e93a8127d4..ccd574e948aa 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -915,7 +915,7 @@ static void pc_init_pcode_freq(struct xe_guc_pc *pc) u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); - XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); + XE_WARN_ON(xe_pcode_init_min_freq_table(gt_to_tile(pc_to_gt(pc)), min, max)); } static int pc_init_freqs(struct xe_guc_pc *pc) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8d7e7f4bbff7..59b36c7998c2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); @@ -1375,6 +1375,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { + struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); + trace_xe_sched_msg_recv(msg); switch (msg->opcode) { @@ -1393,6 +1395,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(xe); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1482,6 +1486,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; msg->private_data = q; diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025..98e3ec08279e 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ @@ -440,16 +441,16 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) if (gt_to_xe(gt)->info.platform == XE_DG2) return -ENXIO; - return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, + return xe_pcode_read(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), uval, NULL); } static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) { - return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP, + return xe_pcode_write(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), - uval); + (uval & POWER_SETUP_I1_DATA_MASK)); } static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95e..58121821f081 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f92faad4b96d..aa68cac9fdf8 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -30,7 +30,8 @@ static void tiles_fini(void *arg) int id; for_each_tile(tile, xe, id) - tile->mmio.regs = NULL; + if (tile != xe_device_get_root_tile(xe)) + tile->mmio.regs = NULL; } int xe_mmio_probe_tiles(struct xe_device *xe) @@ -91,9 +92,11 @@ add_mmio_ext: static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) @@ -121,12 +124,29 @@ int xe_mmio_init(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +static void mmio_flush_pending_writes(struct xe_gt *gt) +{ +#define DUMMY_REG_OFFSET 0x130030 + struct xe_tile *tile = gt_to_tile(gt); + int i; + + if (tile->xe->info.platform != XE_LUNARLAKE) + return; + + /* 4 dummy writes */ + for (i = 0; i < 4; i++) + writel(0, tile->mmio.regs + DUMMY_REG_OFFSET); +} + u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u8 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -139,6 +159,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u16 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -160,6 +183,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u32 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) val = xe_gt_sriov_vf_read32(gt, reg); else diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6d69f751bf78..22f14eba2c63 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -440,6 +440,10 @@ static void xe_oa_enable(struct xe_oa_stream *stream) val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + val |= OAG_OACONTROL_OA_PES_DISAG_EN; + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); } diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..a78c92a44ec2 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4ee32ee1cc88..722278cc23fc 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -7,6 +7,8 @@ #include <drm/xe_drm.h> +#include <generated/xe_wa_oob.h> + #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -15,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_wa.h" #define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + + /* Wa_16023588340. XXX: Should use XE_WA */ + if (GRAPHICS_VERx100(xe) == 2001) + xe->pat.n_entries = 28; /* Disable CLOS3 */ + else + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 15; xe->pat.idx[XE_CACHE_WB] = 2; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9c4eefdf6642..7397d556996a 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -12,7 +12,6 @@ #include "xe_assert.h" #include "xe_device.h" -#include "xe_gt.h" #include "xe_mmio.h" #include "xe_pcode_api.h" @@ -30,7 +29,7 @@ * - PCODE for display operations */ -static int pcode_mailbox_status(struct xe_gt *gt) +static int pcode_mailbox_status(struct xe_tile *tile) { u32 err; static const struct pcode_err_decode err_decode[] = { @@ -45,9 +44,9 @@ static int pcode_mailbox_status(struct xe_gt *gt) [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, }; - err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; + err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { - drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, + drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, err_decode[err].str ?: "Unknown"); return err_decode[err].errno ?: -EPROTO; } @@ -55,84 +54,85 @@ static int pcode_mailbox_status(struct xe_gt *gt) return 0; } -static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, unsigned int timeout_ms, bool return_data, bool atomic) { + struct xe_gt *mmio = tile->primary_gt; int err; - if (gt_to_xe(gt)->info.skip_pcode) + if (tile_to_xe(tile)->info.skip_pcode) return 0; - if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) + if ((xe_mmio_read32(mmio, PCODE_MAILBOX) & PCODE_READY) != 0) return -EAGAIN; - xe_mmio_write32(gt, PCODE_DATA0, *data0); - xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0); - xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); + xe_mmio_write32(mmio, PCODE_DATA0, *data0); + xe_mmio_write32(mmio, PCODE_DATA1, data1 ? *data1 : 0); + xe_mmio_write32(mmio, PCODE_MAILBOX, PCODE_READY | mbox); - err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0, + err = xe_mmio_wait32(mmio, PCODE_MAILBOX, PCODE_READY, 0, timeout_ms * USEC_PER_MSEC, NULL, atomic); if (err) return err; if (return_data) { - *data0 = xe_mmio_read32(gt, PCODE_DATA0); + *data0 = xe_mmio_read32(mmio, PCODE_DATA0); if (data1) - *data1 = xe_mmio_read32(gt, PCODE_DATA1); + *data1 = xe_mmio_read32(mmio, PCODE_DATA1); } - return pcode_mailbox_status(gt); + return pcode_mailbox_status(tile); } -static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +static int pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, unsigned int timeout_ms, bool return_data, bool atomic) { - if (gt_to_xe(gt)->info.skip_pcode) + if (tile_to_xe(tile)->info.skip_pcode) return 0; - lockdep_assert_held(>->pcode.lock); + lockdep_assert_held(&tile->pcode.lock); - return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic); + return __pcode_mailbox_rw(tile, mbox, data0, data1, timeout_ms, return_data, atomic); } -int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) +int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout) { int err; - mutex_lock(>->pcode.lock); - err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false); - mutex_unlock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, &data, NULL, timeout, false, false); + mutex_unlock(&tile->pcode.lock); return err; } -int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) +int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1) { int err; - mutex_lock(>->pcode.lock); - err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false); - mutex_unlock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, val, val1, 1, true, false); + mutex_unlock(&tile->pcode.lock); return err; } -static int pcode_try_request(struct xe_gt *gt, u32 mbox, +static int pcode_try_request(struct xe_tile *tile, u32 mbox, u32 request, u32 reply_mask, u32 reply, u32 *status, bool atomic, int timeout_us, bool locked) { int slept, wait = 10; - xe_gt_assert(gt, timeout_us > 0); + xe_tile_assert(tile, timeout_us > 0); for (slept = 0; slept < timeout_us; slept += wait) { if (locked) - *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + *status = pcode_mailbox_rw(tile, mbox, &request, NULL, 1, true, atomic); else - *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + *status = __pcode_mailbox_rw(tile, mbox, &request, NULL, 1, true, atomic); if ((*status == 0) && ((request & reply_mask) == reply)) return 0; @@ -149,7 +149,7 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, /** * xe_pcode_request - send PCODE request until acknowledgment - * @gt: gt + * @tile: tile * @mbox: PCODE mailbox ID the request is targeted for * @request: request ID * @reply_mask: mask used to check for request acknowledgment @@ -166,17 +166,17 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some * other error as reported by PCODE. */ -int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, - u32 reply_mask, u32 reply, int timeout_base_ms) +int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) { u32 status; int ret; - xe_gt_assert(gt, timeout_base_ms <= 3); + xe_tile_assert(tile, timeout_base_ms <= 3); - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); - ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + ret = pcode_try_request(tile, mbox, request, reply_mask, reply, &status, false, timeout_base_ms * 1000, true); if (!ret) goto out; @@ -191,20 +191,20 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, * requests, and for any quirks of the PCODE firmware that delays * the request completion. */ - drm_err(>_to_xe(gt)->drm, + drm_err(&tile_to_xe(tile)->drm, "PCODE timeout, retrying with preemption disabled\n"); preempt_disable(); - ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + ret = pcode_try_request(tile, mbox, request, reply_mask, reply, &status, true, 50 * 1000, true); preempt_enable(); out: - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); return status ? status : ret; } /** * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table - * @gt: gt instance + * @tile: tile instance * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz. * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz. * @@ -227,30 +227,30 @@ out: * - -EACCES, "PCODE Rejected" * - -EPROTO, "Unknown" */ -int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, +int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, u32 max_gt_freq) { int ret; u32 freq; - if (!gt_to_xe(gt)->info.has_llc) + if (!tile_to_xe(tile)->info.has_llc) return 0; if (max_gt_freq <= min_gt_freq) return -EINVAL; - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); for (freq = min_gt_freq; freq <= max_gt_freq; freq++) { u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq; - ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE, + ret = pcode_mailbox_rw(tile, PCODE_WRITE_MIN_FREQ_TABLE, &data, NULL, 1, false, false); if (ret) goto unlock; } unlock: - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); return ret; } @@ -270,7 +270,7 @@ unlock: int xe_pcode_ready(struct xe_device *xe, bool locked) { u32 status, request = DGFX_GET_INIT_STATUS; - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_tile *tile = xe_device_get_root_tile(xe); int timeout_us = 180000000; /* 3 min */ int ret; @@ -281,15 +281,15 @@ int xe_pcode_ready(struct xe_device *xe, bool locked) return 0; if (locked) - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); - ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request, + ret = pcode_try_request(tile, DGFX_PCODE_STATUS, request, DGFX_INIT_STATUS_COMPLETE, DGFX_INIT_STATUS_COMPLETE, &status, false, timeout_us, locked); if (locked) - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); if (ret) drm_err(&xe->drm, @@ -300,14 +300,14 @@ int xe_pcode_ready(struct xe_device *xe, bool locked) /** * xe_pcode_init: initialize components of PCODE - * @gt: gt instance + * @tile: tile instance * * This function initializes the xe_pcode component. * To be called once only during probe. */ -void xe_pcode_init(struct xe_gt *gt) +void xe_pcode_init(struct xe_tile *tile) { - drmm_mutex_init(>_to_xe(gt)->drm, >->pcode.lock); + drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock); } /** diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 3f54c6d2a57d..ba33991d72a7 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,21 +7,21 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_gt; +struct xe_tile; struct xe_device; -void xe_pcode_init(struct xe_gt *gt); +void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); int xe_pcode_ready(struct xe_device *xe, bool locked); -int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, +int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, u32 max_gt_freq); -int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); -int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, +int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1); +int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val, int timeout_ms); -#define xe_pcode_write(gt, mbox, val) \ - xe_pcode_write_timeout(gt, mbox, val, 1) +#define xe_pcode_write(tile, mbox, val) \ + xe_pcode_write_timeout(tile, mbox, val, 1) -int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, +int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_ms); #define PCODE_MBOX(mbcmd, param1, param2)\ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index de3b5df65e48..9a3f618d22dc 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -91,13 +91,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -151,11 +151,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -363,10 +363,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) mutex_unlock(&xe->mem_access.vram_userfault.lock); if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index e8b8ae5c6485..c453f45328b1 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -128,8 +128,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ade9e7a3a0ad..31a751a5de3f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1115,23 +1115,6 @@ struct invalidation_fence { u32 asid; }; -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - static void invalidation_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - - INIT_LIST_HEAD(&ifence->base.link); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; ifence->start = start; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 02e28274282f..5efe83cc82ab 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -231,7 +231,7 @@ static void rtp_mark_active(struct xe_device *xe, if (first == last) bitmap_set(ctx->active_entries, first, 1); else - bitmap_set(ctx->active_entries, first, last - first + 2); + bitmap_set(ctx->active_entries, first, last - first + 1); } /** diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..9628f9deb3c0 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2883d9aca404..80499681bd58 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, u64 value) { struct xe_user_fence *ufence; + u64 __user *ptr = u64_to_user_ptr(addr); + + if (!access_ok(ptr, sizeof(*ptr))) + return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) - return NULL; + return ERR_PTR(-ENOMEM); ufence->xe = xe; kref_init(&ufence->refcount); - ufence->addr = u64_to_user_ptr(addr); + ufence->addr = ptr; ufence->value = value; ufence->mm = current->mm; mmgrab(ufence->mm); @@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_DBG(xe, !sync->ufence)) - return -ENOMEM; + if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) + return PTR_ERR(sync->ufence); } break; @@ -263,7 +267,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 15ea0a942f67..dda5268507d8 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -9,6 +9,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_migrate.h" +#include "xe_pcode.h" #include "xe_sa.h" #include "xe_tile.h" #include "xe_tile_sysfs.h" @@ -124,6 +125,8 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) if (IS_ERR(tile->primary_gt)) return PTR_ERR(tile->primary_gt); + xe_pcode_init(tile); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index baba14fb1e32..01837f6f609f 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index c108e9d08e70..6195e353f269 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -65,7 +65,7 @@ const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) return "<invalid>"; } -static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) +static inline int xe_uc_fw_status_to_error(const enum xe_uc_fw_status status) { switch (status) { case XE_UC_FIRMWARE_NOT_SUPPORTED: @@ -108,7 +108,7 @@ static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) } static inline enum xe_uc_fw_status -__xe_uc_fw_status(struct xe_uc_fw *uc_fw) +__xe_uc_fw_status(const struct xe_uc_fw *uc_fw) { /* shouldn't call this before checking hw/blob availability */ XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); @@ -156,6 +156,11 @@ static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) return uc_fw->user_overridden; } +static inline bool xe_uc_fw_is_in_error_state(const struct xe_uc_fw *uc_fw) +{ + return xe_uc_fw_status_to_error(__xe_uc_fw_status(uc_fw)) < 0; +} + static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) { if (xe_uc_fw_is_loadable(uc_fw)) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5b166fa03684..50e8fc49ba6c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + + if (vm->xef) + xe_file_put(vm->xef); + kfree(vm); } @@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; - vm->xef = xef; + vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) @@ -3337,10 +3341,11 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; - u32 tile_needs_invalidate = 0; - int seqno[XE_MAX_TILES_PER_DEVICE]; + struct xe_gt_tlb_invalidation_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u8 id; - int ret; + u32 fence_id = 0; + int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); @@ -3365,29 +3370,43 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (xe_pt_zap_ptes(tile, vma)) { - tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); - /* - * FIXME: We potentially need to invalidate multiple - * GTs within the tile - */ - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); - if (seqno[id] < 0) - return seqno[id]; - } - } + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, + &fence[fence_id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + goto wait; + } + ++fence_id; - for_each_tile(tile, xe, id) { - if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); - if (ret < 0) - return ret; + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, + &fence[fence_id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + goto wait; + } + ++fence_id; } } +wait: + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + vma->tile_invalidated = vma->tile_mask; - return 0; + return ret; } struct xe_vm_snapshot { diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index 99ff95e408e0..b26e26d73dae 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -34,7 +34,6 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - struct xe_gt *gt = tile->primary_gt; u32 val, mbox; int err; @@ -42,7 +41,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0) | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); - err = xe_pcode_read(gt, mbox, &val, NULL); + err = xe_pcode_read(tile, mbox, &val, NULL); if (err) return err; @@ -57,7 +56,6 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - struct xe_gt *gt = tile->primary_gt; u32 val, mbox; int err; @@ -65,7 +63,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN) | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); - err = xe_pcode_read(gt, mbox, &val, NULL); + err = xe_pcode_read(tile, mbox, &val, NULL); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7bf0862b231..e648265d081b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPG */ @@ -538,6 +542,20 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, + + /* Xe2_LPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, /* Xe2_HPM */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 26066beb4f6f..08f7336881e3 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -29,3 +29,4 @@ 13011645652 GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001) diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 68ee897de9d7..626e5ac4c33d 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -8,6 +8,7 @@ config DRM_ZYNQMP_DPSUB select DMA_ENGINE select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY |